logpare 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -34,12 +34,29 @@ Top templates by frequency:
34
34
 
35
35
  ## Installation
36
36
 
37
+ ### As a CLI tool (recommended for command-line usage)
38
+
39
+ Install globally to use `logpare` directly from anywhere:
40
+
41
+ ```bash
42
+ npm install -g logpare
43
+
44
+ # Now works directly
45
+ logpare server.log
46
+ ```
47
+
48
+ ### As a library
49
+
50
+ Install locally in your project for programmatic usage:
51
+
37
52
  ```bash
38
53
  npm install logpare
39
54
  # or
40
55
  pnpm add logpare
41
56
  ```
42
57
 
58
+ > **Note:** Local installs require `npx` to run the CLI: `npx logpare server.log`
59
+
43
60
  ## CLI Usage
44
61
 
45
62
  LogPare includes a command-line interface for quick log compression:
@@ -64,6 +81,12 @@ logpare --output templates.txt error.log
64
81
  logpare access.log error.log server.log
65
82
  ```
66
83
 
84
+ > **Using a local install?** Prefix commands with `npx`:
85
+ > ```bash
86
+ > npx logpare server.log
87
+ > cat /var/log/syslog | npx logpare
88
+ > ```
89
+
67
90
  ### CLI Options
68
91
 
69
92
  | Option | Short | Description | Default |
@@ -134,18 +157,101 @@ const result = drain.getResult('detailed');
134
157
  ## Output Formats
135
158
 
136
159
  ### Summary (default)
137
- Compact overview with top templates and rare events.
160
+
161
+ Compact overview with top templates and rare events:
162
+
163
+ ```
164
+ === Log Compression Summary ===
165
+ Input: 10,847 lines → 23 templates (99.8% reduction)
166
+
167
+ Top templates by frequency:
168
+ 1. [4,521x] INFO Connection from <*> established
169
+ 2. [3,892x] DEBUG Request <*> processed in <*>
170
+ 3. [1,203x] WARN Retry attempt <*> for <*>
171
+
172
+ Rare events (≤5 occurrences):
173
+ - [1x] FATAL Database connection lost
174
+ - [2x] ERROR Out of memory exception in <*>
175
+ ```
138
176
 
139
177
  ### Detailed
140
- Full template list with sample variable values.
178
+
179
+ Full template list with all diagnostic metadata:
180
+
181
+ ```
182
+ Template #1: INFO Connection from <*> established
183
+ Occurrences: 4,521
184
+ Severity: info
185
+ First seen: line 1
186
+ Last seen: line 10,234
187
+ Sample values: [["192.168.1.1"], ["10.0.0.55"], ["172.16.0.1"]]
188
+ URLs: api.example.com, cdn.example.com
189
+ Status codes: 200, 201
190
+ Correlation IDs: req-abc123, trace-xyz789
191
+ Durations: 45ms, 120ms, 2.5s
192
+ ```
141
193
 
142
194
  ### JSON
143
- Machine-readable format for programmatic use.
195
+
196
+ Machine-readable format with version field and complete metadata:
197
+
198
+ ```json
199
+ {
200
+ "version": "1.1",
201
+ "stats": {
202
+ "inputLines": 10847,
203
+ "uniqueTemplates": 23,
204
+ "compressionRatio": 0.998,
205
+ "estimatedTokenReduction": 0.95,
206
+ "processingTimeMs": 234
207
+ },
208
+ "templates": [{
209
+ "id": "abc123",
210
+ "pattern": "INFO Connection from <*> established",
211
+ "occurrences": 4521,
212
+ "severity": "info",
213
+ "isStackFrame": false,
214
+ "firstSeen": 1,
215
+ "lastSeen": 10234,
216
+ "samples": [["192.168.1.1"], ["10.0.0.55"]],
217
+ "urlSamples": ["api.example.com"],
218
+ "fullUrlSamples": ["https://api.example.com/v1/users"],
219
+ "statusCodeSamples": [200, 201],
220
+ "correlationIdSamples": ["req-abc123"],
221
+ "durationSamples": ["45ms", "120ms"]
222
+ }]
223
+ }
224
+ ```
144
225
 
145
226
  ```typescript
146
227
  compress(logs, { format: 'json' });
147
228
  ```
148
229
 
230
+ ## Diagnostic Metadata
231
+
232
+ LogPare automatically extracts diagnostic information from matching log lines:
233
+
234
+ | Metadata | Description | Supported Formats |
235
+ |----------|-------------|-------------------|
236
+ | **URLs** | Hostnames and full URLs | `https://...`, `http://...` |
237
+ | **Status codes** | HTTP status codes | `status 404`, `HTTP/1.1 500`, `code: 200`, `404 Not Found` |
238
+ | **Correlation IDs** | Request/trace identifiers | `trace-id: xxx`, `request-id: xxx`, UUIDs |
239
+ | **Durations** | Timing values | `45ms`, `1.5s`, `200µs`, `2min`, `1h` |
240
+
241
+ This metadata is preserved in templates and available in detailed/JSON output formats.
242
+
243
+ ## Severity Detection
244
+
245
+ Each template is automatically tagged with a severity level:
246
+
247
+ | Severity | Detected Patterns |
248
+ |----------|------------------|
249
+ | `error` | ERROR, FATAL, Exception, Failed, Uncaught, TypeError, ReferenceError, SyntaxError, RangeError |
250
+ | `warning` | WARN, Warning, DEPRECATED, deprecated, Deprecation, Violation |
251
+ | `info` | Default for other logs |
252
+
253
+ Stack traces are also automatically detected (V8/Node.js, Firefox, Chrome DevTools formats) and marked with `isStackFrame: true`.
254
+
149
255
  ## API Reference
150
256
 
151
257
  ### `compress(lines, options?)`
@@ -172,6 +278,33 @@ Create a Drain instance for incremental processing.
172
278
  - `options.maxChildren`: `number` - Max children per node (default: `100`)
173
279
  - `options.maxClusters`: `number` - Max total templates (default: `1000`)
174
280
  - `options.preprocessing`: `ParsingStrategy` - Custom preprocessing
281
+ - `options.onProgress`: `ProgressCallback` - Progress reporting callback
282
+
283
+ #### Progress Reporting
284
+
285
+ Track progress during long-running operations:
286
+
287
+ ```typescript
288
+ import { createDrain } from 'logpare';
289
+
290
+ const drain = createDrain({
291
+ onProgress: (event) => {
292
+ console.log(`${event.currentPhase}: ${event.processedLines} lines`);
293
+ if (event.percentComplete !== undefined) {
294
+ console.log(`Progress: ${event.percentComplete.toFixed(1)}%`);
295
+ }
296
+ }
297
+ });
298
+
299
+ drain.addLogLines(logs);
300
+ const result = drain.getResult();
301
+ ```
302
+
303
+ The callback receives a `ProgressEvent` object with:
304
+ - `processedLines`: Lines processed so far
305
+ - `totalLines`: Total lines (if known)
306
+ - `currentPhase`: `'parsing'` | `'clustering'` | `'finalizing'`
307
+ - `percentComplete`: 0-100 (only if `totalLines` known)
175
308
 
176
309
  ### `defineStrategy(overrides)`
177
310
 
@@ -190,6 +323,7 @@ const strategy = defineStrategy({
190
323
  LogPare automatically masks common variable types:
191
324
 
192
325
  - IPv4/IPv6 addresses
326
+ - Port numbers (e.g., `:443`, `:8080`)
193
327
  - UUIDs
194
328
  - Timestamps (ISO, Unix)
195
329
  - File paths and URLs
@@ -197,6 +331,11 @@ LogPare automatically masks common variable types:
197
331
  - Block IDs (HDFS)
198
332
  - Numbers with units (e.g., `250ms`, `1024KB`)
199
333
 
334
+ **Automatic detection features:**
335
+ - **Severity tagging** — Templates are tagged as `error`, `warning`, or `info`
336
+ - **Stack frame detection** — Identifies stack traces (V8, Firefox, Chrome formats)
337
+ - **Diagnostic extraction** — Captures URLs, HTTP status codes, correlation IDs, and durations
338
+
200
339
  ## Performance
201
340
 
202
341
  - **Speed**: >10,000 lines/second
@@ -1,5 +1,113 @@
1
1
  // src/preprocessing/patterns.ts
2
+ var SEVERITY_PATTERNS = {
3
+ error: /\b(Error|ERROR|error|Uncaught|UNCAUGHT|Failed|FAILED|Exception|EXCEPTION|FATAL|fatal|TypeError|ReferenceError|SyntaxError|RangeError)\b/,
4
+ warning: /\b(Warning|WARNING|warn|WARN|\[Violation\]|Violation|DEPRECATED|deprecated|Deprecation)\b/
5
+ };
6
+ var STACK_FRAME_PATTERNS = [
7
+ /^\s*at\s+/,
8
+ // " at Function.x" (V8/Node)
9
+ /^\s*@\s*\S+:\d+/,
10
+ // "@ file.js:123" (Firefox)
11
+ /^\s*\w+@\S+:\d+/,
12
+ // "fn@file.js:123" (Firefox named)
13
+ /^\s*\(anonymous\)\s*@/,
14
+ // "(anonymous) @ file.js:123" (Chrome DevTools)
15
+ /^\s*[A-Za-z_$][\w$]*\s+@\s+\S+:\d+/
16
+ // "functionName @ file.js:123"
17
+ ];
18
+ function detectSeverity(line) {
19
+ if (SEVERITY_PATTERNS.error.test(line)) {
20
+ return "error";
21
+ }
22
+ if (SEVERITY_PATTERNS.warning.test(line)) {
23
+ return "warning";
24
+ }
25
+ return "info";
26
+ }
27
+ function isStackFrame(line) {
28
+ return STACK_FRAME_PATTERNS.some((pattern) => pattern.test(line));
29
+ }
30
+ function extractUrls(line) {
31
+ const urlPattern = /https?:\/\/[^\s"'<>]+/g;
32
+ const matches = line.match(urlPattern);
33
+ if (!matches) return [];
34
+ return matches.map((url) => {
35
+ try {
36
+ const parsed = new URL(url);
37
+ return parsed.hostname;
38
+ } catch {
39
+ return url;
40
+ }
41
+ }).filter((v, i, a) => a.indexOf(v) === i);
42
+ }
43
+ function extractFullUrls(line) {
44
+ const urlPattern = /https?:\/\/[^\s"'<>]+/g;
45
+ const matches = line.match(urlPattern);
46
+ if (!matches) return [];
47
+ return [...new Set(matches)];
48
+ }
49
+ function extractStatusCodes(line) {
50
+ const patterns = [
51
+ /\bstatus[:\s]+(\d{3})\b/gi,
52
+ // "status 404", "status: 500"
53
+ /\bHTTP[\/\s]\d\.\d\s+(\d{3})\b/gi,
54
+ // "HTTP/1.1 404", "HTTP 1.1 500"
55
+ /\bcode[:\s]+(\d{3})\b/gi,
56
+ // "code: 403", "code 500"
57
+ /\b(\d{3})\s+(?:OK|Not Found|Bad Request|Unauthorized|Forbidden|Internal Server Error|Service Unavailable)\b/gi
58
+ ];
59
+ const codes = [];
60
+ for (const pattern of patterns) {
61
+ const regex = new RegExp(pattern.source, pattern.flags);
62
+ let match;
63
+ while ((match = regex.exec(line)) !== null) {
64
+ const codeStr = match[1];
65
+ if (codeStr) {
66
+ const code = parseInt(codeStr, 10);
67
+ if (code >= 100 && code <= 599 && !codes.includes(code)) {
68
+ codes.push(code);
69
+ }
70
+ }
71
+ }
72
+ }
73
+ return codes;
74
+ }
75
+ function extractCorrelationIds(line) {
76
+ const patterns = [
77
+ // Named correlation IDs: trace-id=xxx, request_id: xxx, x-request-id=xxx
78
+ /\b(?:trace[-_]?id|request[-_]?id|correlation[-_]?id|x-request-id)[=:\s]+["']?([a-zA-Z0-9-_]+)["']?/gi,
79
+ // Standalone UUIDs (common correlation ID format)
80
+ /\b([a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})\b/gi
81
+ ];
82
+ const ids = [];
83
+ for (const pattern of patterns) {
84
+ const regex = new RegExp(pattern.source, pattern.flags);
85
+ let match;
86
+ while ((match = regex.exec(line)) !== null) {
87
+ const id = match[1] || match[0];
88
+ if (id && !ids.includes(id)) {
89
+ ids.push(id);
90
+ }
91
+ }
92
+ }
93
+ return ids.slice(0, 3);
94
+ }
95
+ function extractDurations(line) {
96
+ const durationPattern = /\b(\d+(?:\.\d+)?)\s*(ms|milliseconds?|s|sec(?:onds?)?|µs|μs|us|microseconds?|ns|nanoseconds?|min(?:utes?)?|h(?:ours?)?|hr)\b/gi;
97
+ const durations = [];
98
+ let match;
99
+ while ((match = durationPattern.exec(line)) !== null) {
100
+ const duration = `${match[1]}${match[2]}`;
101
+ if (!durations.includes(duration)) {
102
+ durations.push(duration);
103
+ }
104
+ }
105
+ return durations.slice(0, 5);
106
+ }
2
107
  var DEFAULT_PATTERNS = {
108
+ // Timestamps (most specific - must run before port to avoid fragmentation)
109
+ isoTimestamp: /\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:[.,]\d+)?(?:Z|[+-]\d{2}:?\d{2})?/g,
110
+ unixTimestamp: /\b\d{10,13}\b/g,
3
111
  // Network addresses
4
112
  ipv4: /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g,
5
113
  ipv6: /\b[0-9a-fA-F:]{7,39}\b/g,
@@ -8,15 +116,18 @@ var DEFAULT_PATTERNS = {
8
116
  uuid: /\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b/g,
9
117
  hexId: /\b0x[0-9a-fA-F]+\b/g,
10
118
  blockId: /\bblk_-?\d+\b/g,
11
- // Timestamps
12
- isoTimestamp: /\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:[.,]\d+)?(?:Z|[+-]\d{2}:?\d{2})?/g,
13
- unixTimestamp: /\b\d{10,13}\b/g,
14
119
  // Paths and URLs
15
- filePath: /(?:\/[\w.-]+)+/g,
120
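+ // Note: Segments that start with a digit (e.g., /2.7) are not matched, so bare numeric version paths stay intact
121
+ // Every segment must begin with a letter; one segment is enough, so letter-led paths such as /v1.0 still match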
122
+ filePath: /(?:\/[a-zA-Z][\w.-]*)+/g,
16
123
  url: /https?:\/\/[^\s]+/g,
17
- // Numbers (applied last - most aggressive)
18
- // Matches standalone numbers and numbers with units (e.g., 250ms, 1024KB)
19
- numbers: /\b\d+(?:\.\d+)?[a-zA-Z]*\b/g
124
+ // Long numeric IDs only (6+ digits) - preserves status codes, line numbers
125
+ // Examples masked: request IDs (12345678), order numbers (1234567890)
126
+ // Examples preserved: HTTP 404, line:123, /v2.7
127
+ numericId: /\b\d{6,}\b/g,
128
+ // Numbers with optional duration/size suffixes
129
+ // Matches: 1500, 250ms, 1.5s, 100KB, etc.
130
+ numbers: /\b\d+(?:\.\d+)?(?:ms|s|µs|us|ns|min|h|hr|sec|[KkMmGgTt][Bb]?)?\b/g
20
131
  };
21
132
  var WILDCARD = "<*>";
22
133
  function applyPatterns(line, patterns, wildcard = WILDCARD) {
@@ -138,9 +249,31 @@ var LogCluster = class {
138
249
  firstSeen;
139
250
  /** Line index of most recent occurrence */
140
251
  lastSeen;
252
+ /** Detected severity level */
253
+ severity;
254
+ /** Sample URLs extracted from matching lines (hostnames only) */
255
+ urlSamples;
256
+ /** Full URLs extracted from matching lines (complete paths) */
257
+ fullUrlSamples;
258
+ /** HTTP status codes extracted from matching lines */
259
+ statusCodeSamples;
260
+ /** Correlation/trace IDs extracted from matching lines */
261
+ correlationIdSamples;
262
+ /** Duration/timing values extracted from matching lines */
263
+ durationSamples;
264
+ /** Whether this template represents a stack trace frame */
265
+ isStackFrame;
141
266
  /** Maximum number of sample variables to store */
142
267
  maxSamples;
143
- constructor(id, tokens, lineIndex, maxSamples = 3) {
268
+ /** Maximum number of URL samples to store */
269
+ maxUrlSamples = 5;
270
+ /** Maximum number of status code samples to store */
271
+ maxStatusCodeSamples = 5;
272
+ /** Maximum number of correlation ID samples to store */
273
+ maxCorrelationIdSamples = 3;
274
+ /** Maximum number of duration samples to store */
275
+ maxDurationSamples = 5;
276
+ constructor(id, tokens, lineIndex, maxSamples = 3, originalLine = "") {
144
277
  this.id = id;
145
278
  this.tokens = tokens.slice();
146
279
  this.count = 1;
@@ -148,12 +281,39 @@ var LogCluster = class {
148
281
  this.firstSeen = lineIndex;
149
282
  this.lastSeen = lineIndex;
150
283
  this.maxSamples = maxSamples;
284
+ this.severity = detectSeverity(originalLine);
285
+ this.isStackFrame = isStackFrame(originalLine);
286
+ this.urlSamples = [];
287
+ this.fullUrlSamples = [];
288
+ this.statusCodeSamples = [];
289
+ this.correlationIdSamples = [];
290
+ this.durationSamples = [];
291
+ const urls = extractUrls(originalLine);
292
+ if (urls.length > 0) {
293
+ this.urlSamples.push(...urls.slice(0, this.maxUrlSamples));
294
+ }
295
+ const fullUrls = extractFullUrls(originalLine);
296
+ if (fullUrls.length > 0) {
297
+ this.fullUrlSamples.push(...fullUrls.slice(0, this.maxUrlSamples));
298
+ }
299
+ const statusCodes = extractStatusCodes(originalLine);
300
+ if (statusCodes.length > 0) {
301
+ this.statusCodeSamples.push(...statusCodes.slice(0, this.maxStatusCodeSamples));
302
+ }
303
+ const correlationIds = extractCorrelationIds(originalLine);
304
+ if (correlationIds.length > 0) {
305
+ this.correlationIdSamples.push(...correlationIds.slice(0, this.maxCorrelationIdSamples));
306
+ }
307
+ const durations = extractDurations(originalLine);
308
+ if (durations.length > 0) {
309
+ this.durationSamples.push(...durations.slice(0, this.maxDurationSamples));
310
+ }
151
311
  }
152
312
  /**
153
313
  * Update the cluster with a new matching log line.
154
314
  * Returns the variables extracted from this match.
155
315
  */
156
- update(tokens, lineIndex) {
316
+ update(tokens, lineIndex, originalLine = "") {
157
317
  this.count++;
158
318
  this.lastSeen = lineIndex;
159
319
  const variables = [];
@@ -165,6 +325,48 @@ var LogCluster = class {
165
325
  if (this.sampleVariables.length < this.maxSamples) {
166
326
  this.sampleVariables.push(variables);
167
327
  }
328
+ if (originalLine) {
329
+ if (this.urlSamples.length < this.maxUrlSamples) {
330
+ const urls = extractUrls(originalLine);
331
+ for (const url of urls) {
332
+ if (!this.urlSamples.includes(url) && this.urlSamples.length < this.maxUrlSamples) {
333
+ this.urlSamples.push(url);
334
+ }
335
+ }
336
+ }
337
+ if (this.fullUrlSamples.length < this.maxUrlSamples) {
338
+ const fullUrls = extractFullUrls(originalLine);
339
+ for (const url of fullUrls) {
340
+ if (!this.fullUrlSamples.includes(url) && this.fullUrlSamples.length < this.maxUrlSamples) {
341
+ this.fullUrlSamples.push(url);
342
+ }
343
+ }
344
+ }
345
+ if (this.statusCodeSamples.length < this.maxStatusCodeSamples) {
346
+ const statusCodes = extractStatusCodes(originalLine);
347
+ for (const code of statusCodes) {
348
+ if (!this.statusCodeSamples.includes(code) && this.statusCodeSamples.length < this.maxStatusCodeSamples) {
349
+ this.statusCodeSamples.push(code);
350
+ }
351
+ }
352
+ }
353
+ if (this.correlationIdSamples.length < this.maxCorrelationIdSamples) {
354
+ const correlationIds = extractCorrelationIds(originalLine);
355
+ for (const id of correlationIds) {
356
+ if (!this.correlationIdSamples.includes(id) && this.correlationIdSamples.length < this.maxCorrelationIdSamples) {
357
+ this.correlationIdSamples.push(id);
358
+ }
359
+ }
360
+ }
361
+ if (this.durationSamples.length < this.maxDurationSamples) {
362
+ const durations = extractDurations(originalLine);
363
+ for (const duration of durations) {
364
+ if (!this.durationSamples.includes(duration) && this.durationSamples.length < this.maxDurationSamples) {
365
+ this.durationSamples.push(duration);
366
+ }
367
+ }
368
+ }
369
+ }
168
370
  return variables;
169
371
  }
170
372
  /**
@@ -259,8 +461,24 @@ function formatDetailed(templates, stats) {
259
461
  for (const template of templates) {
260
462
  lines.push(`=== Template ${template.id} (${template.occurrences.toLocaleString()} occurrences) ===`);
261
463
  lines.push(`Pattern: ${template.pattern}`);
464
+ lines.push(`Severity: ${template.severity}${template.isStackFrame ? " (stack frame)" : ""}`);
262
465
  lines.push(`First seen: line ${template.firstSeen + 1}`);
263
466
  lines.push(`Last seen: line ${template.lastSeen + 1}`);
467
+ if (template.fullUrlSamples.length > 0) {
468
+ lines.push("URLs:");
469
+ for (const url of template.fullUrlSamples) {
470
+ lines.push(` - ${url}`);
471
+ }
472
+ }
473
+ if (template.statusCodeSamples.length > 0) {
474
+ lines.push(`Status codes: ${template.statusCodeSamples.join(", ")}`);
475
+ }
476
+ if (template.correlationIdSamples.length > 0) {
477
+ lines.push(`Correlation IDs: ${template.correlationIdSamples.join(", ")}`);
478
+ }
479
+ if (template.durationSamples.length > 0) {
480
+ lines.push(`Durations: ${template.durationSamples.join(", ")}`);
481
+ }
264
482
  if (template.sampleVariables.length > 0) {
265
483
  lines.push("Sample variables:");
266
484
  for (const vars of template.sampleVariables) {
@@ -275,7 +493,7 @@ function formatDetailed(templates, stats) {
275
493
  }
276
494
  function formatJson(templates, stats) {
277
495
  const output = {
278
- version: "1.0",
496
+ version: "1.1",
279
497
  stats: {
280
498
  inputLines: stats.inputLines,
281
499
  uniqueTemplates: stats.uniqueTemplates,
@@ -286,7 +504,14 @@ function formatJson(templates, stats) {
286
504
  id: t.id,
287
505
  pattern: t.pattern,
288
506
  occurrences: t.occurrences,
507
+ severity: t.severity,
508
+ isStackFrame: t.isStackFrame,
289
509
  samples: t.sampleVariables,
510
+ urlSamples: t.urlSamples,
511
+ fullUrlSamples: t.fullUrlSamples,
512
+ statusCodeSamples: t.statusCodeSamples,
513
+ correlationIdSamples: t.correlationIdSamples,
514
+ durationSamples: t.durationSamples,
290
515
  firstSeen: t.firstSeen,
291
516
  lastSeen: t.lastSeen
292
517
  }))
@@ -311,6 +536,7 @@ var Drain = class {
311
536
  maxChildren;
312
537
  maxClusters;
313
538
  maxSamples;
539
+ onProgress;
314
540
  lineCount;
315
541
  nextClusterId;
316
542
  constructor(options = {}) {
@@ -321,6 +547,7 @@ var Drain = class {
321
547
  this.maxChildren = options.maxChildren ?? DEFAULTS.maxChildren;
322
548
  this.maxClusters = options.maxClusters ?? DEFAULTS.maxClusters;
323
549
  this.maxSamples = options.maxSamples ?? DEFAULTS.maxSamples;
550
+ this.onProgress = options.onProgress;
324
551
  this.lineCount = 0;
325
552
  this.nextClusterId = 1;
326
553
  }
@@ -340,21 +567,39 @@ var Drain = class {
340
567
  }
341
568
  const matchedCluster = this.treeSearch(tokens);
342
569
  if (matchedCluster !== null) {
343
- matchedCluster.update(tokens, lineIndex);
570
+ matchedCluster.update(tokens, lineIndex, trimmed);
344
571
  matchedCluster.mergeTokens(tokens);
345
572
  return matchedCluster;
346
573
  }
347
574
  if (this.clusters.length >= this.maxClusters) {
348
575
  return null;
349
576
  }
350
- return this.createCluster(tokens, lineIndex);
577
+ return this.createCluster(tokens, lineIndex, trimmed);
351
578
  }
352
579
  /**
353
- * Process multiple log lines.
580
+ * Process multiple log lines with optional progress reporting.
354
581
  */
355
582
  addLogLines(lines) {
356
- for (const line of lines) {
357
- this.addLogLine(line);
583
+ const total = lines.length;
584
+ const reportInterval = Math.max(1, Math.floor(total / 100));
585
+ for (let i = 0; i < total; i++) {
586
+ this.addLogLine(lines[i]);
587
+ if (this.onProgress && i % reportInterval === 0) {
588
+ this.onProgress({
589
+ processedLines: i + 1,
590
+ totalLines: total,
591
+ currentPhase: "clustering",
592
+ percentComplete: Math.round((i + 1) / total * 100)
593
+ });
594
+ }
595
+ }
596
+ if (this.onProgress && total > 0) {
597
+ this.onProgress({
598
+ processedLines: total,
599
+ totalLines: total,
600
+ currentPhase: "finalizing",
601
+ percentComplete: 100
602
+ });
358
603
  }
359
604
  }
360
605
  /**
@@ -414,9 +659,9 @@ var Drain = class {
414
659
  /**
415
660
  * Create a new cluster and add it to the tree.
416
661
  */
417
- createCluster(tokens, lineIndex) {
662
+ createCluster(tokens, lineIndex, originalLine = "") {
418
663
  const clusterId = `t${String(this.nextClusterId++).padStart(3, "0")}`;
419
- const cluster = new LogCluster(clusterId, tokens, lineIndex, this.maxSamples);
664
+ const cluster = new LogCluster(clusterId, tokens, lineIndex, this.maxSamples, originalLine);
420
665
  const tokenCount = tokens.length;
421
666
  const tokenCountKey = String(tokenCount);
422
667
  const lengthNode = this.root.getOrCreateChild(tokenCountKey);
@@ -481,7 +726,14 @@ var Drain = class {
481
726
  occurrences: cluster.count,
482
727
  sampleVariables: cluster.sampleVariables,
483
728
  firstSeen: cluster.firstSeen,
484
- lastSeen: cluster.lastSeen
729
+ lastSeen: cluster.lastSeen,
730
+ severity: cluster.severity,
731
+ urlSamples: cluster.urlSamples,
732
+ fullUrlSamples: cluster.fullUrlSamples,
733
+ statusCodeSamples: cluster.statusCodeSamples,
734
+ correlationIdSamples: cluster.correlationIdSamples,
735
+ durationSamples: cluster.durationSamples,
736
+ isStackFrame: cluster.isStackFrame
485
737
  }));
486
738
  }
487
739
  /**
@@ -553,9 +805,18 @@ function createDrain(options) {
553
805
  // src/api.ts
554
806
  function compress(lines, options = {}) {
555
807
  const { format = "summary", maxTemplates = 50, drain: drainOptions } = options;
808
+ const startTime = performance.now();
556
809
  const drain = createDrain(drainOptions);
557
810
  drain.addLogLines(lines);
558
- return drain.getResult(format, maxTemplates);
811
+ const result = drain.getResult(format, maxTemplates);
812
+ const processingTimeMs = Math.round(performance.now() - startTime);
813
+ return {
814
+ ...result,
815
+ stats: {
816
+ ...result.stats,
817
+ processingTimeMs
818
+ }
819
+ };
559
820
  }
560
821
  function compressText(text, options = {}) {
561
822
  const lines = text.split(/\r?\n/);
@@ -563,6 +824,11 @@ function compressText(text, options = {}) {
563
824
  }
564
825
 
565
826
  export {
827
+ SEVERITY_PATTERNS,
828
+ STACK_FRAME_PATTERNS,
829
+ detectSeverity,
830
+ isStackFrame,
831
+ extractUrls,
566
832
  DEFAULT_PATTERNS,
567
833
  WILDCARD,
568
834
  defineStrategy,
@@ -571,4 +837,4 @@ export {
571
837
  compress,
572
838
  compressText
573
839
  };
574
- //# sourceMappingURL=chunk-PVEO4LBX.js.map
840
+ //# sourceMappingURL=chunk-VVVVUJFY.js.map