logpare 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +309 -16
- package/dist/chunk-JRS36ZWP.js +574 -0
- package/dist/chunk-JRS36ZWP.js.map +1 -0
- package/dist/cli.cjs +729 -0
- package/dist/cli.cjs.map +1 -0
- package/dist/cli.d.cts +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +175 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.cjs +606 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +259 -0
- package/dist/index.d.ts +259 -0
- package/dist/index.js +19 -0
- package/dist/index.js.map +1 -0
- package/package.json +43 -9
- package/index.js +0 -1
package/dist/cli.cjs
ADDED
|
@@ -0,0 +1,729 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
"use strict";
|
|
3
|
+
|
|
4
|
+
// src/cli.ts
|
|
5
|
+
var import_node_util = require("util");
|
|
6
|
+
var import_node_fs = require("fs");
|
|
7
|
+
var import_node_url = require("url");
|
|
8
|
+
var import_node_path = require("path");
|
|
9
|
+
|
|
10
|
+
// src/preprocessing/patterns.ts
|
|
11
|
+
/**
 * Built-in masking patterns, applied in property order by `applyPatterns`.
 *
 * Order matters: each pattern runs over the output of the previous one, so
 * patterns that must see their whole match intact come before patterns that
 * would consume fragments of it.
 */
var DEFAULT_PATTERNS = {
  // Whole-token patterns first. `url` must precede `port` and `filePath`
  // (which would otherwise rewrite "https://host:8080/x" into "https:/<*>"
  // before `url` runs), and `uuid` must precede `unixTimestamp` and `ipv6`
  // (which match the all-digit / all-hex groups inside a UUID).
  url: /https?:\/\/[^\s]+/g,
  uuid: /\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b/g,
  // Timestamps (must run before port to avoid fragmentation)
  isoTimestamp: /\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:[.,]\d+)?(?:Z|[+-]\d{2}:?\d{2})?/g,
  unixTimestamp: /\b\d{10,13}\b/g,
  // Network addresses
  ipv4: /\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b/g,
  ipv6: /\b[0-9a-fA-F:]{7,39}\b/g,
  port: /:\d{2,5}\b/g,
  // Identifiers
  hexId: /\b0x[0-9a-fA-F]+\b/g,
  blockId: /\bblk_-?\d+\b/g,
  // Paths
  filePath: /(?:\/[\w.-]+)+/g,
  // Numbers (applied last - most aggressive)
  // Matches standalone numbers and numbers with units (e.g., 250ms, 1024KB)
  numbers: /\b\d+(?:\.\d+)?[a-zA-Z]*\b/g
};

/** Placeholder written in place of every masked (variable) span or token. */
var WILDCARD = "<*>";
|
|
31
|
+
/**
 * Replace every match of each pattern in `line` with `wildcard`.
 *
 * Patterns are applied sequentially in `Object.values(patterns)` order, each
 * one operating on the output of the previous one.
 *
 * @param {string} line - Text to mask.
 * @param {Record<string, RegExp>} patterns - Named patterns to apply.
 * @param {string} [wildcard=WILDCARD] - Replacement passed to `String.prototype.replace`.
 * @returns {string} The masked text.
 */
function applyPatterns(line, patterns, wildcard = WILDCARD) {
  let result = line;
  for (const pattern of Object.values(patterns)) {
    // Work on a copy so the caller's RegExp objects (and their lastIndex)
    // are never touched.
    const regex = new RegExp(pattern.source, pattern.flags);
    result = result.replace(regex, wildcard);
  }
  return result;
}
|
|
39
|
+
|
|
40
|
+
// src/preprocessing/default.ts
|
|
41
|
+
/**
 * Default preprocessing strategy used by `Drain` when the caller supplies
 * none: mask with the built-in patterns, split on whitespace, and use a
 * constant similarity threshold.
 */
var defaultStrategy = {
  /**
   * Preprocess a log line by masking common variable patterns.
   *
   * @param {string} line - Raw log line.
   * @returns {string} The line with `DEFAULT_PATTERNS` matches replaced by `WILDCARD`.
   */
  preprocess(line) {
    return applyPatterns(line, DEFAULT_PATTERNS, WILDCARD);
  },
  /**
   * Tokenize a line by splitting on runs of whitespace.
   *
   * @param {string} line - Preprocessed line.
   * @returns {string[]} Non-empty tokens (leading/trailing whitespace yields none).
   */
  tokenize(line) {
    return line.split(/\s+/).filter((token) => token.length > 0);
  },
  /**
   * Get similarity threshold for a given tree depth.
   * Uses a constant threshold of 0.4; the depth is ignored.
   *
   * @param {number} _depth - Depth of the node being searched (unused).
   * @returns {number} Always 0.4.
   */
  getSimThreshold(_depth) {
    return 0.4;
  }
};
|
|
62
|
+
|
|
63
|
+
// src/drain/node.ts
|
|
64
|
+
/**
 * A node of the parse tree: a depth, a Map of child nodes keyed by string,
 * and a list of clusters attached to this node.
 */
var DrainNode = class _DrainNode {
  /** Depth of this node in the tree (0 = root) */
  depth;
  /** Child nodes keyed by token value (a Map). */
  children;
  /** Clusters (templates) stored at this node. */
  clusters;
  /**
   * @param {number} depth - Depth assigned to this node.
   */
  constructor(depth) {
    this.depth = depth;
    this.children = /* @__PURE__ */ new Map();
    this.clusters = [];
  }
  /**
   * Get the child for `key`, creating it one level deeper if it is missing.
   *
   * @param {string} key - Child key.
   * @returns {_DrainNode} The existing or newly created child.
   */
  getOrCreateChild(key) {
    let child = this.children.get(key);
    if (child === void 0) {
      child = new _DrainNode(this.depth + 1);
      this.children.set(key, child);
    }
    return child;
  }
  /**
   * Check if this node has a child for the given key.
   *
   * @param {string} key - Child key.
   * @returns {boolean}
   */
  hasChild(key) {
    return this.children.has(key);
  }
  /**
   * Get a child node by key, or undefined if not found.
   *
   * @param {string} key - Child key.
   * @returns {_DrainNode | undefined}
   */
  getChild(key) {
    return this.children.get(key);
  }
  /**
   * Append a cluster to this node.
   *
   * @param {object} cluster - Cluster to attach.
   */
  addCluster(cluster) {
    this.clusters.push(cluster);
  }
  /** Number of direct children. */
  get childCount() {
    return this.children.size;
  }
  /** Number of clusters attached to this node. */
  get clusterCount() {
    return this.clusters.length;
  }
};
|
|
124
|
+
|
|
125
|
+
// src/drain/cluster.ts
|
|
126
|
+
/**
 * A log template: a token sequence in which variable positions hold the
 * `WILDCARD` token, plus occurrence bookkeeping.
 */
var LogCluster = class {
  /** Unique identifier for this cluster */
  id;
  /** Template tokens (with wildcards for variable positions) */
  tokens;
  /** Number of log lines matching this template */
  count;
  /** Variable lists recorded by the first `maxSamples` calls to `update` */
  sampleVariables;
  /** Line index of first occurrence */
  firstSeen;
  /** Line index of most recent occurrence */
  lastSeen;
  /** Maximum number of sample variable lists to store */
  maxSamples;
  /**
   * @param {string} id - Cluster identifier.
   * @param {string[]} tokens - Initial template tokens (copied, not aliased).
   * @param {number} lineIndex - Index of the line that created the cluster.
   * @param {number} [maxSamples=3] - Cap on stored sample variable lists.
   */
  constructor(id, tokens, lineIndex, maxSamples = 3) {
    this.id = id;
    this.tokens = tokens.slice();
    this.count = 1;
    this.sampleVariables = [];
    this.firstSeen = lineIndex;
    this.lastSeen = lineIndex;
    this.maxSamples = maxSamples;
  }
  /**
   * Update the cluster with a new matching log line.
   *
   * Increments the count, records `lineIndex` as the last occurrence, and
   * collects the input token at every position where the template currently
   * holds `WILDCARD`. The list is stored as a sample while fewer than
   * `maxSamples` are held (an empty list is stored too).
   *
   * @param {string[]} tokens - Tokens of the matching line.
   * @param {number} lineIndex - Index of the matching line.
   * @returns {string[]} The variables extracted from this match.
   */
  update(tokens, lineIndex) {
    this.count++;
    this.lastSeen = lineIndex;
    const variables = [];
    const limit = Math.min(this.tokens.length, tokens.length);
    for (let i = 0; i < limit; i++) {
      if (this.tokens[i] === WILDCARD) {
        variables.push(tokens[i] ?? "");
      }
    }
    if (this.sampleVariables.length < this.maxSamples) {
      this.sampleVariables.push(variables);
    }
    return variables;
  }
  /**
   * Get the template pattern as a single space-joined string.
   *
   * @returns {string}
   */
  getPattern() {
    return this.tokens.join(" ");
  }
  /**
   * Compute similarity between this cluster's template and a set of tokens.
   *
   * A position matches when the template holds `WILDCARD` or the two tokens
   * are identical. Empty templates and length mismatches score 0.
   *
   * @param {string[]} tokens - Candidate tokens.
   * @returns {number} Matching positions divided by template length (0.0 to 1.0).
   */
  computeSimilarity(tokens) {
    if (this.tokens.length === 0 || tokens.length !== this.tokens.length) {
      return 0;
    }
    let matchCount = 0;
    for (let i = 0; i < this.tokens.length; i++) {
      const templateToken = this.tokens[i];
      if (templateToken === WILDCARD || templateToken === tokens[i]) {
        matchCount++;
      }
    }
    return matchCount / this.tokens.length;
  }
  /**
   * Merge tokens into the template, converting differing positions to wildcards.
   * Mutates the template tokens in place; positions beyond the shorter of the
   * two sequences are left untouched.
   *
   * @param {string[]} tokens - Tokens of the line being merged.
   */
  mergeTokens(tokens) {
    const limit = Math.min(this.tokens.length, tokens.length);
    for (let i = 0; i < limit; i++) {
      if (this.tokens[i] !== WILDCARD && this.tokens[i] !== tokens[i]) {
        this.tokens[i] = WILDCARD;
      }
    }
  }
};
|
|
209
|
+
|
|
210
|
+
// src/output/formatter.ts
|
|
211
|
+
/**
 * Render a compact, human-readable summary: a header with overall stats, up
 * to 20 templates in the order given, and up to 5 templates with at most 5
 * occurrences listed as rare events.
 *
 * Counts are formatted with `toLocaleString()`, so digit grouping follows the
 * host locale.
 *
 * @param {Array<{pattern: string, occurrences: number}>} templates - Templates,
 *   in the order they should be listed (this function does not sort).
 * @param {{inputLines: number, uniqueTemplates: number, compressionRatio: number}} stats
 * @returns {string} Newline-joined report.
 */
function formatSummary(templates, stats) {
  const TOP_LIMIT = 20;
  const RARE_LIMIT = 5;
  const lines = [];
  lines.push("=== Log Compression Summary ===");
  lines.push(
    `Input: ${stats.inputLines.toLocaleString()} lines \u2192 ${stats.uniqueTemplates} templates (${(stats.compressionRatio * 100).toFixed(1)}% reduction)`
  );
  lines.push("");
  if (templates.length === 0) {
    lines.push("No templates discovered.");
    return lines.join("\n");
  }
  lines.push("Top templates by frequency:");
  templates.slice(0, TOP_LIMIT).forEach((template, index) => {
    const count = template.occurrences.toLocaleString();
    lines.push(`${index + 1}. [${count}x] ${template.pattern}`);
  });
  if (templates.length > TOP_LIMIT) {
    lines.push(`... and ${templates.length - TOP_LIMIT} more templates`);
  }
  const rareTemplates = templates.filter((t) => t.occurrences <= 5);
  if (rareTemplates.length > 0) {
    lines.push("");
    lines.push(`Rare events (\u22645 occurrences): ${rareTemplates.length} templates`);
    for (const template of rareTemplates.slice(0, RARE_LIMIT)) {
      lines.push(`- [${template.occurrences}x] ${template.pattern}`);
    }
    if (rareTemplates.length > RARE_LIMIT) {
      lines.push(`... and ${rareTemplates.length - RARE_LIMIT} more rare templates`);
    }
  }
  return lines.join("\n");
}
|
|
245
|
+
/**
 * Render a detailed report: a stats header followed by one section per
 * template with its pattern, first/last line (printed 1-based from the
 * 0-based indices), and any non-empty sample variable lists.
 *
 * @param {Array<{id: string, pattern: string, occurrences: number,
 *   sampleVariables: string[][], firstSeen: number, lastSeen: number}>} templates
 * @param {{inputLines: number, uniqueTemplates: number, compressionRatio: number,
 *   estimatedTokenReduction: number}} stats
 * @returns {string} Newline-joined report.
 */
function formatDetailed(templates, stats) {
  const lines = [
    "=== Log Compression Details ===",
    `Input: ${stats.inputLines.toLocaleString()} lines \u2192 ${stats.uniqueTemplates} templates (${(stats.compressionRatio * 100).toFixed(1)}% reduction)`,
    `Estimated token reduction: ${(stats.estimatedTokenReduction * 100).toFixed(1)}%`,
    ""
  ];
  if (templates.length === 0) {
    lines.push("No templates discovered.");
    return lines.join("\n");
  }
  for (const template of templates) {
    lines.push(`=== Template ${template.id} (${template.occurrences.toLocaleString()} occurrences) ===`);
    lines.push(`Pattern: ${template.pattern}`);
    lines.push(`First seen: line ${template.firstSeen + 1}`);
    lines.push(`Last seen: line ${template.lastSeen + 1}`);
    if (template.sampleVariables.length > 0) {
      lines.push("Sample variables:");
      for (const vars of template.sampleVariables) {
        // Samples with no captured variables are skipped.
        if (vars.length > 0) {
          lines.push(` - ${vars.join(", ")}`);
        }
      }
    }
    // Blank separator after every template section.
    lines.push("");
  }
  return lines.join("\n");
}
|
|
274
|
+
/**
 * Render the result as pretty-printed JSON (2-space indent).
 *
 * The two ratio stats are rounded to three decimal places, and each
 * template's `sampleVariables` is emitted under the key `samples`.
 *
 * @param {Array<{id: string, pattern: string, occurrences: number,
 *   sampleVariables: string[][], firstSeen: number, lastSeen: number}>} templates
 * @param {{inputLines: number, uniqueTemplates: number, compressionRatio: number,
 *   estimatedTokenReduction: number}} stats
 * @returns {string} JSON document with `version`, `stats` and `templates`.
 */
function formatJson(templates, stats) {
  const roundTo3 = (value) => Math.round(value * 1e3) / 1e3;
  const output = {
    version: "1.0",
    stats: {
      inputLines: stats.inputLines,
      uniqueTemplates: stats.uniqueTemplates,
      compressionRatio: roundTo3(stats.compressionRatio),
      estimatedTokenReduction: roundTo3(stats.estimatedTokenReduction)
    },
    templates: templates.map((t) => ({
      id: t.id,
      pattern: t.pattern,
      occurrences: t.occurrences,
      samples: t.sampleVariables,
      firstSeen: t.firstSeen,
      lastSeen: t.lastSeen
    }))
  };
  return JSON.stringify(output, null, 2);
}
|
|
294
|
+
|
|
295
|
+
// src/drain/drain.ts
|
|
296
|
+
/** Fallback values for `Drain` options the caller leaves unset. */
var DEFAULTS = {
  depth: 4,
  simThreshold: 0.4,
  maxChildren: 100,
  maxClusters: 1e3,
  maxSamples: 3
};

/**
 * Tree key under which variable-looking tokens are grouped. Deliberately
 * distinct from the `"<*>"` token that appears in templates.
 */
var WILDCARD_KEY = "<WILDCARD>";
|
|
304
|
+
/**
 * Template miner: routes each tokenized line through a tree keyed by token
 * count and then by leading tokens, and either merges the line into the best
 * matching cluster at the reached node or creates a new cluster.
 */
var Drain = class {
  /** Root of the parse tree (depth 0). */
  root;
  /** Every cluster created so far, in creation order. */
  clusters;
  /** Preprocessing strategy: `preprocess`, `tokenize`, `getSimThreshold`. */
  strategy;
  /** Number of leading tokens used to route a line through the tree. */
  depth;
  /** Child-count limit consulted by `shouldUseWildcard`. */
  maxChildren;
  /** Once this many clusters exist, unmatched lines are dropped. */
  maxClusters;
  /** Sample cap handed to each new cluster. */
  maxSamples;
  /**
   * Explicit similarity threshold from `options.simThreshold`, or undefined
   * to defer to `strategy.getSimThreshold(depth)`.
   */
  simThreshold;
  /** Lines seen so far, including blank ones (keeps line indices accurate). */
  lineCount;
  /** Sequence number for the next cluster id. */
  nextClusterId;
  /**
   * @param {object} [options]
   * @param {object} [options.preprocessing] - Strategy; defaults to `defaultStrategy`.
   * @param {number} [options.depth] - Defaults to `DEFAULTS.depth`.
   * @param {number} [options.simThreshold] - When given, overrides the
   *   strategy's threshold for every node.
   * @param {number} [options.maxChildren] - Defaults to `DEFAULTS.maxChildren`.
   * @param {number} [options.maxClusters] - Defaults to `DEFAULTS.maxClusters`.
   * @param {number} [options.maxSamples] - Defaults to `DEFAULTS.maxSamples`.
   */
  constructor(options = {}) {
    this.root = new DrainNode(0);
    this.clusters = [];
    this.strategy = options.preprocessing ?? defaultStrategy;
    this.depth = options.depth ?? DEFAULTS.depth;
    this.maxChildren = options.maxChildren ?? DEFAULTS.maxChildren;
    this.maxClusters = options.maxClusters ?? DEFAULTS.maxClusters;
    this.maxSamples = options.maxSamples ?? DEFAULTS.maxSamples;
    // Left undefined when not supplied so a custom strategy's own
    // getSimThreshold keeps working exactly as before.
    this.simThreshold = options.simThreshold ?? void 0;
    this.lineCount = 0;
    this.nextClusterId = 1;
  }
  /**
   * Process a single log line.
   *
   * @param {string} line - Raw log line.
   * @returns {object | null} The matched or newly created cluster; null for
   *   blank lines, lines that tokenize to nothing, or unmatched lines once
   *   `maxClusters` has been reached.
   */
  addLogLine(line) {
    // Blank lines still consume an index so firstSeen/lastSeen stay aligned
    // with the input's line positions.
    const lineIndex = this.lineCount++;
    const trimmed = line.trim();
    if (trimmed.length === 0) {
      return null;
    }
    const tokens = this.strategy.tokenize(this.strategy.preprocess(trimmed));
    if (tokens.length === 0) {
      return null;
    }
    const matchedCluster = this.treeSearch(tokens);
    if (matchedCluster !== null) {
      matchedCluster.update(tokens, lineIndex);
      matchedCluster.mergeTokens(tokens);
      return matchedCluster;
    }
    if (this.clusters.length >= this.maxClusters) {
      return null;
    }
    return this.createCluster(tokens, lineIndex);
  }
  /**
   * Process multiple log lines in order.
   *
   * @param {Iterable<string>} lines
   */
  addLogLines(lines) {
    for (const line of lines) {
      this.addLogLine(line);
    }
  }
  /**
   * Search the parse tree for a matching cluster.
   *
   * Descends by token count, then by up to `depth` leading tokens, preferring
   * an exact-token child and falling back to the wildcard child. If the
   * descent stops early, matching is attempted at the node reached.
   *
   * @param {string[]} tokens
   * @returns {object | null} Best matching cluster or null.
   */
  treeSearch(tokens) {
    const lengthNode = this.root.getChild(String(tokens.length));
    if (lengthNode === void 0) {
      return null;
    }
    const firstToken = tokens[0];
    if (firstToken === void 0) {
      return null;
    }
    let searchNode = lengthNode.getChild(firstToken) ?? lengthNode.getChild(WILDCARD_KEY);
    if (searchNode === void 0) {
      return null;
    }
    const limit = Math.min(tokens.length, this.depth);
    for (let i = 1; i < limit; i++) {
      const token = tokens[i];
      if (token === void 0) {
        break;
      }
      const nextNode = searchNode.getChild(token) ?? searchNode.getChild(WILDCARD_KEY);
      if (nextNode === void 0) {
        break;
      }
      searchNode = nextNode;
    }
    return this.findBestMatch(searchNode, tokens);
  }
  /**
   * Find the best matching cluster at a node.
   *
   * A cluster qualifies when its similarity reaches the threshold; among
   * qualifying clusters the highest similarity wins, earliest on ties.
   *
   * @param {object} node - Node whose clusters are compared.
   * @param {string[]} tokens
   * @returns {object | null}
   */
  findBestMatch(node, tokens) {
    // An explicit simThreshold option (e.g. the CLI --threshold flag) takes
    // precedence; otherwise the strategy decides per depth.
    const threshold = this.simThreshold ?? this.strategy.getSimThreshold(node.depth);
    let bestCluster = null;
    let bestSimilarity = 0;
    for (const cluster of node.clusters) {
      const similarity = cluster.computeSimilarity(tokens);
      if (similarity >= threshold && similarity > bestSimilarity) {
        bestSimilarity = similarity;
        bestCluster = cluster;
      }
    }
    return bestCluster;
  }
  /**
   * Create a new cluster and add it to the tree.
   *
   * Ids are `t` followed by a sequence number zero-padded to three digits.
   *
   * @param {string[]} tokens
   * @param {number} lineIndex
   * @returns {object} The new cluster.
   */
  createCluster(tokens, lineIndex) {
    const clusterId = `t${String(this.nextClusterId++).padStart(3, "0")}`;
    const cluster = new LogCluster(clusterId, tokens, lineIndex, this.maxSamples);
    const lengthNode = this.root.getOrCreateChild(String(tokens.length));
    const firstToken = tokens[0];
    if (firstToken !== void 0) {
      const firstKey = this.shouldUseWildcard(lengthNode, firstToken) ? WILDCARD_KEY : firstToken;
      let currentNode = lengthNode.getOrCreateChild(firstKey);
      const limit = Math.min(tokens.length, this.depth);
      for (let i = 1; i < limit; i++) {
        const token = tokens[i];
        if (token === void 0) {
          break;
        }
        const key = this.shouldUseWildcard(currentNode, token) ? WILDCARD_KEY : token;
        currentNode = currentNode.getOrCreateChild(key);
      }
      currentNode.addCluster(cluster);
    }
    // A cluster built from an empty token list is tracked but not attached
    // to any tree node.
    this.clusters.push(cluster);
    return cluster;
  }
  /**
   * Determine if we should use a wildcard key for a token.
   *
   * An existing exact child always wins. Otherwise the wildcard key is used
   * when the node already has a wildcard child and has reached `maxChildren`,
   * or when the token looks like a variable.
   *
   * @param {object} node
   * @param {string} token
   * @returns {boolean}
   */
  shouldUseWildcard(node, token) {
    if (node.hasChild(token)) {
      return false;
    }
    if (node.hasChild(WILDCARD_KEY) && node.childCount >= this.maxChildren) {
      return true;
    }
    return this.looksLikeVariable(token);
  }
  /**
   * Heuristic to detect if a token looks like a variable value: the wildcard
   * token itself, anything starting with a digit, or a pure-hex string longer
   * than 8 characters.
   *
   * @param {string} token
   * @returns {boolean}
   */
  looksLikeVariable(token) {
    if (token === WILDCARD) {
      return true;
    }
    const firstChar = token.charAt(0);
    if (firstChar >= "0" && firstChar <= "9") {
      return true;
    }
    return token.length > 8 && /^[0-9a-fA-F]+$/.test(token);
  }
  /**
   * Get all discovered templates as plain objects, in creation order.
   *
   * @returns {Array<{id: string, pattern: string, occurrences: number,
   *   sampleVariables: string[][], firstSeen: number, lastSeen: number}>}
   */
  getTemplates() {
    return this.clusters.map((cluster) => ({
      id: cluster.id,
      pattern: cluster.getPattern(),
      occurrences: cluster.count,
      sampleVariables: cluster.sampleVariables,
      firstSeen: cluster.firstSeen,
      lastSeen: cluster.lastSeen
    }));
  }
  /**
   * Get compression result with formatted output.
   *
   * Templates are sorted by descending occurrences and truncated to
   * `maxTemplates`; stats are computed over the full, untruncated list.
   *
   * @param {"summary" | "detailed" | "json"} [format="summary"] - Unknown
   *   values fall back to the summary format.
   * @param {number} [maxTemplates=50]
   * @returns {{templates: object[], stats: object, formatted: string}}
   */
  getResult(format = "summary", maxTemplates = 50) {
    const templates = this.getTemplates();
    templates.sort((a, b) => b.occurrences - a.occurrences);
    const limitedTemplates = templates.slice(0, maxTemplates);
    const stats = this.calculateStats(templates);
    let formatted;
    switch (format) {
      case "detailed":
        formatted = formatDetailed(limitedTemplates, stats);
        break;
      case "json":
        formatted = formatJson(limitedTemplates, stats);
        break;
      case "summary":
      default:
        formatted = formatSummary(limitedTemplates, stats);
        break;
    }
    return {
      templates: limitedTemplates,
      stats,
      formatted
    };
  }
  /**
   * Calculate compression statistics.
   *
   * `compressionRatio` is `1 - templates / lines`. The token-reduction
   * estimate compares pattern length times occurrences against pattern
   * length plus a flat 20 characters per template (the reason for 20 is not
   * stated in this file). Both values are clamped to [0, 1].
   *
   * @param {Array<{pattern: string, occurrences: number}>} templates
   * @returns {{inputLines: number, uniqueTemplates: number,
   *   compressionRatio: number, estimatedTokenReduction: number}}
   */
  calculateStats(templates) {
    const clamp01 = (value) => Math.max(0, Math.min(1, value));
    const inputLines = this.lineCount;
    const uniqueTemplates = templates.length;
    const compressionRatio = inputLines > 0 ? 1 - uniqueTemplates / inputLines : 0;
    let originalChars = 0;
    let compressedChars = 0;
    for (const template of templates) {
      const patternLength = template.pattern.length;
      originalChars += patternLength * template.occurrences;
      compressedChars += patternLength + 20;
    }
    const estimatedTokenReduction = originalChars > 0 ? 1 - compressedChars / originalChars : 0;
    return {
      inputLines,
      uniqueTemplates,
      compressionRatio: clamp01(compressionRatio),
      estimatedTokenReduction: clamp01(estimatedTokenReduction)
    };
  }
  /** Number of lines processed (blank lines included). */
  get totalLines() {
    return this.lineCount;
  }
  /** Number of clusters (templates) discovered. */
  get totalClusters() {
    return this.clusters.length;
  }
};
|
|
547
|
+
/**
 * Factory wrapper around `new Drain(options)`.
 *
 * @param {object} [options] - Passed straight to the `Drain` constructor.
 * @returns {Drain}
 */
function createDrain(options) {
  return new Drain(options);
}
|
|
550
|
+
|
|
551
|
+
// src/api.ts
|
|
552
|
+
/**
 * Compress an array of log lines into templates using a fresh `Drain`.
 *
 * @param {Iterable<string>} lines - Log lines to process, in order.
 * @param {object} [options]
 * @param {"summary" | "detailed" | "json"} [options.format="summary"]
 * @param {number} [options.maxTemplates=50]
 * @param {object} [options.drain] - Options forwarded to `createDrain`.
 * @returns {{templates: object[], stats: object, formatted: string}}
 *   Whatever `drain.getResult(format, maxTemplates)` returns.
 */
function compress(lines, options = {}) {
  const { format = "summary", maxTemplates = 50, drain: drainOptions } = options;
  const drain = createDrain(drainOptions);
  drain.addLogLines(lines);
  return drain.getResult(format, maxTemplates);
}
|
|
558
|
+
/**
 * Compress a block of text by splitting it into lines (LF or CRLF) and
 * delegating to `compress`.
 *
 * A single trailing line terminator does not count as an extra empty line,
 * so "a\nb\n" is two lines rather than three. Interior blank lines are kept
 * so line indices still match the input.
 *
 * @param {string} text - Log text.
 * @param {object} [options] - Forwarded unchanged to `compress`.
 * @returns {object} The result of `compress`.
 */
function compressText(text, options = {}) {
  const lines = text.split(/\r?\n/);
  // split() yields a final "" when the text ends with a newline (and [""]
  // for empty text); drop that phantom line so inputLines is not inflated.
  if (lines[lines.length - 1] === "") {
    lines.pop();
  }
  return compress(lines, options);
}
|
|
562
|
+
|
|
563
|
+
// src/cli.ts
|
|
564
|
+
// Package metadata for --version.
//
// In this CommonJS bundle `import.meta` is shimmed to an empty object, so
// `import_meta.url` is undefined and passing it to fileURLToPath() would throw
// at startup. Only use it when it is actually a string; otherwise keep the
// directory Node's CommonJS module wrapper already provides. (`var` re-declaring
// the wrapper's `__dirname` parameter leaves its value intact, so the fallback
// branch reads Node's own value.)
var import_meta = {};
var __dirname = typeof import_meta.url === "string"
  ? (0, import_node_path.dirname)((0, import_node_url.fileURLToPath)(import_meta.url))
  : __dirname;
// package.json sits one directory above this file's directory.
var pkg = JSON.parse((0, import_node_fs.readFileSync)((0, import_node_path.join)(__dirname, "..", "package.json"), "utf-8"));
var VERSION = pkg.version;
|
|
568
|
+
/** Usage text printed by `--help`. */
var HELP = `
logpare - Semantic log compression for LLM context windows

USAGE:
  logpare [options] [file...]
  cat logs.txt | logpare [options]

OPTIONS:
  -f, --format <fmt>       Output format: summary, detailed, json (default: summary)
  -o, --output <file>      Write output to file instead of stdout
  -d, --depth <n>          Parse tree depth (default: 4)
  -t, --threshold <n>      Similarity threshold 0.0-1.0 (default: 0.4)
  -c, --max-children <n>   Max children per node (default: 100)
  -m, --max-clusters <n>   Max total clusters (default: 1000)
  -n, --max-templates <n>  Max templates in output (default: 50)
  -h, --help               Show this help message
  -v, --version            Show version number

EXAMPLES:
  logpare server.log
  cat /var/log/syslog | logpare --format json
  logpare --depth 5 --threshold 0.5 app.log -o templates.txt
  logpare access.log error.log --format detailed

DESCRIPTION:
  logpare uses the Drain algorithm to extract templates from repetitive log
  data, achieving 60-90% token reduction while preserving diagnostic information.
  This is useful for fitting more log context into LLM prompts.

  For more information: https://github.com/logpare/logpare
`;
|
|
599
|
+
/**
 * Parse and validate the command line (read by `util.parseArgs` from
 * `process.argv`).
 *
 * On any invalid value an error is printed to stderr and the process exits
 * with status 1. Numeric options are parsed strictly: values such as "4abc",
 * "4.7" or "1e3" are rejected for integer options instead of being silently
 * truncated, and "0.5x" is rejected for the threshold.
 *
 * @returns {{format: string, output: (string | undefined), depth: number,
 *   threshold: number, maxChildren: number, maxClusters: number,
 *   maxTemplates: number, files: string[], help: boolean, version: boolean}}
 */
function parseCliArgs() {
  const { values, positionals } = (0, import_node_util.parseArgs)({
    options: {
      format: { type: "string", short: "f", default: "summary" },
      output: { type: "string", short: "o" },
      depth: { type: "string", short: "d", default: "4" },
      threshold: { type: "string", short: "t", default: "0.4" },
      "max-children": { type: "string", short: "c", default: "100" },
      "max-clusters": { type: "string", short: "m", default: "1000" },
      "max-templates": { type: "string", short: "n", default: "50" },
      help: { type: "boolean", short: "h", default: false },
      version: { type: "boolean", short: "v", default: false }
    },
    allowPositionals: true
  });

  // Print the message and terminate with a failure status.
  const fail = (message) => {
    console.error(message);
    process.exit(1);
  };
  // Read option `name` as a base-10 integer >= 1, or fail with the CLI's
  // standard message. Only plain digit strings are accepted.
  const positiveInt = (name) => {
    const raw = values[name];
    const parsed = /^\d+$/.test(raw.trim()) ? Number.parseInt(raw, 10) : Number.NaN;
    if (Number.isNaN(parsed) || parsed < 1) {
      fail(`Error: Invalid ${name} "${raw}". Must be a positive integer.`);
    }
    return parsed;
  };

  const format = values.format;
  if (!["summary", "detailed", "json"].includes(format)) {
    fail(`Error: Invalid format "${format}". Use: summary, detailed, json`);
  }

  const depth = positiveInt("depth");

  // Number() rejects trailing garbage that parseFloat would ignore; the empty
  // string is excluded explicitly because Number("") is 0.
  const rawThreshold = values.threshold;
  const threshold = rawThreshold.trim() === "" ? Number.NaN : Number(rawThreshold);
  if (Number.isNaN(threshold) || threshold < 0 || threshold > 1) {
    fail(`Error: Invalid threshold "${rawThreshold}". Must be a number between 0.0 and 1.0.`);
  }

  const maxChildren = positiveInt("max-children");
  const maxClusters = positiveInt("max-clusters");
  const maxTemplates = positiveInt("max-templates");

  return {
    format,
    output: values.output,
    depth,
    threshold,
    maxChildren,
    maxClusters,
    maxTemplates,
    files: positionals,
    help: values.help,
    version: values.version
  };
}
|
|
669
|
+
/**
 * Read the log text to process.
 *
 * With one or more paths, each file is read as UTF-8 and the contents are
 * joined with a newline. With no paths, stdin (file descriptor 0) is read
 * unless it is a TTY. Any failure prints an error to stderr and exits with
 * status 1.
 *
 * @param {string[]} files - Input file paths; empty means "read stdin".
 * @returns {string} The combined input text.
 */
function readInput(files) {
  if (files.length > 0) {
    const contents = [];
    for (const file of files) {
      // Read directly instead of checking existence first: this avoids a
      // check-then-read race and reports directories and permission errors
      // as a CLI error rather than an uncaught exception.
      try {
        contents.push((0, import_node_fs.readFileSync)(file, "utf-8"));
      } catch (err) {
        if (err.code === "ENOENT") {
          console.error(`Error: File not found: ${file}`);
        } else {
          console.error(`Error: Cannot read ${file}: ${err.message}`);
        }
        process.exit(1);
      }
    }
    return contents.join("\n");
  }
  if (process.stdin.isTTY) {
    console.error(
      "Error: No input provided. Provide file(s) or pipe input via stdin."
    );
    console.error('Run "logpare --help" for usage information.');
    process.exit(1);
  }
  return (0, import_node_fs.readFileSync)(0, "utf-8");
}
|
|
690
|
+
/**
 * CLI entry point: parse arguments, handle --help / --version, read the
 * input, run the compression, and write the result to the requested file or
 * to stdout.
 *
 * Exits with status 0 after printing help or the version, and with status 1
 * when the input is empty or whitespace-only.
 */
function main() {
  const args = parseCliArgs();
  if (args.help) {
    console.log(HELP);
    process.exit(0);
  }
  if (args.version) {
    console.log(`logpare v${VERSION}`);
    process.exit(0);
  }

  const input = readInput(args.files);
  if (!input.trim()) {
    console.error("Error: Empty input");
    process.exit(1);
  }

  const result = compressText(input, {
    format: args.format,
    maxTemplates: args.maxTemplates,
    drain: {
      depth: args.depth,
      simThreshold: args.threshold,
      maxChildren: args.maxChildren,
      maxClusters: args.maxClusters
    }
  });

  // For JSON the CLI serializes the raw templates and stats itself rather
  // than using result.formatted; the other formats use the formatted text.
  let output;
  if (args.format === "json") {
    output = JSON.stringify(
      { templates: result.templates, stats: result.stats },
      null,
      2
    );
  } else {
    output = result.formatted;
  }

  if (args.output) {
    (0, import_node_fs.writeFileSync)(args.output, output, "utf-8");
    // Status goes to stderr so stdout stays clean.
    console.error(`Output written to ${args.output}`);
  } else {
    console.log(output);
  }
}
|
|
728
|
+
main();
|
|
729
|
+
//# sourceMappingURL=cli.cjs.map
|