logpare 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,259 @@
1
+ /**
2
+ * Pluggable strategy controlling how log lines are preprocessed, tokenized, and matched.
3
+ * Decouples policy (masking, tokenization, thresholds) from mechanism for extensibility; supplied via `DrainOptions.preprocessing`.
4
+ */
5
+ interface ParsingStrategy {
6
+ /** Transform a line before parsing (e.g. masking, normalization) and return the transformed line */
7
+ preprocess(line: string): string;
8
+ /** Split a line into its ordered list of tokens */
9
+ tokenize(line: string): string[];
10
+ /** Similarity threshold to use at the given parse-tree depth (may vary by depth) */
11
+ getSimThreshold(depth: number): number;
12
+ }
13
+ /**
14
+ * A discovered log template, as returned by `Drain.getTemplates()` and in `CompressionResult.templates`.
15
+ */
16
+ interface Template {
17
+ /** Unique identifier for this template */
18
+ id: string;
19
+ /** Pattern string with variable parts replaced by the `<*>` wildcard */
20
+ pattern: string;
21
+ /** Number of log lines matching this template */
22
+ occurrences: number;
23
+ /** Sample variable values from the first N matches; presumably one inner array per sampled match with N = `DrainOptions.maxSamples` — TODO confirm */
24
+ sampleVariables: string[][];
25
+ /** Line index of first occurrence (NOTE(review): 0- vs 1-based is not visible in this declaration — confirm) */
26
+ firstSeen: number;
27
+ /** Line index of most recent occurrence (same indexing as `firstSeen`) */
28
+ lastSeen: number;
29
+ }
30
+ /**
31
+ * Result of log compression, as returned by `compress()`, `compressText()`, and `Drain.getResult()`.
32
+ */
33
+ interface CompressionResult {
34
+ /** All discovered templates */
35
+ templates: Template[];
36
+ /** Compression statistics */
37
+ stats: {
38
+ /** Total input lines processed */
39
+ inputLines: number;
40
+ /** Number of unique templates discovered */
41
+ uniqueTemplates: number;
42
+ /** Compression ratio in the range 0.0 - 1.0 (higher = more compression) */
43
+ compressionRatio: number;
44
+ /** Estimated token reduction percentage (NOTE(review): 0-100 vs 0.0-1.0 scale is not visible in this declaration — confirm) */
45
+ estimatedTokenReduction: number;
46
+ };
47
+ /** Output rendered as a single string in the requested `OutputFormat` */
48
+ formatted: string;
49
+ }
50
+ /**
51
+ * Configuration options for the Drain algorithm; every field is optional.
52
+ */
53
+ interface DrainOptions {
54
+ /** Parse tree depth (excluding length/first-token levels). Default: 4 */
55
+ depth?: number;
56
+ /** Minimum similarity to match an existing cluster (0.0-1.0). Default: 0.4 */
57
+ simThreshold?: number;
58
+ /** Maximum children per parse-tree node (prevents tree explosion). Default: 100 */
59
+ maxChildren?: number;
60
+ /** Maximum total clusters (memory bound). Default: 1000 */
61
+ maxClusters?: number;
62
+ /** Maximum sample variables to store per template. Default: 3 */
63
+ maxSamples?: number;
64
+ /** Custom parsing strategy (preprocess, tokenize, per-depth threshold). NOTE(review): its interaction with `simThreshold` is not visible here — confirm */
65
+ preprocessing?: ParsingStrategy;
66
+ }
67
+ /**
68
+ * Output format used to render the `formatted` string of a compression result.
69
+ */
70
+ type OutputFormat = 'summary' | 'detailed' | 'json';
71
+ /**
72
+ * Options accepted by the compress() and compressText() functions; every field is optional.
73
+ */
74
+ interface CompressOptions {
75
+ /** Output format. Default: 'summary' */
76
+ format?: OutputFormat;
77
+ /** Maximum templates to include in output. Default: 50 */
78
+ maxTemplates?: number;
79
+ /** Drain algorithm options (see `DrainOptions`) */
80
+ drain?: DrainOptions;
81
+ }
82
+
83
+ /**
84
+ * Compress log lines by extracting templates.
85
+ *
86
+ * This is the main entry point for simple use cases.
87
+ * For more control, use `createDrain()` directly.
88
+ *
89
+ * @param lines - Log lines to compress, one array element per line
90
+ * @param options - Optional output format, template limit, and Drain settings
91
+ * @returns Compression result with templates, statistics, and the formatted output string
92
+ *
93
+ * @example
94
+ * ```typescript
95
+ * import { compress } from 'logpare';
96
+ *
97
+ * const logs = [
98
+ * 'Connection from 192.168.1.1 established',
99
+ * 'Connection from 192.168.1.2 established',
100
+ * 'Connection from 10.0.0.1 established',
101
+ * ];
102
+ *
103
+ * const result = compress(logs);
104
+ * console.log(result.formatted);
105
+ * // Output: [3x] Connection from <*> established
106
+ * ```
107
+ */
108
+ declare function compress(lines: string[], options?: CompressOptions): CompressionResult;
109
+ /**
110
+ * Compress a single string containing multiple log lines.
111
+ *
112
+ * @param text - Raw log text (lines separated by newlines)
113
+ * @param options - Optional output format, template limit, and Drain settings
114
+ * @returns Compression result with templates, statistics, and the formatted output string
115
+ */
116
+ declare function compressText(text: string, options?: CompressOptions): CompressionResult;
117
+
118
+ /**
119
+ * Represents a log cluster (template) discovered by the Drain algorithm.
120
+ *
121
+ * V8 Optimization: All properties are initialized in the constructor
122
+ * to ensure monomorphic object shapes for optimal property access.
123
+ */
124
+ declare class LogCluster {
125
+ /** Unique identifier for this cluster */
126
+ readonly id: string;
127
+ /** Template tokens (with wildcards for variable positions); the reference is readonly but mergeTokens() mutates the array contents */
128
+ readonly tokens: string[];
129
+ /** Number of log lines matching this template */
130
+ count: number;
131
+ /** Sample variable values from the first N matches (N is bounded by `maxSamples`) */
132
+ readonly sampleVariables: string[][];
133
+ /** Line index of first occurrence */
134
+ firstSeen: number;
135
+ /** Line index of most recent occurrence */
136
+ lastSeen: number;
137
+ /** Maximum number of sample variables to store */
138
+ private readonly maxSamples;
139
+ constructor(id: string, tokens: string[], lineIndex: number, maxSamples?: number);
140
+ /**
141
+ * Update the cluster with a new matching log line seen at `lineIndex`.
142
+ * Returns the variables extracted from this match.
143
+ */
144
+ update(tokens: string[], lineIndex: number): string[];
145
+ /**
146
+ * Get the template pattern as a single string.
147
+ */
148
+ getPattern(): string;
149
+ /**
150
+ * Compute similarity between this cluster's template and a set of tokens.
151
+ * Returns a value between 0.0 and 1.0.
152
+ */
153
+ computeSimilarity(tokens: string[]): number;
154
+ /**
155
+ * Merge tokens into the template, converting differing positions to wildcards.
156
+ * Mutates the template tokens in place and returns nothing.
157
+ */
158
+ mergeTokens(tokens: string[]): void;
159
+ }
160
+
161
+ /**
162
+ * Drain algorithm implementation for log template mining.
163
+ *
164
+ * The algorithm constructs a fixed-depth parse tree to efficiently
165
+ * cluster log messages by their template structure.
166
+ *
167
+ * Tree Structure:
168
+ * - Level 0 (root): Entry point
169
+ * - Level 1: Token count (length of log message)
170
+ * - Level 2: First token of the message
171
+ * - Levels 3+: Subsequent tokens up to the configured depth (`DrainOptions.depth`)
172
+ * - Leaf: LogCluster containing the template
173
+ */
174
+ declare class Drain {
175
+ private readonly root;
176
+ private readonly clusters;
177
+ private readonly strategy;
178
+ private readonly depth;
179
+ private readonly maxChildren;
180
+ private readonly maxClusters;
181
+ private readonly maxSamples;
182
+ private lineCount;
183
+ private nextClusterId;
184
+ constructor(options?: DrainOptions);
185
+ /**
186
+ * Process a single log line; presumably returns the cluster it was assigned to. NOTE(review): the condition for returning null is not visible in this declaration — confirm.
187
+ */
188
+ addLogLine(line: string): LogCluster | null;
189
+ /**
190
+ * Process multiple log lines; unlike addLogLine() this returns nothing.
191
+ */
192
+ addLogLines(lines: string[]): void;
193
+ /**
194
+ * Search the parse tree for a matching cluster.
195
+ */
196
+ private treeSearch;
197
+ /**
198
+ * Find the best matching cluster at a node.
199
+ */
200
+ private findBestMatch;
201
+ /**
202
+ * Create a new cluster and add it to the tree.
203
+ */
204
+ private createCluster;
205
+ /**
206
+ * Determine if we should use a wildcard key for a token.
207
+ * Uses maxChildren limit to prevent tree explosion.
208
+ */
209
+ private shouldUseWildcard;
210
+ /**
211
+ * Heuristic to detect if a token looks like a variable value.
212
+ */
213
+ private looksLikeVariable;
214
+ /**
215
+ * Get all discovered templates.
216
+ */
217
+ getTemplates(): Template[];
218
+ /**
219
+ * Get compression result with formatted output; both arguments are optional (defaults are not visible here — presumably those documented on CompressOptions, confirm).
220
+ */
221
+ getResult(format?: OutputFormat, maxTemplates?: number): CompressionResult;
222
+ /**
223
+ * Calculate compression statistics.
224
+ */
225
+ private calculateStats;
226
+ /**
227
+ * Get the number of lines processed.
228
+ */
229
+ get totalLines(): number;
230
+ /**
231
+ * Get the number of clusters (templates) discovered.
232
+ */
233
+ get totalClusters(): number;
234
+ }
235
+ /**
236
+ * Create a new Drain instance with the given options (all optional; see `DrainOptions` for the documented defaults).
237
+ */
238
+ declare function createDrain(options?: DrainOptions): Drain;
239
+
240
+ /**
241
+ * Built-in regex patterns for common variable types, keyed by a string (presumably the pattern's name — confirm).
242
+ * These are applied in order during preprocessing to mask variables.
243
+ * Order matters: more specific patterns (like timestamps) must run before
244
+ * patterns that could match substrings (like port numbers).
245
+ */
246
+ declare const DEFAULT_PATTERNS: Record<string, RegExp>;
247
+ /**
248
+ * Placeholder used when masking variables; the same `<*>` marker appears in `Template.pattern`.
249
+ */
250
+ declare const WILDCARD = "<*>";
251
+
252
+ /**
253
+ * Create a custom parsing strategy by extending the default: `overrides` may replace any `ParsingStrategy` method and may supply `patterns` (NOTE(review): whether these replace or extend DEFAULT_PATTERNS is not visible here — confirm).
254
+ */
255
+ declare function defineStrategy(overrides: Partial<ParsingStrategy> & {
256
+ patterns?: Record<string, RegExp>;
257
+ }): ParsingStrategy;
258
+
259
+ export { type CompressOptions, type CompressionResult, DEFAULT_PATTERNS, Drain, type DrainOptions, type LogCluster, type OutputFormat, type ParsingStrategy, type Template, WILDCARD, compress, compressText, createDrain, defineStrategy }; // LogCluster is exported type-only so callers can name the return type of Drain.addLogLine(); the runtime entry point does not export the class.
@@ -0,0 +1,259 @@
1
+ /**
2
+ * Pluggable strategy controlling how log lines are preprocessed, tokenized, and matched.
3
+ * Decouples policy (masking, tokenization, thresholds) from mechanism for extensibility; supplied via `DrainOptions.preprocessing`.
4
+ */
5
+ interface ParsingStrategy {
6
+ /** Transform a line before parsing (e.g. masking, normalization) and return the transformed line */
7
+ preprocess(line: string): string;
8
+ /** Split a line into its ordered list of tokens */
9
+ tokenize(line: string): string[];
10
+ /** Similarity threshold to use at the given parse-tree depth (may vary by depth) */
11
+ getSimThreshold(depth: number): number;
12
+ }
13
+ /**
14
+ * A discovered log template, as returned by `Drain.getTemplates()` and in `CompressionResult.templates`.
15
+ */
16
+ interface Template {
17
+ /** Unique identifier for this template */
18
+ id: string;
19
+ /** Pattern string with variable parts replaced by the `<*>` wildcard */
20
+ pattern: string;
21
+ /** Number of log lines matching this template */
22
+ occurrences: number;
23
+ /** Sample variable values from the first N matches; presumably one inner array per sampled match with N = `DrainOptions.maxSamples` — TODO confirm */
24
+ sampleVariables: string[][];
25
+ /** Line index of first occurrence (NOTE(review): 0- vs 1-based is not visible in this declaration — confirm) */
26
+ firstSeen: number;
27
+ /** Line index of most recent occurrence (same indexing as `firstSeen`) */
28
+ lastSeen: number;
29
+ }
30
+ /**
31
+ * Result of log compression, as returned by `compress()`, `compressText()`, and `Drain.getResult()`.
32
+ */
33
+ interface CompressionResult {
34
+ /** All discovered templates */
35
+ templates: Template[];
36
+ /** Compression statistics */
37
+ stats: {
38
+ /** Total input lines processed */
39
+ inputLines: number;
40
+ /** Number of unique templates discovered */
41
+ uniqueTemplates: number;
42
+ /** Compression ratio in the range 0.0 - 1.0 (higher = more compression) */
43
+ compressionRatio: number;
44
+ /** Estimated token reduction percentage (NOTE(review): 0-100 vs 0.0-1.0 scale is not visible in this declaration — confirm) */
45
+ estimatedTokenReduction: number;
46
+ };
47
+ /** Output rendered as a single string in the requested `OutputFormat` */
48
+ formatted: string;
49
+ }
50
+ /**
51
+ * Configuration options for the Drain algorithm; every field is optional.
52
+ */
53
+ interface DrainOptions {
54
+ /** Parse tree depth (excluding length/first-token levels). Default: 4 */
55
+ depth?: number;
56
+ /** Minimum similarity to match an existing cluster (0.0-1.0). Default: 0.4 */
57
+ simThreshold?: number;
58
+ /** Maximum children per parse-tree node (prevents tree explosion). Default: 100 */
59
+ maxChildren?: number;
60
+ /** Maximum total clusters (memory bound). Default: 1000 */
61
+ maxClusters?: number;
62
+ /** Maximum sample variables to store per template. Default: 3 */
63
+ maxSamples?: number;
64
+ /** Custom parsing strategy (preprocess, tokenize, per-depth threshold). NOTE(review): its interaction with `simThreshold` is not visible here — confirm */
65
+ preprocessing?: ParsingStrategy;
66
+ }
67
+ /**
68
+ * Output format used to render the `formatted` string of a compression result.
69
+ */
70
+ type OutputFormat = 'summary' | 'detailed' | 'json';
71
+ /**
72
+ * Options accepted by the compress() and compressText() functions; every field is optional.
73
+ */
74
+ interface CompressOptions {
75
+ /** Output format. Default: 'summary' */
76
+ format?: OutputFormat;
77
+ /** Maximum templates to include in output. Default: 50 */
78
+ maxTemplates?: number;
79
+ /** Drain algorithm options (see `DrainOptions`) */
80
+ drain?: DrainOptions;
81
+ }
82
+
83
+ /**
84
+ * Compress log lines by extracting templates.
85
+ *
86
+ * This is the main entry point for simple use cases.
87
+ * For more control, use `createDrain()` directly.
88
+ *
89
+ * @param lines - Log lines to compress, one array element per line
90
+ * @param options - Optional output format, template limit, and Drain settings
91
+ * @returns Compression result with templates, statistics, and the formatted output string
92
+ *
93
+ * @example
94
+ * ```typescript
95
+ * import { compress } from 'logpare';
96
+ *
97
+ * const logs = [
98
+ * 'Connection from 192.168.1.1 established',
99
+ * 'Connection from 192.168.1.2 established',
100
+ * 'Connection from 10.0.0.1 established',
101
+ * ];
102
+ *
103
+ * const result = compress(logs);
104
+ * console.log(result.formatted);
105
+ * // Output: [3x] Connection from <*> established
106
+ * ```
107
+ */
108
+ declare function compress(lines: string[], options?: CompressOptions): CompressionResult;
109
+ /**
110
+ * Compress a single string containing multiple log lines.
111
+ *
112
+ * @param text - Raw log text (lines separated by newlines)
113
+ * @param options - Optional output format, template limit, and Drain settings
114
+ * @returns Compression result with templates, statistics, and the formatted output string
115
+ */
116
+ declare function compressText(text: string, options?: CompressOptions): CompressionResult;
117
+
118
+ /**
119
+ * Represents a log cluster (template) discovered by the Drain algorithm.
120
+ *
121
+ * V8 Optimization: All properties are initialized in the constructor
122
+ * to ensure monomorphic object shapes for optimal property access.
123
+ */
124
+ declare class LogCluster {
125
+ /** Unique identifier for this cluster */
126
+ readonly id: string;
127
+ /** Template tokens (with wildcards for variable positions); the reference is readonly but mergeTokens() mutates the array contents */
128
+ readonly tokens: string[];
129
+ /** Number of log lines matching this template */
130
+ count: number;
131
+ /** Sample variable values from the first N matches (N is bounded by `maxSamples`) */
132
+ readonly sampleVariables: string[][];
133
+ /** Line index of first occurrence */
134
+ firstSeen: number;
135
+ /** Line index of most recent occurrence */
136
+ lastSeen: number;
137
+ /** Maximum number of sample variables to store */
138
+ private readonly maxSamples;
139
+ constructor(id: string, tokens: string[], lineIndex: number, maxSamples?: number);
140
+ /**
141
+ * Update the cluster with a new matching log line seen at `lineIndex`.
142
+ * Returns the variables extracted from this match.
143
+ */
144
+ update(tokens: string[], lineIndex: number): string[];
145
+ /**
146
+ * Get the template pattern as a single string.
147
+ */
148
+ getPattern(): string;
149
+ /**
150
+ * Compute similarity between this cluster's template and a set of tokens.
151
+ * Returns a value between 0.0 and 1.0.
152
+ */
153
+ computeSimilarity(tokens: string[]): number;
154
+ /**
155
+ * Merge tokens into the template, converting differing positions to wildcards.
156
+ * Mutates the template tokens in place and returns nothing.
157
+ */
158
+ mergeTokens(tokens: string[]): void;
159
+ }
160
+
161
+ /**
162
+ * Drain algorithm implementation for log template mining.
163
+ *
164
+ * The algorithm constructs a fixed-depth parse tree to efficiently
165
+ * cluster log messages by their template structure.
166
+ *
167
+ * Tree Structure:
168
+ * - Level 0 (root): Entry point
169
+ * - Level 1: Token count (length of log message)
170
+ * - Level 2: First token of the message
171
+ * - Levels 3+: Subsequent tokens up to the configured depth (`DrainOptions.depth`)
172
+ * - Leaf: LogCluster containing the template
173
+ */
174
+ declare class Drain {
175
+ private readonly root;
176
+ private readonly clusters;
177
+ private readonly strategy;
178
+ private readonly depth;
179
+ private readonly maxChildren;
180
+ private readonly maxClusters;
181
+ private readonly maxSamples;
182
+ private lineCount;
183
+ private nextClusterId;
184
+ constructor(options?: DrainOptions);
185
+ /**
186
+ * Process a single log line; presumably returns the cluster it was assigned to. NOTE(review): the condition for returning null is not visible in this declaration — confirm.
187
+ */
188
+ addLogLine(line: string): LogCluster | null;
189
+ /**
190
+ * Process multiple log lines; unlike addLogLine() this returns nothing.
191
+ */
192
+ addLogLines(lines: string[]): void;
193
+ /**
194
+ * Search the parse tree for a matching cluster.
195
+ */
196
+ private treeSearch;
197
+ /**
198
+ * Find the best matching cluster at a node.
199
+ */
200
+ private findBestMatch;
201
+ /**
202
+ * Create a new cluster and add it to the tree.
203
+ */
204
+ private createCluster;
205
+ /**
206
+ * Determine if we should use a wildcard key for a token.
207
+ * Uses maxChildren limit to prevent tree explosion.
208
+ */
209
+ private shouldUseWildcard;
210
+ /**
211
+ * Heuristic to detect if a token looks like a variable value.
212
+ */
213
+ private looksLikeVariable;
214
+ /**
215
+ * Get all discovered templates.
216
+ */
217
+ getTemplates(): Template[];
218
+ /**
219
+ * Get compression result with formatted output; both arguments are optional (defaults are not visible here — presumably those documented on CompressOptions, confirm).
220
+ */
221
+ getResult(format?: OutputFormat, maxTemplates?: number): CompressionResult;
222
+ /**
223
+ * Calculate compression statistics.
224
+ */
225
+ private calculateStats;
226
+ /**
227
+ * Get the number of lines processed.
228
+ */
229
+ get totalLines(): number;
230
+ /**
231
+ * Get the number of clusters (templates) discovered.
232
+ */
233
+ get totalClusters(): number;
234
+ }
235
+ /**
236
+ * Create a new Drain instance with the given options (all optional; see `DrainOptions` for the documented defaults).
237
+ */
238
+ declare function createDrain(options?: DrainOptions): Drain;
239
+
240
+ /**
241
+ * Built-in regex patterns for common variable types, keyed by a string (presumably the pattern's name — confirm).
242
+ * These are applied in order during preprocessing to mask variables.
243
+ * Order matters: more specific patterns (like timestamps) must run before
244
+ * patterns that could match substrings (like port numbers).
245
+ */
246
+ declare const DEFAULT_PATTERNS: Record<string, RegExp>;
247
+ /**
248
+ * Placeholder used when masking variables; the same `<*>` marker appears in `Template.pattern`.
249
+ */
250
+ declare const WILDCARD = "<*>";
251
+
252
+ /**
253
+ * Create a custom parsing strategy by extending the default: `overrides` may replace any `ParsingStrategy` method and may supply `patterns` (NOTE(review): whether these replace or extend DEFAULT_PATTERNS is not visible here — confirm).
254
+ */
255
+ declare function defineStrategy(overrides: Partial<ParsingStrategy> & {
256
+ patterns?: Record<string, RegExp>;
257
+ }): ParsingStrategy;
258
+
259
+ export { type CompressOptions, type CompressionResult, DEFAULT_PATTERNS, Drain, type DrainOptions, type LogCluster, type OutputFormat, type ParsingStrategy, type Template, WILDCARD, compress, compressText, createDrain, defineStrategy }; // LogCluster is exported type-only so callers can name the return type of Drain.addLogLine(); the runtime entry point does not export the class.
package/dist/index.js ADDED
@@ -0,0 +1,19 @@
1
+ import {
2
+ DEFAULT_PATTERNS,
3
+ Drain,
4
+ WILDCARD,
5
+ compress,
6
+ compressText,
7
+ createDrain,
8
+ defineStrategy
9
+ } from "./chunk-JRS36ZWP.js";
10
+ export {
11
+ DEFAULT_PATTERNS,
12
+ Drain,
13
+ WILDCARD,
14
+ compress,
15
+ compressText,
16
+ createDrain,
17
+ defineStrategy
18
+ };
19
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}
package/package.json CHANGED
@@ -1,23 +1,57 @@
1
1
  {
2
2
  "name": "logpare",
3
- "version": "0.0.1",
4
- "description": "Semantic log reduction for LLM debugging - see @logpare/mcp for the MCP server implementation",
5
- "main": "index.js",
3
+ "version": "0.0.3",
4
+ "description": "Semantic log compression that reduces repetitive events while preserving diagnostic information",
5
+ "type": "module",
6
+ "exports": {
7
+ ".": {
8
+ "import": "./dist/index.js",
9
+ "require": "./dist/index.cjs"
10
+ }
11
+ },
12
+ "main": "./dist/index.cjs",
13
+ "module": "./dist/index.js",
14
+ "types": "./dist/index.d.ts",
15
+ "files": [
16
+ "dist"
17
+ ],
18
+ "bin": {
19
+ "logpare": "./dist/cli.js"
20
+ },
21
+ "scripts": {
22
+ "build": "tsup",
23
+ "test": "vitest run",
24
+ "test:watch": "vitest",
25
+ "bench": "vitest bench",
26
+ "typecheck": "tsc --noEmit",
27
+ "prepublishOnly": "pnpm typecheck && pnpm test && pnpm build"
28
+ },
6
29
  "keywords": [
7
30
  "log",
8
31
  "compression",
32
+ "parsing",
33
+ "drain",
34
+ "template",
9
35
  "llm",
10
- "mcp",
11
- "semantic-reduction"
36
+ "mcp"
12
37
  ],
13
38
  "author": "Jeff Green <contact@hirejeffgreen.com>",
39
+ "homepage": "https://github.com/logpare/logpare",
40
+ "bugs": {
41
+ "url": "https://github.com/logpare/logpare/issues"
42
+ },
14
43
  "license": "MIT",
15
- "homepage": "https://github.com/logpare",
16
44
  "repository": {
17
45
  "type": "git",
18
- "url": "https://github.com/logpare/logpare-mcp"
46
+ "url": "https://github.com/logpare/logpare"
19
47
  },
20
- "bugs": {
21
- "url": "https://github.com/logpare/logpare-mcp/issues"
48
+ "engines": {
49
+ "node": "^20.0.0 || ^22.0.0 || >=24.0.0"
50
+ },
51
+ "devDependencies": {
52
+ "@types/node": "^22.10.0",
53
+ "tsup": "^8.5.1",
54
+ "typescript": "^5.9.3",
55
+ "vitest": "^4.0.16"
22
56
  }
23
57
  }
package/index.js DELETED
@@ -1 +0,0 @@
1
- module.exports = {};