agentic-log-analyser 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentic_log_analyser-0.1.0/LICENSE +21 -0
- agentic_log_analyser-0.1.0/PKG-INFO +290 -0
- agentic_log_analyser-0.1.0/README.md +270 -0
- agentic_log_analyser-0.1.0/pyproject.toml +32 -0
- agentic_log_analyser-0.1.0/setup.cfg +4 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser/__init__.py +37 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser/__main__.py +6 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser/_entry.py +9 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser/_mcp_entry.py +6 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser/cli.py +124 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser/compress.py +242 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser/grouper.py +397 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser/input.py +126 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser/lex.py +121 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser/mcp_server.py +373 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser/profile.py +143 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser/stream.py +37 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser/template.py +121 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser.egg-info/PKG-INFO +290 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser.egg-info/SOURCES.txt +25 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser.egg-info/dependency_links.txt +1 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser.egg-info/entry_points.txt +3 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser.egg-info/requires.txt +12 -0
- agentic_log_analyser-0.1.0/src/agentic_log_analyser.egg-info/top_level.txt +1 -0
- agentic_log_analyser-0.1.0/tests/test_compress.py +128 -0
- agentic_log_analyser-0.1.0/tests/test_input.py +49 -0
- agentic_log_analyser-0.1.0/tests/test_mcp_server.py +93 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 KiroAINativeCoder
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agentic-log-analyser
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Deterministic log templating on top of Drain3, packaged as an artifact for AI agents.
|
|
5
|
+
Author: agentic-log-analyser
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: drain3>=0.9.11
|
|
11
|
+
Requires-Dist: mcp>=1.2
|
|
12
|
+
Requires-Dist: boto3>=1.34
|
|
13
|
+
Provides-Extra: build
|
|
14
|
+
Requires-Dist: pyinstaller>=6.0; extra == "build"
|
|
15
|
+
Provides-Extra: dev
|
|
16
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
17
|
+
Provides-Extra: mcp
|
|
18
|
+
Requires-Dist: mcp[cli]>=1.2; extra == "mcp"
|
|
19
|
+
Dynamic: license-file
|
|
20
|
+
|
|
21
|
+
# AgenticAILogAnalyser
|
|
22
|
+
|
|
23
|
+
Python port of [codag-drain](https://github.com/codag-megalith/codag-drain) that
|
|
24
|
+
uses the upstream Python [Drain3](https://github.com/logpai/Drain3) package as
|
|
25
|
+
its grouping engine. Same CLI surface, same output shape, same evidence-rich
|
|
26
|
+
artifact, packaged as a single binary you can drop into any environment.
|
|
27
|
+
|
|
28
|
+
The intended consumer is an AI agent that needs to read a large log window
|
|
29
|
+
under a fixed token budget. Instead of feeding the agent 1,400 raw lines, you
|
|
30
|
+
feed it 8 templates with slot statistics and a few raw examples per group.
|
|
31
|
+
|
|
32
|
+
## What it does
|
|
33
|
+
|
|
34
|
+
Takes a stream of log lines on stdin, groups near-duplicates with Drain3, and
|
|
35
|
+
emits one templated line per group with:
|
|
36
|
+
|
|
37
|
+
- the count of collapsed lines,
|
|
38
|
+
- a derived `<*>` template,
|
|
39
|
+
- per-slot stats (min / max / median for numeric slots, distinct values for
|
|
40
|
+
enums, an auto-detected unit like `ms` or `MB`),
|
|
41
|
+
- a few raw sample lines.
|
|
42
|
+
|
|
43
|
+
The intended consumer is an LLM agent that needs to read a large log window
|
|
44
|
+
under a fixed token budget.
|
|
45
|
+
|
|
46
|
+
## Real-world example
|
|
47
|
+
|
|
48
|
+
A 1,438-line Kiro IDE log compresses to 8 templates at ~180x compression:
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
[x1] [WebviewProcessMonitor] Service starting
|
|
52
|
+
[x4] update#setState <*> [idle,downloading,downloaded,ready]
|
|
53
|
+
[x14] [WebviewProcessMonitor] Tracking webview renderer: pid=<*>, origin=<*>, windowId=<*> [13773..87619 p50=87288.5]
|
|
54
|
+
[x1] update#setState checking for updates
|
|
55
|
+
[x14] Extension host with pid <*> exited with code: 0, signal: unknown. [13697..89755 p50=73921]
|
|
56
|
+
[x1395] No ptyHost heartbeat after 6 seconds
|
|
57
|
+
[x8] [WebviewProcessMonitor] Webview renderer process gone: pid=<*>
|
|
58
|
+
[x1] Extracting content from 1 URIs
|
|
59
|
+
[codag-drain-py] 1438 lines -> 8 templates (179.8x)
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
The dominant signal — 97% of the file being one repeating warning — is the
|
|
63
|
+
first thing the model sees instead of being buried. Numeric ranges and enum
|
|
64
|
+
values are preserved, so outliers and state distributions stay visible.
|
|
65
|
+
|
|
66
|
+
## Install
|
|
67
|
+
|
|
68
|
+
From source:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
pip install -e .
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
From source with the build extra (PyInstaller):
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
pip install -e ".[build]"
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Usage
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
echo 'worker latency 20ms
|
|
84
|
+
worker latency 20ms
|
|
85
|
+
worker latency 20ms
|
|
86
|
+
worker latency 8400ms' | agentic-log-analyser --stats
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
```
|
|
90
|
+
[x4] worker latency <*> [20..8400ms p50=20ms]
|
|
91
|
+
[codag-drain-py] 4 lines -> 1 templates (4.0x)
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
JSON output:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
echo 'worker ready shard=1' | agentic-log-analyser --format json
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Choose a grouper:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
cat logs.txt | agentic-log-analyser --grouper drain-stock
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
NDJSON input:
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
cat events.ndjson | agentic-log-analyser --json
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Available groupers:
|
|
113
|
+
|
|
114
|
+
| name | description |
|
|
115
|
+
|------|-------------|
|
|
116
|
+
| `drain` (default) | Drain3 with codag's compact-line tokenizer fallback |
|
|
117
|
+
| `drain-stock` | Drain3 with vanilla whitespace tokenization |
|
|
118
|
+
| `drain-delimited` | Drain with extra punctuation delimiters folded into whitespace |
|
|
119
|
+
| `drain-fullsearch` | Drain similarity over all same-length clusters (no prefix-tree) |
|
|
120
|
+
| `statistical` | Non-Drain control: IDF-weighted anchor co-occurrence |
|
|
121
|
+
|
|
122
|
+
## Build a single-file binary
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
./scripts/build_binary.sh
|
|
126
|
+
./dist/codag-drain-py --help
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
PyInstaller bundles the Python interpreter and `drain3` into one file under
|
|
130
|
+
`dist/`. Build on each OS / architecture you intend to ship.
|
|
131
|
+
|
|
132
|
+
## Programmatic API
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
from agentic_log_analyser import LogLine, TemplaterConfig, template_logs
|
|
136
|
+
|
|
137
|
+
result = template_logs(
|
|
138
|
+
[LogLine(message="latency 20ms"), LogLine(message="latency 8400ms")]
|
|
139
|
+
)
|
|
140
|
+
print(result.render())
|
|
141
|
+
print(result.to_json(indent=2))
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
`TemplateIndex` exposes the streaming variant:
|
|
145
|
+
|
|
146
|
+
```python
|
|
147
|
+
from agentic_log_analyser import LogLine, TemplateIndex
|
|
148
|
+
|
|
149
|
+
idx = TemplateIndex()
|
|
150
|
+
for msg in some_iterator():
|
|
151
|
+
idx.push(LogLine(message=msg))
|
|
152
|
+
print(idx.templates().render())
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
## Tests
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
pip install -e ".[dev]"
|
|
159
|
+
pytest
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Credits
|
|
163
|
+
|
|
164
|
+
- [Drain3](https://github.com/logpai/Drain3) — the underlying log template
|
|
165
|
+
miner from logpai. We use the published PyPI package directly.
|
|
166
|
+
- [codag-drain](https://github.com/codag-megalith/codag-drain) — the Rust
|
|
167
|
+
project this Python port is modeled on. The compact-line tokenizer fallback,
|
|
168
|
+
multi-member template derivation, slot profiling, and CLI surface all
|
|
169
|
+
follow that design.
|
|
170
|
+
- [Drain paper](http://jiemingzhu.github.io/pub/pjhe_icws2017.pdf) — He et al.,
|
|
171
|
+
"Drain: An Online Log Parsing Approach with Fixed Depth Tree", ICWS 2017.
|
|
172
|
+
|
|
173
|
+
## License
|
|
174
|
+
|
|
175
|
+
MIT. See [`LICENSE`](LICENSE).
|
|
176
|
+
|
|
177
|
+
## Layout
|
|
178
|
+
|
|
179
|
+
```
|
|
180
|
+
src/agentic_log_analyser/
|
|
181
|
+
__init__.py public exports
|
|
182
|
+
__main__.py `python -m agentic_log_analyser`
|
|
183
|
+
cli.py argparse + stdin pipeline
|
|
184
|
+
compress.py templater entry point + rendering
|
|
185
|
+
grouper.py Drain / DrainStock / DrainDelimited / FullSearch / Statistical
|
|
186
|
+
input.py heuristic line + NDJSON parsers
|
|
187
|
+
lex.py character-class tokenizer + lex template derivation
|
|
188
|
+
profile.py slot capture, numeric stats, distinct-value summaries
|
|
189
|
+
stream.py TemplateIndex streaming wrapper
|
|
190
|
+
template.py whitespace template derivation + capture regex
|
|
191
|
+
tests/
|
|
192
|
+
test_compress.py
|
|
193
|
+
test_input.py
|
|
194
|
+
scripts/
|
|
195
|
+
build_binary.sh PyInstaller --onefile build
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
## MCP server (use as a tool from Kiro / Claude / any MCP client)
|
|
200
|
+
|
|
201
|
+
The analyser ships with a built-in [Model Context Protocol](https://modelcontextprotocol.io)
|
|
202
|
+
server. Once registered with Kiro or Claude Desktop, your assistant can call it
|
|
203
|
+
as a tool to compress logs on demand without you piping anything through a
|
|
204
|
+
shell.
|
|
205
|
+
|
|
206
|
+
### What it exposes
|
|
207
|
+
|
|
208
|
+
Five tools, all served over stdio:
|
|
209
|
+
|
|
210
|
+
| tool | description |
|
|
211
|
+
|------|-------------|
|
|
212
|
+
| `analyse_logs` | Compress an inline log body. Returns templated artifact + summary. |
|
|
213
|
+
| `analyse_log_file` | Same but reads the body from a local file path. |
|
|
214
|
+
| `stream_push` | Append lines to a named streaming session. |
|
|
215
|
+
| `stream_project` | Render templates over the accumulated session. |
|
|
216
|
+
| `stream_reset` | Clear a session. |
|
|
217
|
+
|
|
218
|
+
Each tool accepts the full set of analyser options: `grouper`, `sample_cap`,
|
|
219
|
+
`template_clip`, `body_format`, `output_format`.
|
|
220
|
+
|
|
221
|
+
### Build the MCP binary
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
./scripts/build_mcp_binary.sh
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
This produces a single self-contained binary at `dist/agentic-log-analyser-mcp`
|
|
228
|
+
(~22 MB). It bundles the Python interpreter, the analyser, `drain3`, and the
|
|
229
|
+
MCP SDK — no Python install required on the machine that runs it.
|
|
230
|
+
|
|
231
|
+
### Register with Kiro
|
|
232
|
+
|
|
233
|
+
Open Kiro's MCP config (Command Palette → "Open MCP Config" or edit
|
|
234
|
+
`.kiro/settings/mcp.json` in your workspace, or `~/.kiro/settings/mcp.json` for
|
|
235
|
+
user-wide). Add:
|
|
236
|
+
|
|
237
|
+
```json
|
|
238
|
+
{
|
|
239
|
+
"mcpServers": {
|
|
240
|
+
"agentic-log-analyser": {
|
|
241
|
+
"command": "/absolute/path/to/dist/agentic-log-analyser-mcp",
|
|
242
|
+
"args": [],
|
|
243
|
+
"disabled": false,
|
|
244
|
+
"autoApprove": ["analyse_logs", "analyse_log_file", "stream_project"]
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
}
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
There's a ready-to-paste example at `examples/mcp_config_kiro.json`. Reload the
|
|
251
|
+
MCP config from the MCP Server view in the Kiro feature panel.
|
|
252
|
+
|
|
253
|
+
### Register with Claude Desktop
|
|
254
|
+
|
|
255
|
+
Edit `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
256
|
+
(macOS) and merge in:
|
|
257
|
+
|
|
258
|
+
```json
|
|
259
|
+
{
|
|
260
|
+
"mcpServers": {
|
|
261
|
+
"agentic-log-analyser": {
|
|
262
|
+
"command": "/absolute/path/to/dist/agentic-log-analyser-mcp",
|
|
263
|
+
"args": []
|
|
264
|
+
}
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
Restart Claude Desktop. The tools will appear in the tools menu.
|
|
270
|
+
|
|
271
|
+
### Use it from a chat
|
|
272
|
+
|
|
273
|
+
In Kiro or Claude, just ask:
|
|
274
|
+
|
|
275
|
+
> "Compress this log file and tell me what stands out:
|
|
276
|
+
> `/Users/me/Desktop/logs/cloudtrail_event.txt`"
|
|
277
|
+
|
|
278
|
+
The assistant will pick up `analyse_log_file`, call it with the path, and
|
|
279
|
+
diagnose against the templated artifact instead of the raw bytes.
|
|
280
|
+
|
|
281
|
+
### Debug from the CLI
|
|
282
|
+
|
|
283
|
+
To run the server manually and tail its output:
|
|
284
|
+
|
|
285
|
+
```bash
|
|
286
|
+
./dist/agentic-log-analyser-mcp
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
It speaks JSON-RPC over stdio. The repo's `scripts/smoke_mcp_binary.py` shows a
|
|
290
|
+
real client roundtrip you can use as a reference.
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
# AgenticAILogAnalyser
|
|
2
|
+
|
|
3
|
+
Python port of [codag-drain](https://github.com/codag-megalith/codag-drain) that
|
|
4
|
+
uses the upstream Python [Drain3](https://github.com/logpai/Drain3) package as
|
|
5
|
+
its grouping engine. Same CLI surface, same output shape, same evidence-rich
|
|
6
|
+
artifact, packaged as a single binary you can drop into any environment.
|
|
7
|
+
|
|
8
|
+
The intended consumer is an AI agent that needs to read a large log window
|
|
9
|
+
under a fixed token budget. Instead of feeding the agent 1,400 raw lines, you
|
|
10
|
+
feed it 8 templates with slot statistics and a few raw examples per group.
|
|
11
|
+
|
|
12
|
+
## What it does
|
|
13
|
+
|
|
14
|
+
Takes a stream of log lines on stdin, groups near-duplicates with Drain3, and
|
|
15
|
+
emits one templated line per group with:
|
|
16
|
+
|
|
17
|
+
- the count of collapsed lines,
|
|
18
|
+
- a derived `<*>` template,
|
|
19
|
+
- per-slot stats (min / max / median for numeric slots, distinct values for
|
|
20
|
+
enums, an auto-detected unit like `ms` or `MB`),
|
|
21
|
+
- a few raw sample lines.
|
|
22
|
+
|
|
23
|
+
The intended consumer is an LLM agent that needs to read a large log window
|
|
24
|
+
under a fixed token budget.
|
|
25
|
+
|
|
26
|
+
## Real-world example
|
|
27
|
+
|
|
28
|
+
A 1,438-line Kiro IDE log compresses to 8 templates at ~180x compression:
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
[x1] [WebviewProcessMonitor] Service starting
|
|
32
|
+
[x4] update#setState <*> [idle,downloading,downloaded,ready]
|
|
33
|
+
[x14] [WebviewProcessMonitor] Tracking webview renderer: pid=<*>, origin=<*>, windowId=<*> [13773..87619 p50=87288.5]
|
|
34
|
+
[x1] update#setState checking for updates
|
|
35
|
+
[x14] Extension host with pid <*> exited with code: 0, signal: unknown. [13697..89755 p50=73921]
|
|
36
|
+
[x1395] No ptyHost heartbeat after 6 seconds
|
|
37
|
+
[x8] [WebviewProcessMonitor] Webview renderer process gone: pid=<*>
|
|
38
|
+
[x1] Extracting content from 1 URIs
|
|
39
|
+
[codag-drain-py] 1438 lines -> 8 templates (179.8x)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
The dominant signal — 97% of the file being one repeating warning — is the
|
|
43
|
+
first thing the model sees instead of being buried. Numeric ranges and enum
|
|
44
|
+
values are preserved, so outliers and state distributions stay visible.
|
|
45
|
+
|
|
46
|
+
## Install
|
|
47
|
+
|
|
48
|
+
From source:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install -e .
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
From source with the build extra (PyInstaller):
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install -e ".[build]"
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Usage
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
echo 'worker latency 20ms
|
|
64
|
+
worker latency 20ms
|
|
65
|
+
worker latency 20ms
|
|
66
|
+
worker latency 8400ms' | agentic-log-analyser --stats
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
[x4] worker latency <*> [20..8400ms p50=20ms]
|
|
71
|
+
[codag-drain-py] 4 lines -> 1 templates (4.0x)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
JSON output:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
echo 'worker ready shard=1' | agentic-log-analyser --format json
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Choose a grouper:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
cat logs.txt | agentic-log-analyser --grouper drain-stock
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
NDJSON input:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
cat events.ndjson | agentic-log-analyser --json
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
Available groupers:
|
|
93
|
+
|
|
94
|
+
| name | description |
|
|
95
|
+
|------|-------------|
|
|
96
|
+
| `drain` (default) | Drain3 with codag's compact-line tokenizer fallback |
|
|
97
|
+
| `drain-stock` | Drain3 with vanilla whitespace tokenization |
|
|
98
|
+
| `drain-delimited` | Drain with extra punctuation delimiters folded into whitespace |
|
|
99
|
+
| `drain-fullsearch` | Drain similarity over all same-length clusters (no prefix-tree) |
|
|
100
|
+
| `statistical` | Non-Drain control: IDF-weighted anchor co-occurrence |
|
|
101
|
+
|
|
102
|
+
## Build a single-file binary
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
./scripts/build_binary.sh
|
|
106
|
+
./dist/codag-drain-py --help
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
PyInstaller bundles the Python interpreter and `drain3` into one file under
|
|
110
|
+
`dist/`. Build on each OS / architecture you intend to ship.
|
|
111
|
+
|
|
112
|
+
## Programmatic API
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
from agentic_log_analyser import LogLine, TemplaterConfig, template_logs
|
|
116
|
+
|
|
117
|
+
result = template_logs(
|
|
118
|
+
[LogLine(message="latency 20ms"), LogLine(message="latency 8400ms")]
|
|
119
|
+
)
|
|
120
|
+
print(result.render())
|
|
121
|
+
print(result.to_json(indent=2))
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
`TemplateIndex` exposes the streaming variant:
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
from agentic_log_analyser import LogLine, TemplateIndex
|
|
128
|
+
|
|
129
|
+
idx = TemplateIndex()
|
|
130
|
+
for msg in some_iterator():
|
|
131
|
+
idx.push(LogLine(message=msg))
|
|
132
|
+
print(idx.templates().render())
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Tests
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
pip install -e ".[dev]"
|
|
139
|
+
pytest
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## Credits
|
|
143
|
+
|
|
144
|
+
- [Drain3](https://github.com/logpai/Drain3) — the underlying log template
|
|
145
|
+
miner from logpai. We use the published PyPI package directly.
|
|
146
|
+
- [codag-drain](https://github.com/codag-megalith/codag-drain) — the Rust
|
|
147
|
+
project this Python port is modeled on. The compact-line tokenizer fallback,
|
|
148
|
+
multi-member template derivation, slot profiling, and CLI surface all
|
|
149
|
+
follow that design.
|
|
150
|
+
- [Drain paper](http://jiemingzhu.github.io/pub/pjhe_icws2017.pdf) — He et al.,
|
|
151
|
+
"Drain: An Online Log Parsing Approach with Fixed Depth Tree", ICWS 2017.
|
|
152
|
+
|
|
153
|
+
## License
|
|
154
|
+
|
|
155
|
+
MIT. See [`LICENSE`](LICENSE).
|
|
156
|
+
|
|
157
|
+
## Layout
|
|
158
|
+
|
|
159
|
+
```
|
|
160
|
+
src/agentic_log_analyser/
|
|
161
|
+
__init__.py public exports
|
|
162
|
+
__main__.py `python -m agentic_log_analyser`
|
|
163
|
+
cli.py argparse + stdin pipeline
|
|
164
|
+
compress.py templater entry point + rendering
|
|
165
|
+
grouper.py Drain / DrainStock / DrainDelimited / FullSearch / Statistical
|
|
166
|
+
input.py heuristic line + NDJSON parsers
|
|
167
|
+
lex.py character-class tokenizer + lex template derivation
|
|
168
|
+
profile.py slot capture, numeric stats, distinct-value summaries
|
|
169
|
+
stream.py TemplateIndex streaming wrapper
|
|
170
|
+
template.py whitespace template derivation + capture regex
|
|
171
|
+
tests/
|
|
172
|
+
test_compress.py
|
|
173
|
+
test_input.py
|
|
174
|
+
scripts/
|
|
175
|
+
build_binary.sh PyInstaller --onefile build
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
## MCP server (use as a tool from Kiro / Claude / any MCP client)
|
|
180
|
+
|
|
181
|
+
The analyser ships with a built-in [Model Context Protocol](https://modelcontextprotocol.io)
|
|
182
|
+
server. Once registered with Kiro or Claude Desktop, your assistant can call it
|
|
183
|
+
as a tool to compress logs on demand without you piping anything through a
|
|
184
|
+
shell.
|
|
185
|
+
|
|
186
|
+
### What it exposes
|
|
187
|
+
|
|
188
|
+
Five tools, all served over stdio:
|
|
189
|
+
|
|
190
|
+
| tool | description |
|
|
191
|
+
|------|-------------|
|
|
192
|
+
| `analyse_logs` | Compress an inline log body. Returns templated artifact + summary. |
|
|
193
|
+
| `analyse_log_file` | Same but reads the body from a local file path. |
|
|
194
|
+
| `stream_push` | Append lines to a named streaming session. |
|
|
195
|
+
| `stream_project` | Render templates over the accumulated session. |
|
|
196
|
+
| `stream_reset` | Clear a session. |
|
|
197
|
+
|
|
198
|
+
Each tool accepts the full set of analyser options: `grouper`, `sample_cap`,
|
|
199
|
+
`template_clip`, `body_format`, `output_format`.
|
|
200
|
+
|
|
201
|
+
### Build the MCP binary
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
./scripts/build_mcp_binary.sh
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
This produces a single self-contained binary at `dist/agentic-log-analyser-mcp`
|
|
208
|
+
(~22 MB). It bundles the Python interpreter, the analyser, `drain3`, and the
|
|
209
|
+
MCP SDK — no Python install required on the machine that runs it.
|
|
210
|
+
|
|
211
|
+
### Register with Kiro
|
|
212
|
+
|
|
213
|
+
Open Kiro's MCP config (Command Palette → "Open MCP Config" or edit
|
|
214
|
+
`.kiro/settings/mcp.json` in your workspace, or `~/.kiro/settings/mcp.json` for
|
|
215
|
+
user-wide). Add:
|
|
216
|
+
|
|
217
|
+
```json
|
|
218
|
+
{
|
|
219
|
+
"mcpServers": {
|
|
220
|
+
"agentic-log-analyser": {
|
|
221
|
+
"command": "/absolute/path/to/dist/agentic-log-analyser-mcp",
|
|
222
|
+
"args": [],
|
|
223
|
+
"disabled": false,
|
|
224
|
+
"autoApprove": ["analyse_logs", "analyse_log_file", "stream_project"]
|
|
225
|
+
}
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
There's a ready-to-paste example at `examples/mcp_config_kiro.json`. Reload the
|
|
231
|
+
MCP config from the MCP Server view in the Kiro feature panel.
|
|
232
|
+
|
|
233
|
+
### Register with Claude Desktop
|
|
234
|
+
|
|
235
|
+
Edit `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
236
|
+
(macOS) and merge in:
|
|
237
|
+
|
|
238
|
+
```json
|
|
239
|
+
{
|
|
240
|
+
"mcpServers": {
|
|
241
|
+
"agentic-log-analyser": {
|
|
242
|
+
"command": "/absolute/path/to/dist/agentic-log-analyser-mcp",
|
|
243
|
+
"args": []
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
}
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
Restart Claude Desktop. The tools will appear in the tools menu.
|
|
250
|
+
|
|
251
|
+
### Use it from a chat
|
|
252
|
+
|
|
253
|
+
In Kiro or Claude, just ask:
|
|
254
|
+
|
|
255
|
+
> "Compress this log file and tell me what stands out:
|
|
256
|
+
> `/Users/me/Desktop/logs/cloudtrail_event.txt`"
|
|
257
|
+
|
|
258
|
+
The assistant will pick up `analyse_log_file`, call it with the path, and
|
|
259
|
+
diagnose against the templated artifact instead of the raw bytes.
|
|
260
|
+
|
|
261
|
+
### Debug from the CLI
|
|
262
|
+
|
|
263
|
+
To run the server manually and tail its output:
|
|
264
|
+
|
|
265
|
+
```bash
|
|
266
|
+
./dist/agentic-log-analyser-mcp
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
It speaks JSON-RPC over stdio. The repo's `scripts/smoke_mcp_binary.py` shows a
|
|
270
|
+
real client roundtrip you can use as a reference.
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "agentic-log-analyser"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Deterministic log templating on top of Drain3, packaged as an artifact for AI agents."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.9"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [{ name = "agentic-log-analyser" }]
|
|
13
|
+
dependencies = [
|
|
14
|
+
"drain3>=0.9.11",
|
|
15
|
+
"mcp>=1.2",
|
|
16
|
+
"boto3>=1.34",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[project.optional-dependencies]
|
|
20
|
+
build = ["pyinstaller>=6.0"]
|
|
21
|
+
dev = ["pytest>=7"]
|
|
22
|
+
mcp = ["mcp[cli]>=1.2"]
|
|
23
|
+
|
|
24
|
+
[project.scripts]
|
|
25
|
+
agentic-log-analyser = "agentic_log_analyser.cli:main"
|
|
26
|
+
agentic-log-analyser-mcp = "agentic_log_analyser.mcp_server:main"
|
|
27
|
+
|
|
28
|
+
[tool.setuptools.packages.find]
|
|
29
|
+
where = ["src"]
|
|
30
|
+
|
|
31
|
+
[tool.setuptools.package-dir]
|
|
32
|
+
"" = "src"
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""agentic-log-analyser: deterministic log templating on top of Drain3.
|
|
2
|
+
|
|
3
|
+
This package compresses streaming log windows into a small set of template
|
|
4
|
+
groups with raw samples and per-slot summaries, intended as an artifact for
|
|
5
|
+
AI agents that need to reason about logs under a fixed token budget.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .compress import (
|
|
9
|
+
GrouperKind,
|
|
10
|
+
LogLine,
|
|
11
|
+
SlotSummary,
|
|
12
|
+
TemplateGroup,
|
|
13
|
+
TemplateResult,
|
|
14
|
+
TemplateSample,
|
|
15
|
+
TemplaterConfig,
|
|
16
|
+
template_groups,
|
|
17
|
+
template_logs,
|
|
18
|
+
)
|
|
19
|
+
from .input import BodyFormat, parse_body, parse_json_line, parse_line
|
|
20
|
+
from .stream import TemplateIndex
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"BodyFormat",
|
|
24
|
+
"GrouperKind",
|
|
25
|
+
"LogLine",
|
|
26
|
+
"SlotSummary",
|
|
27
|
+
"TemplateGroup",
|
|
28
|
+
"TemplateIndex",
|
|
29
|
+
"TemplateResult",
|
|
30
|
+
"TemplateSample",
|
|
31
|
+
"TemplaterConfig",
|
|
32
|
+
"parse_body",
|
|
33
|
+
"parse_json_line",
|
|
34
|
+
"parse_line",
|
|
35
|
+
"template_groups",
|
|
36
|
+
"template_logs",
|
|
37
|
+
]
|