logslop 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ Metadata-Version: 2.4
2
+ Name: logslop
3
+ Version: 0.1.0
4
+ Summary: Log deduplication: strip repeated lines so readers and agents can focus on key events.
5
+ Author: Advanced Micro Devices, Inc.
6
+ License: MIT
7
+ Project-URL: homepage, https://github.com/amd/logslop
8
+ Project-URL: repository, https://github.com/amd/logslop
9
+ Project-URL: issues, https://github.com/amd/logslop/issues
10
+ Requires-Python: >=3.8
11
+ Description-Content-Type: text/markdown
12
+ License-File: LICENSE
13
+ Dynamic: license-file
14
+
15
+ # LogsLop - Log Deduplication Tool
16
+
17
+ LogsLop is a log summarization tool that removes repeated messages from log files, enabling readers and agents to home in quickly on key message events.
18
+
19
+ In progress for public open source release.
@@ -0,0 +1,7 @@
1
+ logslop.py,sha256=MGwYyXf7F2vjtk86bpuWDmN96G2eQNpCeY8DiS5hK-w,4358
2
+ logslop-0.1.0.dist-info/licenses/LICENSE,sha256=UziuL9rXkIgS-RiFkwTp8KnFqQlyydMVYsE2nD8357k,1072
3
+ logslop-0.1.0.dist-info/METADATA,sha256=pQFNlwot06Iz1lbemDe6EIt7LGM7OticoLC33NG8nIg,708
4
+ logslop-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
5
+ logslop-0.1.0.dist-info/entry_points.txt,sha256=fFIZM5ziy26gBOVP_-ooNAbO9FVoG4VUKCzrYqAjCeg,41
6
+ logslop-0.1.0.dist-info/top_level.txt,sha256=UaZeEUOkXk--fH-0XKPKpolfyHeG_S3Di0-qa5XDons,8
7
+ logslop-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ logslop = logslop:main
@@ -0,0 +1,7 @@
1
+ Copyright (c) 2026 Advanced Micro Devices, Inc.
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1 @@
1
+ logslop
logslop.py ADDED
@@ -0,0 +1,102 @@
1
+ #!/usr/bin/env python3
2
+ # Copyright (C) 2025 Advanced Micro Devices, Inc.
3
+ # Use of this source code is governed by an MIT-style license that can be
4
+ # found in the LICENSE file or at https://opensource.org/licenses/MIT.
5
+ """LogsLop Standalone - Log Deduplication Script
6
+
7
+ Large log files are often full of repeated or near-duplicate lines.
8
+ LogsLop strips that redundancy, so you get one representative per message type.
9
+ Particularly useful when logs are too big to send or process as-is (e.g. into LLM context windows).
10
+
11
+ Usage:
12
+ python3 logslop.py (reads stdin)
13
+ your-command 2>&1 | python3 logslop.py (pipes stderr to stdout and both through logslop)
14
+
15
+ Examples:
16
+ journalctl --no-pager | python3 logslop.py
17
+ python3 logslop.py < your_log.txt
18
+
19
+ To add to PATH: copy to e.g. ~/.local/bin, then export PATH="$HOME/.local/bin:$PATH" (add to ~/.bashrc to persist).
20
+
21
+ Options (argparse, all optional):
22
+ -n 5000 max clusters to track
23
+ -t 0.6 Jaccard similarity threshold (0-1, lower = more aggressive deduping)
24
+ --no-normalize-digits disable digit/hex normalization (rare)
25
+ """
26
+
27
+ import argparse
28
+ import re
29
+ import sys
30
+
31
+
32
def tokenize(line: str, normalize_digits: bool) -> "list[str]":
    """Split a log line into word and punctuation tokens.

    A word token is a maximal run of ASCII letters, digits and underscores.
    Any other character that is neither a word character nor whitespace
    becomes a one-character token.  Non-ASCII word characters match neither
    rule and are therefore dropped.

    Args:
        line: The text to tokenize.
        normalize_digits: When true, every run of digits is replaced by a
            placeholder character before tokenizing, so lines that differ
            only in numbers produce the same tokens.  A word that contains a
            placeholder and otherwise consists only of the characters
            ``a-f``, ``A-F``, ``x`` and ``X`` (e.g. ``0x1f`` or ``a3``) is
            collapsed to the bare placeholder.

    Returns:
        The tokens in order of appearance.
    """
    # NOTE: the return annotation is quoted on purpose.  An unquoted
    # ``list[str]`` is evaluated when the function is defined and raises
    # TypeError on Python 3.8, which the package metadata claims to support.
    placeholder = '\u0001'
    if normalize_digits:
        line = re.sub(r'\d+', placeholder, line)
    # After normalization no digits remain, so the placeholder takes the
    # place of 0-9 in the word character class.
    word_chars = r'a-zA-Z' + (placeholder if normalize_digits else '0-9') + r'_'
    tokens = re.findall(rf'[{word_chars}]+|[^\w\s]', line)
    if normalize_digits:
        hex_pattern = re.compile(r'^[' + placeholder + r'a-fA-FxX]+$')
        # Requiring a placeholder keeps digit-free words such as "deadbeef"
        # or "fax" from being mistaken for hex numbers.
        tokens = [placeholder if placeholder in t and hex_pattern.match(t) else t for t in tokens]
    # Both regex alternatives consume at least one character, so no token is
    # ever empty and no further filtering is needed.
    return tokens
43
+
44
+
45
def jaccard(a: "list[str]", b: "list[str]") -> float:
    """Return the Jaccard similarity of the distinct tokens in *a* and *b*.

    Both token lists are reduced to sets, so repeated tokens count once.
    The result is ``len(intersection) / len(union)``.

    Args:
        a: Tokens of the first line.
        b: Tokens of the second line.

    Returns:
        A value between 0.0 and 1.0.  Two empty token lists are treated as
        identical and give 1.0.
    """
    # NOTE: the parameter annotations are quoted on purpose.  An unquoted
    # ``list[str]`` is evaluated when the function is defined and raises
    # TypeError on Python 3.8, which the package metadata claims to support.
    sa, sb = set(a), set(b)
    union = len(sa | sb)
    if not union:
        # Both inputs are empty; avoid dividing by zero.
        return 1.0
    return len(sa & sb) / union
51
+
52
+
53
def process(lines, *, max_clusters=5000, threshold=0.6, normalize_digits=True):
    """Yield the first line of each cluster of similar lines, plus skip markers.

    Each input line is tokenized and compared against the exemplar of every
    tracked cluster, most recently used first.  If its Jaccard similarity to
    an exemplar is at least *threshold*, the line is omitted and that cluster
    moves to the front.  Otherwise the line is yielded (without its trailing
    newline) and becomes the exemplar of a new cluster.

    Runs of omitted lines are reported as a `` ...N...M`` marker (1-based,
    inclusive line numbers) yielded before the next kept line, or at the end
    of input for a trailing run.

    Args:
        lines: Iterable of input lines; trailing ``\\n``/``\\r`` are stripped.
        max_clusters: Maximum number of clusters to remember.  When the limit
            is reached the least recently used cluster is dropped.  ``0``,
            ``None`` or a negative value means no limit.
        threshold: Minimum Jaccard similarity for a line to count as a
            duplicate of an exemplar.
        normalize_digits: Passed through to ``tokenize``.

    Yields:
        Kept lines and skip markers, in input order.
    """
    # 0/None have always meant "unlimited".  A negative value used to make
    # the eviction test true on an empty list and crash with IndexError on
    # ``pop``; treat it as unlimited as well.
    limit = max_clusters if max_clusters and max_clusters > 0 else 0
    # [(exemplar_tokens, line_text), ...], most recently used first.  The
    # exemplar is fixed when the cluster is created and never updated on a
    # match, because replacing it would let the cluster drift.
    clusters = []
    skipped = 0
    lineno = 0  # stays 0 when the input is empty
    for lineno, raw in enumerate(lines, start=1):
        line = raw.rstrip('\n\r')
        tokens = tokenize(line, normalize_digits)
        for i, (ex_tokens, _ex_line) in enumerate(clusters):
            if jaccard(tokens, ex_tokens) >= threshold:
                # Duplicate: move the cluster to the front and skip the line.
                clusters.insert(0, clusters.pop(i))
                skipped += 1
                break
        else:
            # No cluster matched: report any pending skipped range first.
            if skipped:
                yield f" ...{lineno - skipped}...{lineno - 1}"
                skipped = 0
            yield line
            if limit and len(clusters) >= limit:
                clusters.pop()  # evict the least recently used cluster
            clusters.insert(0, (tokens, line))
    if skipped:
        # The input ended in a run of duplicates.
        yield f" ...{lineno - skipped + 1}...{lineno}"
86
+
87
+
88
def main():
    """Command-line entry point: deduplicate stdin and write to stdout.

    Parses the optional ``-n``, ``-t`` and ``--no-normalize-digits``
    arguments, prints a two-line header (colored when stdout is a terminal),
    then prints every line produced by ``process(sys.stdin, ...)``.  Output
    is flushed after each line so results appear promptly when the input is
    a live stream.

    If the consumer of stdout goes away early (for example ``| head``), the
    resulting ``BrokenPipeError`` is handled quietly and the process exits
    with status 1 instead of printing a traceback.
    """
    import os  # local import: only needed for the broken-pipe cleanup below

    p = argparse.ArgumentParser(description='Reduce log redundancy: print first occurrence of each pattern.')
    p.add_argument('-n', '--max-clusters', type=int, default=5000, metavar='N', help='Max clusters to track (default: 5000)')
    p.add_argument('-t', '--threshold', type=float, default=0.6, help='Jaccard threshold for match (default: 0.6)')
    p.add_argument('--no-normalize-digits', action='store_true', help='Disable digit normalization')
    args = p.parse_args()
    # Use ANSI cyan for the header only when writing to a terminal.
    c0, c1 = ("\033[36m", "\033[0m") if sys.stdout.isatty() else ("", "")
    try:
        print(f"{c0}# LogsLop: near-duplicate input lines removed.{c1}", flush=True)
        print(f"{c0}# ...N...M = omitted lines N through M in the original input (1-based).{c1}", flush=True)
        for line in process(sys.stdin, max_clusters=args.max_clusters, threshold=args.threshold, normalize_digits=not args.no_normalize_digits):
            print(line, flush=True)
    except BrokenPipeError:
        # The reader closed the pipe.  Point stdout at devnull so the
        # interpreter's flush at shutdown does not raise a second time.
        devnull = os.open(os.devnull, os.O_WRONLY)
        os.dup2(devnull, sys.stdout.fileno())
        sys.exit(1)
99
+
100
+
101
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()