nm-tool-forge 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/PKG-INFO +53 -17
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/README.md +98 -62
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/pyproject.toml +1 -1
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/csvchunking/__init__.py +1 -1
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/csvchunking/chunker.py +6 -6
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/csvchunking/cli.py +9 -9
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/__init__.py +1 -1
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/nm_tool_forge.egg-info/PKG-INFO +53 -17
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/tests/test_csvchunking.py +5 -5
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/LICENSE +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/setup.cfg +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/csvchunking/__main__.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/__main__.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/analysis.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/cli.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/constants.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/converters.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/csv_export.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/encoding.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/filesystem.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/models.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/normalization.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/parsing.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/report_html.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/report_markdown.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/report_models.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/report_pdf.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/loganalysis/selftest.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/nm_tool_forge.egg-info/SOURCES.txt +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/nm_tool_forge.egg-info/dependency_links.txt +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/nm_tool_forge.egg-info/entry_points.txt +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/nm_tool_forge.egg-info/requires.txt +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/src/nm_tool_forge.egg-info/top_level.txt +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/tests/test_analysis.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/tests/test_normalization.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/tests/test_parsing.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/tests/test_report_html.py +0 -0
- {nm_tool_forge-0.2.3 → nm_tool_forge-0.2.4}/tests/test_report_markdown.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nm-tool-forge
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: Analyze MigMan log files and generate aggregated CSV, Markdown, HTML, and optional PDF reports.
|
|
5
5
|
Author-email: Stefan Ewald <s.ew@outlook.de>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -31,7 +31,7 @@ Dynamic: license-file
|
|
|
31
31
|
|
|
32
32
|
# nm-tool-forge
|
|
33
33
|
|
|
34
|
-
`nm-tool-forge` analyzes MigMan text log files with severity tokens such as `INFO`, `ERROR`, and `WARNING` and generates aggregated CSV, Markdown, HTML, and optional PDF reports.
|
|
34
|
+
`nm-tool-forge` analyzes MigMan text log files with severity tokens such as `INFO`, `ERROR`, and `WARNING` and generates aggregated CSV, Markdown, HTML, and optional PDF reports. The package also includes `csvchunking`, a small helper for splitting large CSV files into migration-friendly chunks.
|
|
35
35
|
|
|
36
36
|
The project uses a package-ready `src` layout. The legacy `log_analysis.py` file remains available as a thin compatibility entry point for older local setups.
|
|
37
37
|
|
|
@@ -43,6 +43,7 @@ The project uses a package-ready `src` layout. The legacy `log_analysis.py` file
|
|
|
43
43
|
- Generate Markdown summary reports
|
|
44
44
|
- Optionally convert reports to HTML and PDF
|
|
45
45
|
- Keep a backup copy of analyzed log files
|
|
46
|
+
- Split large CSV files into numbered chunks while preserving the header row
|
|
46
47
|
- Run built-in self-tests from the CLI
|
|
47
48
|
|
|
48
49
|
## Installation
|
|
@@ -61,12 +62,14 @@ python -m pip install .[pdf,dev]
|
|
|
61
62
|
|
|
62
63
|
## Command-line usage
|
|
63
64
|
|
|
64
|
-
After installation,
|
|
65
|
+
After installation, the CLI entry points are available:
|
|
65
66
|
|
|
66
67
|
```powershell
|
|
67
68
|
python -m loganalysis --help
|
|
69
|
+
python -m csvchunking --help
|
|
68
70
|
loganalysis --help
|
|
69
71
|
nm-tool-forge --help
|
|
72
|
+
csvchunking --help
|
|
70
73
|
```
|
|
71
74
|
|
|
72
75
|
Typical analysis run:
|
|
@@ -89,29 +92,30 @@ python -m loganalysis --self-test
|
|
|
89
92
|
|
|
90
93
|
Legacy compatibility call:
|
|
91
94
|
|
|
95
|
+
```powershell
|
|
96
|
+
python .\log_analysis.py --convert
|
|
97
|
+
```
|
|
92
98
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
To publish a new release, always test on TestPyPI first, then upload to PyPI only after successful Conda/Smoke-Tests:
|
|
96
|
-
|
|
97
|
-
```bash
|
|
98
|
-
export TWINE_USERNAME="__token__"
|
|
99
|
-
export TWINE_PASSWORD="pypi-..."
|
|
99
|
+
CSV chunking run:
|
|
100
100
|
|
|
101
|
-
|
|
102
|
-
|
|
101
|
+
```powershell
|
|
102
|
+
csvchunking "data\large_export.csv" --chunk-size 5000
|
|
103
103
|
```
|
|
104
104
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
105
|
+
The command creates an output directory next to the input file named after the CSV stem. For example, `data\large_export.csv` is split into files such as `data\large_export\large_export_01.csv`, `data\large_export\large_export_02.csv`, and so on.
|
|
106
|
+
|
|
107
|
+
CSV chunking with an explicit encoding:
|
|
108
108
|
|
|
109
109
|
```powershell
|
|
110
|
-
python
|
|
110
|
+
python -m csvchunking "data\large_export.csv" --chunk-size 5000 --encoding utf-8-sig
|
|
111
111
|
```
|
|
112
112
|
|
|
113
|
+
Each chunk contains the original header row plus up to `--chunk-size` data rows. The delimiter is detected automatically; if detection fails, semicolon-separated CSV is used.
|
|
114
|
+
|
|
113
115
|
## Supported CLI options
|
|
114
116
|
|
|
117
|
+
Log analysis options:
|
|
118
|
+
|
|
115
119
|
- `--logs-dir`
|
|
116
120
|
- `--out-dir`
|
|
117
121
|
- `--backup-dir`
|
|
@@ -119,6 +123,28 @@ python .\log_analysis.py --convert
|
|
|
119
123
|
- `--convert`
|
|
120
124
|
- `--self-test`
|
|
121
125
|
|
|
126
|
+
CSV chunking options:
|
|
127
|
+
|
|
128
|
+
- `input_file` - path to the CSV file to split
|
|
129
|
+
- `--chunk-size` - required number of data rows per output file; must be greater than zero
|
|
130
|
+
- `--encoding` - input and output encoding; defaults to `utf-8-sig`
|
|
131
|
+
|
|
132
|
+
## Release process
|
|
133
|
+
|
|
134
|
+
To publish a new release, always test on TestPyPI first, then upload to PyPI only after successful Conda smoke tests:
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
export TWINE_USERNAME="__token__"
|
|
138
|
+
export TWINE_PASSWORD="pypi-..."
|
|
139
|
+
|
|
140
|
+
bash scripts/release_testpypi.sh --bump patch
|
|
141
|
+
bash scripts/release_pypi.sh --yes
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
**Notes:**
|
|
145
|
+
- Run and verify the TestPyPI release first, then upload the final package to PyPI.
|
|
146
|
+
- PyPI versions cannot be overwritten or reused.
|
|
147
|
+
|
|
122
148
|
## Library usage
|
|
123
149
|
|
|
124
150
|
```python
|
|
@@ -130,6 +156,7 @@ from loganalysis import (
|
|
|
130
156
|
iter_logical_entries,
|
|
131
157
|
normalize_message,
|
|
132
158
|
)
|
|
159
|
+
from csvchunking import split_csv
|
|
133
160
|
|
|
134
161
|
result = analyze_file(Path("logs/app.txt"))
|
|
135
162
|
print(result["norm_counts"])
|
|
@@ -146,14 +173,21 @@ convert_report_md_to_html_pdf(
|
|
|
146
173
|
Path("log_analyse_out/report.html"),
|
|
147
174
|
Path("log_analyse_out/report.pdf"),
|
|
148
175
|
)
|
|
176
|
+
|
|
177
|
+
chunk_result = split_csv(Path("data/large_export.csv"), chunk_size=5000)
|
|
178
|
+
print(chunk_result.output_dir)
|
|
179
|
+
print(chunk_result.output_files)
|
|
149
180
|
```
|
|
150
181
|
|
|
182
|
+
`split_csv()` returns a `ChunkResult` with the input file, output directory, chunk size, processed data-row count, created file count, and generated output file paths.
|
|
183
|
+
|
|
151
184
|
## Project structure
|
|
152
185
|
|
|
153
186
|
```text
|
|
154
187
|
.
|
|
155
188
|
├─ pyproject.toml
|
|
156
189
|
├─ src/loganalysis/
|
|
190
|
+
├─ src/csvchunking/
|
|
157
191
|
├─ tests/
|
|
158
192
|
├─ docs/
|
|
159
193
|
└─ log_analysis.py
|
|
@@ -168,7 +202,9 @@ Important modules:
|
|
|
168
202
|
- `report_html.py` - HTML/CSS rendering
|
|
169
203
|
- `report_pdf.py` - PDF engine selection and fallback handling
|
|
170
204
|
- `converters.py` - Markdown-to-HTML/PDF conversion
|
|
171
|
-
- `cli.py` - command-line entry point
|
|
205
|
+
- `loganalysis/cli.py` - log analysis command-line entry point
|
|
206
|
+
- `csvchunking/chunker.py` - CSV splitting logic and `ChunkResult`
|
|
207
|
+
- `csvchunking/cli.py` - CSV chunking command-line entry point
|
|
172
208
|
|
|
173
209
|
## HTML/PDF conversion
|
|
174
210
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# nm-tool-forge
|
|
2
2
|
|
|
3
|
-
`nm-tool-forge` analyzes MigMan text log files with severity tokens such as `INFO`, `ERROR`, and `WARNING` and generates aggregated CSV, Markdown, HTML, and optional PDF reports.
|
|
3
|
+
`nm-tool-forge` analyzes MigMan text log files with severity tokens such as `INFO`, `ERROR`, and `WARNING` and generates aggregated CSV, Markdown, HTML, and optional PDF reports. The package also includes `csvchunking`, a small helper for splitting large CSV files into migration-friendly chunks.
|
|
4
4
|
|
|
5
5
|
The project uses a package-ready `src` layout. The legacy `log_analysis.py` file remains available as a thin compatibility entry point for older local setups.
|
|
6
6
|
|
|
@@ -9,10 +9,11 @@ The project uses a package-ready `src` layout. The legacy `log_analysis.py` file
|
|
|
9
9
|
- Parse logical log entries from multi-line text logs
|
|
10
10
|
- Normalize recurring error patterns for better aggregation
|
|
11
11
|
- Generate aggregated CSV reports
|
|
12
|
-
- Generate Markdown summary reports
|
|
13
|
-
- Optionally convert reports to HTML and PDF
|
|
14
|
-
- Keep a backup copy of analyzed log files
|
|
15
|
-
-
|
|
12
|
+
- Generate Markdown summary reports
|
|
13
|
+
- Optionally convert reports to HTML and PDF
|
|
14
|
+
- Keep a backup copy of analyzed log files
|
|
15
|
+
- Split large CSV files into numbered chunks while preserving the header row
|
|
16
|
+
- Run built-in self-tests from the CLI
|
|
16
17
|
|
|
17
18
|
## Installation
|
|
18
19
|
|
|
@@ -30,13 +31,15 @@ python -m pip install .[pdf,dev]
|
|
|
30
31
|
|
|
31
32
|
## Command-line usage
|
|
32
33
|
|
|
33
|
-
After installation,
|
|
34
|
-
|
|
35
|
-
```powershell
|
|
36
|
-
python -m loganalysis --help
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
34
|
+
After installation, the CLI entry points are available:
|
|
35
|
+
|
|
36
|
+
```powershell
|
|
37
|
+
python -m loganalysis --help
|
|
38
|
+
python -m csvchunking --help
|
|
39
|
+
loganalysis --help
|
|
40
|
+
nm-tool-forge --help
|
|
41
|
+
csvchunking --help
|
|
42
|
+
```
|
|
40
43
|
|
|
41
44
|
Typical analysis run:
|
|
42
45
|
|
|
@@ -50,18 +53,54 @@ Analysis with HTML/PDF conversion:
|
|
|
50
53
|
nm-tool-forge --logs-dir logs --out-dir log_analyse_out --convert
|
|
51
54
|
```
|
|
52
55
|
|
|
53
|
-
Self-test mode:
|
|
54
|
-
|
|
55
|
-
```powershell
|
|
56
|
-
python -m loganalysis --self-test
|
|
57
|
-
```
|
|
58
|
-
|
|
59
|
-
Legacy compatibility call:
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
56
|
+
Self-test mode:
|
|
57
|
+
|
|
58
|
+
```powershell
|
|
59
|
+
python -m loganalysis --self-test
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Legacy compatibility call:
|
|
63
|
+
|
|
64
|
+
```powershell
|
|
65
|
+
python .\log_analysis.py --convert
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
CSV chunking run:
|
|
69
|
+
|
|
70
|
+
```powershell
|
|
71
|
+
csvchunking "data\large_export.csv" --chunk-size 5000
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
The command creates an output directory next to the input file named after the CSV stem. For example, `data\large_export.csv` is split into files such as `data\large_export\large_export_01.csv`, `data\large_export\large_export_02.csv`, and so on.
|
|
75
|
+
|
|
76
|
+
CSV chunking with an explicit encoding:
|
|
77
|
+
|
|
78
|
+
```powershell
|
|
79
|
+
python -m csvchunking "data\large_export.csv" --chunk-size 5000 --encoding utf-8-sig
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Each chunk contains the original header row plus up to `--chunk-size` data rows. The delimiter is detected automatically; if detection fails, semicolon-separated CSV is used.
|
|
83
|
+
|
|
84
|
+
## Supported CLI options
|
|
85
|
+
|
|
86
|
+
Log analysis options:
|
|
87
|
+
|
|
88
|
+
- `--logs-dir`
|
|
89
|
+
- `--out-dir`
|
|
90
|
+
- `--backup-dir`
|
|
91
|
+
- `--top-examples`
|
|
92
|
+
- `--convert`
|
|
93
|
+
- `--self-test`
|
|
94
|
+
|
|
95
|
+
CSV chunking options:
|
|
96
|
+
|
|
97
|
+
- `input_file` - path to the CSV file to split
|
|
98
|
+
- `--chunk-size` - required number of data rows per output file; must be greater than zero
|
|
99
|
+
- `--encoding` - input and output encoding; defaults to `utf-8-sig`
|
|
100
|
+
|
|
101
|
+
## Release process
|
|
102
|
+
|
|
103
|
+
To publish a new release, always test on TestPyPI first, then upload to PyPI only after successful Conda smoke tests:
|
|
65
104
|
|
|
66
105
|
```bash
|
|
67
106
|
export TWINE_USERNAME="__token__"
|
|
@@ -71,37 +110,25 @@ bash scripts/release_testpypi.sh --bump patch
|
|
|
71
110
|
bash scripts/release_pypi.sh --yes
|
|
72
111
|
```
|
|
73
112
|
|
|
74
|
-
**
|
|
75
|
-
-
|
|
76
|
-
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
```
|
|
81
|
-
|
|
82
|
-
## Supported CLI options
|
|
83
|
-
|
|
84
|
-
- `--logs-dir`
|
|
85
|
-
- `--out-dir`
|
|
86
|
-
- `--backup-dir`
|
|
87
|
-
- `--top-examples`
|
|
88
|
-
- `--convert`
|
|
89
|
-
- `--self-test`
|
|
90
|
-
|
|
91
|
-
## Library usage
|
|
92
|
-
|
|
93
|
-
```python
|
|
113
|
+
**Notes:**
|
|
114
|
+
- Run and verify the TestPyPI release first, then upload the final package to PyPI.
|
|
115
|
+
- PyPI versions cannot be overwritten or reused.
|
|
116
|
+
|
|
117
|
+
## Library usage
|
|
118
|
+
|
|
119
|
+
```python
|
|
94
120
|
from pathlib import Path
|
|
95
121
|
|
|
96
122
|
from loganalysis import (
|
|
97
123
|
analyze_file,
|
|
98
124
|
convert_report_md_to_html_pdf,
|
|
99
|
-
iter_logical_entries,
|
|
100
|
-
normalize_message,
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
125
|
+
iter_logical_entries,
|
|
126
|
+
normalize_message,
|
|
127
|
+
)
|
|
128
|
+
from csvchunking import split_csv
|
|
129
|
+
|
|
130
|
+
result = analyze_file(Path("logs/app.txt"))
|
|
131
|
+
print(result["norm_counts"])
|
|
105
132
|
|
|
106
133
|
print(normalize_message(
|
|
107
134
|
'Conversion: X =3100110. 138 The record was not found in table "Teile".'
|
|
@@ -112,20 +139,27 @@ for entry in iter_logical_entries(Path("logs/app.txt")):
|
|
|
112
139
|
|
|
113
140
|
convert_report_md_to_html_pdf(
|
|
114
141
|
Path("log_analyse_out/report.md"),
|
|
115
|
-
Path("log_analyse_out/report.html"),
|
|
116
|
-
Path("log_analyse_out/report.pdf"),
|
|
117
|
-
)
|
|
118
|
-
|
|
142
|
+
Path("log_analyse_out/report.html"),
|
|
143
|
+
Path("log_analyse_out/report.pdf"),
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
chunk_result = split_csv(Path("data/large_export.csv"), chunk_size=5000)
|
|
147
|
+
print(chunk_result.output_dir)
|
|
148
|
+
print(chunk_result.output_files)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
`split_csv()` returns a `ChunkResult` with the input file, output directory, chunk size, processed data-row count, created file count, and generated output file paths.
|
|
119
152
|
|
|
120
153
|
## Project structure
|
|
121
154
|
|
|
122
155
|
```text
|
|
123
156
|
.
|
|
124
|
-
├─ pyproject.toml
|
|
125
|
-
├─ src/loganalysis/
|
|
126
|
-
├─
|
|
127
|
-
├─
|
|
128
|
-
|
|
157
|
+
├─ pyproject.toml
|
|
158
|
+
├─ src/loganalysis/
|
|
159
|
+
├─ src/csvchunking/
|
|
160
|
+
├─ tests/
|
|
161
|
+
├─ docs/
|
|
162
|
+
└─ log_analysis.py
|
|
129
163
|
```
|
|
130
164
|
|
|
131
165
|
Important modules:
|
|
@@ -135,9 +169,11 @@ Important modules:
|
|
|
135
169
|
- `normalization.py` - message normalization
|
|
136
170
|
- `report_markdown.py` - Markdown report model and rendering
|
|
137
171
|
- `report_html.py` - HTML/CSS rendering
|
|
138
|
-
- `report_pdf.py` - PDF engine selection and fallback handling
|
|
139
|
-
- `converters.py` - Markdown-to-HTML/PDF conversion
|
|
140
|
-
- `cli.py` - command-line entry point
|
|
172
|
+
- `report_pdf.py` - PDF engine selection and fallback handling
|
|
173
|
+
- `converters.py` - Markdown-to-HTML/PDF conversion
|
|
174
|
+
- `loganalysis/cli.py` - log analysis command-line entry point
|
|
175
|
+
- `csvchunking/chunker.py` - CSV splitting logic and `ChunkResult`
|
|
176
|
+
- `csvchunking/cli.py` - CSV chunking command-line entry point
|
|
141
177
|
|
|
142
178
|
## HTML/PDF conversion
|
|
143
179
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "nm-tool-forge"
|
|
7
|
-
version = "0.2.3"
|
|
7
|
+
version = "0.2.4"
|
|
8
8
|
description = "Analyze MigMan log files and generate aggregated CSV, Markdown, HTML, and optional PDF reports."
|
|
9
9
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -18,16 +18,16 @@ def split_csv(
|
|
|
18
18
|
chunk_size: int,
|
|
19
19
|
encoding: str = "utf-8-sig",
|
|
20
20
|
) -> ChunkResult:
|
|
21
|
-
if not Path(input_file).is_file():
|
|
22
|
-
raise FileNotFoundError(f"
|
|
23
|
-
if chunk_size <= 0:
|
|
24
|
-
raise ValueError("chunk_size
|
|
21
|
+
if not Path(input_file).is_file():
|
|
22
|
+
raise FileNotFoundError(f"Input file not found: {input_file}")
|
|
23
|
+
if chunk_size <= 0:
|
|
24
|
+
raise ValueError("chunk_size must be greater than 0")
|
|
25
25
|
|
|
26
26
|
input_file = Path(input_file)
|
|
27
27
|
output_dir = input_file.parent / input_file.stem
|
|
28
28
|
output_dir.mkdir(exist_ok=True)
|
|
29
29
|
|
|
30
|
-
#
|
|
30
|
+
# Detect the delimiter automatically.
|
|
31
31
|
with open(input_file, encoding=encoding, newline="") as f:
|
|
32
32
|
sample = f.read(4096)
|
|
33
33
|
f.seek(0)
|
|
@@ -41,7 +41,7 @@ def split_csv(
|
|
|
41
41
|
try:
|
|
42
42
|
header = next(reader)
|
|
43
43
|
except StopIteration as exc:
|
|
44
|
-
raise ValueError("
|
|
44
|
+
raise ValueError("Input file is empty.") from exc
|
|
45
45
|
chunk = []
|
|
46
46
|
file_count = 0
|
|
47
47
|
data_rows = 0
|
|
@@ -7,22 +7,22 @@ from .chunker import split_csv
|
|
|
7
7
|
|
|
8
8
|
def main() -> None:
|
|
9
9
|
parser = argparse.ArgumentParser(
|
|
10
|
-
description="
|
|
10
|
+
description="Split a large CSV file into smaller chunks with a header row.",
|
|
11
11
|
)
|
|
12
|
-
parser.add_argument("input_file", help="
|
|
12
|
+
parser.add_argument("input_file", help="Path to the CSV file")
|
|
13
13
|
parser.add_argument(
|
|
14
14
|
"--chunk-size",
|
|
15
15
|
type=int,
|
|
16
16
|
required=True,
|
|
17
|
-
help="
|
|
17
|
+
help="Number of data rows per output file; must be greater than 0",
|
|
18
18
|
)
|
|
19
|
-
parser.add_argument("--encoding", default="utf-8-sig", help="
|
|
19
|
+
parser.add_argument("--encoding", default="utf-8-sig", help="Input and output encoding (Default: utf-8-sig)")
|
|
20
20
|
args = parser.parse_args()
|
|
21
|
-
try:
|
|
22
|
-
result = split_csv(Path(args.input_file), args.chunk_size, encoding=args.encoding)
|
|
23
|
-
except Exception as e:
|
|
24
|
-
print(f"
|
|
25
|
-
sys.exit(1)
|
|
21
|
+
try:
|
|
22
|
+
result = split_csv(Path(args.input_file), args.chunk_size, encoding=args.encoding)
|
|
23
|
+
except Exception as e:
|
|
24
|
+
print(f"Error: {e}", file=sys.stderr)
|
|
25
|
+
sys.exit(1)
|
|
26
26
|
print("CSV chunking completed.")
|
|
27
27
|
print(f"- Input: {result.input_file}")
|
|
28
28
|
print(f"- Output directory: {result.output_dir}")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nm-tool-forge
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: Analyze MigMan log files and generate aggregated CSV, Markdown, HTML, and optional PDF reports.
|
|
5
5
|
Author-email: Stefan Ewald <s.ew@outlook.de>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -31,7 +31,7 @@ Dynamic: license-file
|
|
|
31
31
|
|
|
32
32
|
# nm-tool-forge
|
|
33
33
|
|
|
34
|
-
`nm-tool-forge` analyzes MigMan text log files with severity tokens such as `INFO`, `ERROR`, and `WARNING` and generates aggregated CSV, Markdown, HTML, and optional PDF reports.
|
|
34
|
+
`nm-tool-forge` analyzes MigMan text log files with severity tokens such as `INFO`, `ERROR`, and `WARNING` and generates aggregated CSV, Markdown, HTML, and optional PDF reports. The package also includes `csvchunking`, a small helper for splitting large CSV files into migration-friendly chunks.
|
|
35
35
|
|
|
36
36
|
The project uses a package-ready `src` layout. The legacy `log_analysis.py` file remains available as a thin compatibility entry point for older local setups.
|
|
37
37
|
|
|
@@ -43,6 +43,7 @@ The project uses a package-ready `src` layout. The legacy `log_analysis.py` file
|
|
|
43
43
|
- Generate Markdown summary reports
|
|
44
44
|
- Optionally convert reports to HTML and PDF
|
|
45
45
|
- Keep a backup copy of analyzed log files
|
|
46
|
+
- Split large CSV files into numbered chunks while preserving the header row
|
|
46
47
|
- Run built-in self-tests from the CLI
|
|
47
48
|
|
|
48
49
|
## Installation
|
|
@@ -61,12 +62,14 @@ python -m pip install .[pdf,dev]
|
|
|
61
62
|
|
|
62
63
|
## Command-line usage
|
|
63
64
|
|
|
64
|
-
After installation,
|
|
65
|
+
After installation, the CLI entry points are available:
|
|
65
66
|
|
|
66
67
|
```powershell
|
|
67
68
|
python -m loganalysis --help
|
|
69
|
+
python -m csvchunking --help
|
|
68
70
|
loganalysis --help
|
|
69
71
|
nm-tool-forge --help
|
|
72
|
+
csvchunking --help
|
|
70
73
|
```
|
|
71
74
|
|
|
72
75
|
Typical analysis run:
|
|
@@ -89,29 +92,30 @@ python -m loganalysis --self-test
|
|
|
89
92
|
|
|
90
93
|
Legacy compatibility call:
|
|
91
94
|
|
|
95
|
+
```powershell
|
|
96
|
+
python .\log_analysis.py --convert
|
|
97
|
+
```
|
|
92
98
|
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
To publish a new release, always test on TestPyPI first, then upload to PyPI only after successful Conda/Smoke-Tests:
|
|
96
|
-
|
|
97
|
-
```bash
|
|
98
|
-
export TWINE_USERNAME="__token__"
|
|
99
|
-
export TWINE_PASSWORD="pypi-..."
|
|
99
|
+
CSV chunking run:
|
|
100
100
|
|
|
101
|
-
|
|
102
|
-
|
|
101
|
+
```powershell
|
|
102
|
+
csvchunking "data\large_export.csv" --chunk-size 5000
|
|
103
103
|
```
|
|
104
104
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
105
|
+
The command creates an output directory next to the input file named after the CSV stem. For example, `data\large_export.csv` is split into files such as `data\large_export\large_export_01.csv`, `data\large_export\large_export_02.csv`, and so on.
|
|
106
|
+
|
|
107
|
+
CSV chunking with an explicit encoding:
|
|
108
108
|
|
|
109
109
|
```powershell
|
|
110
|
-
python
|
|
110
|
+
python -m csvchunking "data\large_export.csv" --chunk-size 5000 --encoding utf-8-sig
|
|
111
111
|
```
|
|
112
112
|
|
|
113
|
+
Each chunk contains the original header row plus up to `--chunk-size` data rows. The delimiter is detected automatically; if detection fails, semicolon-separated CSV is used.
|
|
114
|
+
|
|
113
115
|
## Supported CLI options
|
|
114
116
|
|
|
117
|
+
Log analysis options:
|
|
118
|
+
|
|
115
119
|
- `--logs-dir`
|
|
116
120
|
- `--out-dir`
|
|
117
121
|
- `--backup-dir`
|
|
@@ -119,6 +123,28 @@ python .\log_analysis.py --convert
|
|
|
119
123
|
- `--convert`
|
|
120
124
|
- `--self-test`
|
|
121
125
|
|
|
126
|
+
CSV chunking options:
|
|
127
|
+
|
|
128
|
+
- `input_file` - path to the CSV file to split
|
|
129
|
+
- `--chunk-size` - required number of data rows per output file; must be greater than zero
|
|
130
|
+
- `--encoding` - input and output encoding; defaults to `utf-8-sig`
|
|
131
|
+
|
|
132
|
+
## Release process
|
|
133
|
+
|
|
134
|
+
To publish a new release, always test on TestPyPI first, then upload to PyPI only after successful Conda smoke tests:
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
export TWINE_USERNAME="__token__"
|
|
138
|
+
export TWINE_PASSWORD="pypi-..."
|
|
139
|
+
|
|
140
|
+
bash scripts/release_testpypi.sh --bump patch
|
|
141
|
+
bash scripts/release_pypi.sh --yes
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
**Notes:**
|
|
145
|
+
- Run and verify the TestPyPI release first, then upload the final package to PyPI.
|
|
146
|
+
- PyPI versions cannot be overwritten or reused.
|
|
147
|
+
|
|
122
148
|
## Library usage
|
|
123
149
|
|
|
124
150
|
```python
|
|
@@ -130,6 +156,7 @@ from loganalysis import (
|
|
|
130
156
|
iter_logical_entries,
|
|
131
157
|
normalize_message,
|
|
132
158
|
)
|
|
159
|
+
from csvchunking import split_csv
|
|
133
160
|
|
|
134
161
|
result = analyze_file(Path("logs/app.txt"))
|
|
135
162
|
print(result["norm_counts"])
|
|
@@ -146,14 +173,21 @@ convert_report_md_to_html_pdf(
|
|
|
146
173
|
Path("log_analyse_out/report.html"),
|
|
147
174
|
Path("log_analyse_out/report.pdf"),
|
|
148
175
|
)
|
|
176
|
+
|
|
177
|
+
chunk_result = split_csv(Path("data/large_export.csv"), chunk_size=5000)
|
|
178
|
+
print(chunk_result.output_dir)
|
|
179
|
+
print(chunk_result.output_files)
|
|
149
180
|
```
|
|
150
181
|
|
|
182
|
+
`split_csv()` returns a `ChunkResult` with the input file, output directory, chunk size, processed data-row count, created file count, and generated output file paths.
|
|
183
|
+
|
|
151
184
|
## Project structure
|
|
152
185
|
|
|
153
186
|
```text
|
|
154
187
|
.
|
|
155
188
|
├─ pyproject.toml
|
|
156
189
|
├─ src/loganalysis/
|
|
190
|
+
├─ src/csvchunking/
|
|
157
191
|
├─ tests/
|
|
158
192
|
├─ docs/
|
|
159
193
|
└─ log_analysis.py
|
|
@@ -168,7 +202,9 @@ Important modules:
|
|
|
168
202
|
- `report_html.py` - HTML/CSS rendering
|
|
169
203
|
- `report_pdf.py` - PDF engine selection and fallback handling
|
|
170
204
|
- `converters.py` - Markdown-to-HTML/PDF conversion
|
|
171
|
-
- `cli.py` - command-line entry point
|
|
205
|
+
- `loganalysis/cli.py` - log analysis command-line entry point
|
|
206
|
+
- `csvchunking/chunker.py` - CSV splitting logic and `ChunkResult`
|
|
207
|
+
- `csvchunking/cli.py` - CSV chunking command-line entry point
|
|
172
208
|
|
|
173
209
|
## HTML/PDF conversion
|
|
174
210
|
|
|
@@ -12,7 +12,7 @@ def make_csv(tmp_path, name, header, rows, encoding="utf-8-sig", delimiter=";"):
|
|
|
12
12
|
return file
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
def test_normale_aufteilung(tmp_path):
|
|
15
|
+
def test_regular_split(tmp_path):
|
|
16
16
|
header = ["col1", "col2"]
|
|
17
17
|
rows = [["A", "1"], ["B", "2"], ["C", "3"], ["D", "4"], ["E", "5"]]
|
|
18
18
|
file = make_csv(tmp_path, "sample.csv", header, rows)
|
|
@@ -27,7 +27,7 @@ def test_normale_aufteilung(tmp_path):
|
|
|
27
27
|
assert (result.output_dir / "sample_03.csv").exists()
|
|
28
28
|
|
|
29
29
|
|
|
30
|
-
def test_header_in_jeder_datei(tmp_path):
|
|
30
|
+
def test_header_in_each_file(tmp_path):
|
|
31
31
|
header = ["foo", "bar"]
|
|
32
32
|
rows = [["x", "1"], ["y", "2"], ["z", "3"]]
|
|
33
33
|
file = make_csv(tmp_path, "test.csv", header, rows)
|
|
@@ -37,7 +37,7 @@ def test_header_in_jeder_datei(tmp_path):
|
|
|
37
37
|
assert f.readline().strip() == "foo;bar"
|
|
38
38
|
|
|
39
39
|
|
|
40
|
-
def test_dateiname_mit_leerzeichen(tmp_path):
|
|
40
|
+
def test_filename_with_spaces(tmp_path):
|
|
41
41
|
header = ["a", "b"]
|
|
42
42
|
rows = [["1", "2"]]
|
|
43
43
|
file = make_csv(tmp_path, "Part-Storage Areas Relationships.csv", header, rows)
|
|
@@ -46,7 +46,7 @@ def test_dateiname_mit_leerzeichen(tmp_path):
|
|
|
46
46
|
assert (result.output_dir / "Part-Storage Areas Relationships_01.csv").exists()
|
|
47
47
|
|
|
48
48
|
|
|
49
|
-
def test_ungueltige_chunkgroesse(tmp_path):
|
|
49
|
+
def test_invalid_chunk_size(tmp_path):
|
|
50
50
|
header = ["a", "b"]
|
|
51
51
|
rows = [["1", "2"]]
|
|
52
52
|
file = make_csv(tmp_path, "fail.csv", header, rows)
|
|
@@ -56,7 +56,7 @@ def test_ungueltige_chunkgroesse(tmp_path):
|
|
|
56
56
|
split_csv(file, chunk_size=-1)
|
|
57
57
|
|
|
58
58
|
|
|
59
|
-
def
|
|
59
|
+
def test_empty_file(tmp_path):
|
|
60
60
|
file = tmp_path / "empty.csv"
|
|
61
61
|
file.write_text("")
|
|
62
62
|
with pytest.raises(ValueError):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|