nm-tool-forge 0.1.0__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/PKG-INFO +57 -4
  2. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/README.md +105 -52
  3. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/pyproject.toml +67 -66
  4. nm_tool_forge-0.2.4/src/csvchunking/__init__.py +4 -0
  5. nm_tool_forge-0.2.4/src/csvchunking/__main__.py +4 -0
  6. nm_tool_forge-0.2.4/src/csvchunking/chunker.py +76 -0
  7. nm_tool_forge-0.2.4/src/csvchunking/cli.py +31 -0
  8. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/__init__.py +16 -16
  9. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/cli.py +8 -4
  10. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/csv_export.py +7 -7
  11. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/parsing.py +6 -6
  12. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/report_markdown.py +16 -12
  13. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/report_pdf.py +9 -4
  14. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/selftest.py +17 -11
  15. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/nm_tool_forge.egg-info/PKG-INFO +57 -4
  16. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/nm_tool_forge.egg-info/SOURCES.txt +5 -0
  17. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/nm_tool_forge.egg-info/entry_points.txt +1 -0
  18. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/nm_tool_forge.egg-info/top_level.txt +1 -0
  19. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/tests/test_analysis.py +6 -5
  20. nm_tool_forge-0.2.4/tests/test_csvchunking.py +63 -0
  21. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/tests/test_report_markdown.py +3 -1
  22. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/LICENSE +0 -0
  23. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/setup.cfg +0 -0
  24. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/__main__.py +0 -0
  25. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/analysis.py +0 -0
  26. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/constants.py +0 -0
  27. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/converters.py +0 -0
  28. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/encoding.py +0 -0
  29. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/filesystem.py +0 -0
  30. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/models.py +0 -0
  31. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/normalization.py +0 -0
  32. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/report_html.py +0 -0
  33. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/loganalysis/report_models.py +0 -0
  34. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/nm_tool_forge.egg-info/dependency_links.txt +0 -0
  35. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/src/nm_tool_forge.egg-info/requires.txt +0 -0
  36. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/tests/test_normalization.py +0 -0
  37. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/tests/test_parsing.py +0 -0
  38. {nm_tool_forge-0.1.0 → nm_tool_forge-0.2.4}/tests/test_report_html.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nm-tool-forge
3
- Version: 0.1.0
3
+ Version: 0.2.4
4
4
  Summary: Analyze MigMan log files and generate aggregated CSV, Markdown, HTML, and optional PDF reports.
5
5
  Author-email: Stefan Ewald <s.ew@outlook.de>
6
6
  License-Expression: MIT
@@ -31,7 +31,7 @@ Dynamic: license-file
31
31
 
32
32
  # nm-tool-forge
33
33
 
34
- `nm-tool-forge` analyzes MigMan text log files with severity tokens such as `INFO`, `ERROR`, and `WARNING` and generates aggregated CSV, Markdown, HTML, and optional PDF reports.
34
+ `nm-tool-forge` analyzes MigMan text log files with severity tokens such as `INFO`, `ERROR`, and `WARNING` and generates aggregated CSV, Markdown, HTML, and optional PDF reports. The package also includes `csvchunking`, a small helper for splitting large CSV files into migration-friendly chunks.
35
35
 
36
36
  The project uses a package-ready `src` layout. The legacy `log_analysis.py` file remains available as a thin compatibility entry point for older local setups.
37
37
 
@@ -43,6 +43,7 @@ The project uses a package-ready `src` layout. The legacy `log_analysis.py` file
43
43
  - Generate Markdown summary reports
44
44
  - Optionally convert reports to HTML and PDF
45
45
  - Keep a backup copy of analyzed log files
46
+ - Split large CSV files into numbered chunks while preserving the header row
46
47
  - Run built-in self-tests from the CLI
47
48
 
48
49
  ## Installation
@@ -61,12 +62,14 @@ python -m pip install .[pdf,dev]
61
62
 
62
63
  ## Command-line usage
63
64
 
64
- After installation, both entry points are available:
65
+ After installation, the CLI entry points are available:
65
66
 
66
67
  ```powershell
67
68
  python -m loganalysis --help
69
+ python -m csvchunking --help
68
70
  loganalysis --help
69
71
  nm-tool-forge --help
72
+ csvchunking --help
70
73
  ```
71
74
 
72
75
  Typical analysis run:
@@ -93,8 +96,26 @@ Legacy compatibility call:
93
96
  python .\log_analysis.py --convert
94
97
  ```
95
98
 
99
+ CSV chunking run:
100
+
101
+ ```powershell
102
+ csvchunking "data\large_export.csv" --chunk-size 5000
103
+ ```
104
+
105
+ The command creates an output directory, named after the CSV stem, next to the input file. For example, `data\large_export.csv` is split into files such as `data\large_export\large_export_01.csv`, `data\large_export\large_export_02.csv`, and so on.
106
+
107
+ CSV chunking with an explicit encoding:
108
+
109
+ ```powershell
110
+ python -m csvchunking "data\large_export.csv" --chunk-size 5000 --encoding utf-8-sig
111
+ ```
112
+
113
+ Each chunk contains the original header row plus up to `--chunk-size` data rows. The delimiter is detected automatically; if detection fails, semicolon-separated CSV is used.
114
+
96
115
  ## Supported CLI options
97
116
 
117
+ Log analysis options:
118
+
98
119
  - `--logs-dir`
99
120
  - `--out-dir`
100
121
  - `--backup-dir`
@@ -102,6 +123,28 @@ python .\log_analysis.py --convert
102
123
  - `--convert`
103
124
  - `--self-test`
104
125
 
126
+ CSV chunking options:
127
+
128
+ - `input_file` - path to the CSV file to split
129
+ - `--chunk-size` - required number of data rows per output file; must be greater than zero
130
+ - `--encoding` - input and output encoding; defaults to `utf-8-sig`
131
+
132
+ ## Release process
133
+
134
+ To publish a new release, always test on TestPyPI first, then upload to PyPI only after successful Conda smoke tests:
135
+
136
+ ```bash
137
+ export TWINE_USERNAME="__token__"
138
+ export TWINE_PASSWORD="pypi-..."
139
+
140
+ bash scripts/release_testpypi.sh --bump patch
141
+ bash scripts/release_pypi.sh --yes
142
+ ```
143
+
144
+ **Notes:**
145
+ - Run and verify the TestPyPI release first, then upload the final package to PyPI.
146
+ - PyPI versions cannot be overwritten or reused.
147
+
105
148
  ## Library usage
106
149
 
107
150
  ```python
@@ -113,6 +156,7 @@ from loganalysis import (
113
156
  iter_logical_entries,
114
157
  normalize_message,
115
158
  )
159
+ from csvchunking import split_csv
116
160
 
117
161
  result = analyze_file(Path("logs/app.txt"))
118
162
  print(result["norm_counts"])
@@ -129,14 +173,21 @@ convert_report_md_to_html_pdf(
129
173
  Path("log_analyse_out/report.html"),
130
174
  Path("log_analyse_out/report.pdf"),
131
175
  )
176
+
177
+ chunk_result = split_csv(Path("data/large_export.csv"), chunk_size=5000)
178
+ print(chunk_result.output_dir)
179
+ print(chunk_result.output_files)
132
180
  ```
133
181
 
182
+ `split_csv()` returns a `ChunkResult` with the input file, output directory, chunk size, processed data-row count, created file count, and generated output file paths.
183
+
134
184
  ## Project structure
135
185
 
136
186
  ```text
137
187
  .
138
188
  ├─ pyproject.toml
139
189
  ├─ src/loganalysis/
190
+ ├─ src/csvchunking/
140
191
  ├─ tests/
141
192
  ├─ docs/
142
193
  └─ log_analysis.py
@@ -151,7 +202,9 @@ Important modules:
151
202
  - `report_html.py` - HTML/CSS rendering
152
203
  - `report_pdf.py` - PDF engine selection and fallback handling
153
204
  - `converters.py` - Markdown-to-HTML/PDF conversion
154
- - `cli.py` - command-line entry point
205
+ - `loganalysis/cli.py` - log analysis command-line entry point
206
+ - `csvchunking/chunker.py` - CSV splitting logic and `ChunkResult`
207
+ - `csvchunking/cli.py` - CSV chunking command-line entry point
155
208
 
156
209
  ## HTML/PDF conversion
157
210
 
@@ -1,6 +1,6 @@
1
1
  # nm-tool-forge
2
2
 
3
- `nm-tool-forge` analyzes MigMan text log files with severity tokens such as `INFO`, `ERROR`, and `WARNING` and generates aggregated CSV, Markdown, HTML, and optional PDF reports.
3
+ `nm-tool-forge` analyzes MigMan text log files with severity tokens such as `INFO`, `ERROR`, and `WARNING` and generates aggregated CSV, Markdown, HTML, and optional PDF reports. The package also includes `csvchunking`, a small helper for splitting large CSV files into migration-friendly chunks.
4
4
 
5
5
  The project uses a package-ready `src` layout. The legacy `log_analysis.py` file remains available as a thin compatibility entry point for older local setups.
6
6
 
@@ -9,10 +9,11 @@ The project uses a package-ready `src` layout. The legacy `log_analysis.py` file
9
9
  - Parse logical log entries from multi-line text logs
10
10
  - Normalize recurring error patterns for better aggregation
11
11
  - Generate aggregated CSV reports
12
- - Generate Markdown summary reports
13
- - Optionally convert reports to HTML and PDF
14
- - Keep a backup copy of analyzed log files
15
- - Run built-in self-tests from the CLI
12
+ - Generate Markdown summary reports
13
+ - Optionally convert reports to HTML and PDF
14
+ - Keep a backup copy of analyzed log files
15
+ - Split large CSV files into numbered chunks while preserving the header row
16
+ - Run built-in self-tests from the CLI
16
17
 
17
18
  ## Installation
18
19
 
@@ -30,13 +31,15 @@ python -m pip install .[pdf,dev]
30
31
 
31
32
  ## Command-line usage
32
33
 
33
- After installation, both entry points are available:
34
-
35
- ```powershell
36
- python -m loganalysis --help
37
- loganalysis --help
38
- nm-tool-forge --help
39
- ```
34
+ After installation, the CLI entry points are available:
35
+
36
+ ```powershell
37
+ python -m loganalysis --help
38
+ python -m csvchunking --help
39
+ loganalysis --help
40
+ nm-tool-forge --help
41
+ csvchunking --help
42
+ ```
40
43
 
41
44
  Typical analysis run:
42
45
 
@@ -50,41 +53,82 @@ Analysis with HTML/PDF conversion:
50
53
  nm-tool-forge --logs-dir logs --out-dir log_analyse_out --convert
51
54
  ```
52
55
 
53
- Self-test mode:
54
-
55
- ```powershell
56
- python -m loganalysis --self-test
57
- ```
58
-
59
- Legacy compatibility call:
60
-
61
- ```powershell
62
- python .\log_analysis.py --convert
56
+ Self-test mode:
57
+
58
+ ```powershell
59
+ python -m loganalysis --self-test
60
+ ```
61
+
62
+ Legacy compatibility call:
63
+
64
+ ```powershell
65
+ python .\log_analysis.py --convert
66
+ ```
67
+
68
+ CSV chunking run:
69
+
70
+ ```powershell
71
+ csvchunking "data\large_export.csv" --chunk-size 5000
72
+ ```
73
+
74
+ The command creates an output directory, named after the CSV stem, next to the input file. For example, `data\large_export.csv` is split into files such as `data\large_export\large_export_01.csv`, `data\large_export\large_export_02.csv`, and so on.
75
+
76
+ CSV chunking with an explicit encoding:
77
+
78
+ ```powershell
79
+ python -m csvchunking "data\large_export.csv" --chunk-size 5000 --encoding utf-8-sig
80
+ ```
81
+
82
+ Each chunk contains the original header row plus up to `--chunk-size` data rows. The delimiter is detected automatically; if detection fails, semicolon-separated CSV is used.
83
+
84
+ ## Supported CLI options
85
+
86
+ Log analysis options:
87
+
88
+ - `--logs-dir`
89
+ - `--out-dir`
90
+ - `--backup-dir`
91
+ - `--top-examples`
92
+ - `--convert`
93
+ - `--self-test`
94
+
95
+ CSV chunking options:
96
+
97
+ - `input_file` - path to the CSV file to split
98
+ - `--chunk-size` - required number of data rows per output file; must be greater than zero
99
+ - `--encoding` - input and output encoding; defaults to `utf-8-sig`
100
+
101
+ ## Release process
102
+
103
+ To publish a new release, always test on TestPyPI first, then upload to PyPI only after successful Conda smoke tests:
104
+
105
+ ```bash
106
+ export TWINE_USERNAME="__token__"
107
+ export TWINE_PASSWORD="pypi-..."
108
+
109
+ bash scripts/release_testpypi.sh --bump patch
110
+ bash scripts/release_pypi.sh --yes
63
111
  ```
64
112
 
65
- ## Supported CLI options
66
-
67
- - `--logs-dir`
68
- - `--out-dir`
69
- - `--backup-dir`
70
- - `--top-examples`
71
- - `--convert`
72
- - `--self-test`
73
-
74
- ## Library usage
75
-
76
- ```python
113
+ **Notes:**
114
+ - Run and verify the TestPyPI release first, then upload the final package to PyPI.
115
+ - PyPI versions cannot be overwritten or reused.
116
+
117
+ ## Library usage
118
+
119
+ ```python
77
120
  from pathlib import Path
78
121
 
79
122
  from loganalysis import (
80
123
  analyze_file,
81
124
  convert_report_md_to_html_pdf,
82
- iter_logical_entries,
83
- normalize_message,
84
- )
85
-
86
- result = analyze_file(Path("logs/app.txt"))
87
- print(result["norm_counts"])
125
+ iter_logical_entries,
126
+ normalize_message,
127
+ )
128
+ from csvchunking import split_csv
129
+
130
+ result = analyze_file(Path("logs/app.txt"))
131
+ print(result["norm_counts"])
88
132
 
89
133
  print(normalize_message(
90
134
  'Conversion: X =3100110. 138 The record was not found in table "Teile".'
@@ -95,20 +139,27 @@ for entry in iter_logical_entries(Path("logs/app.txt")):
95
139
 
96
140
  convert_report_md_to_html_pdf(
97
141
  Path("log_analyse_out/report.md"),
98
- Path("log_analyse_out/report.html"),
99
- Path("log_analyse_out/report.pdf"),
100
- )
101
- ```
142
+ Path("log_analyse_out/report.html"),
143
+ Path("log_analyse_out/report.pdf"),
144
+ )
145
+
146
+ chunk_result = split_csv(Path("data/large_export.csv"), chunk_size=5000)
147
+ print(chunk_result.output_dir)
148
+ print(chunk_result.output_files)
149
+ ```
150
+
151
+ `split_csv()` returns a `ChunkResult` with the input file, output directory, chunk size, processed data-row count, created file count, and generated output file paths.
102
152
 
103
153
  ## Project structure
104
154
 
105
155
  ```text
106
156
  .
107
- ├─ pyproject.toml
108
- ├─ src/loganalysis/
109
- ├─ tests/
110
- ├─ docs/
111
- └─ log_analysis.py
157
+ ├─ pyproject.toml
158
+ ├─ src/loganalysis/
159
+ ├─ src/csvchunking/
160
+ ├─ tests/
161
+ ├─ docs/
162
+ └─ log_analysis.py
112
163
  ```
113
164
 
114
165
  Important modules:
@@ -118,9 +169,11 @@ Important modules:
118
169
  - `normalization.py` - message normalization
119
170
  - `report_markdown.py` - Markdown report model and rendering
120
171
  - `report_html.py` - HTML/CSS rendering
121
- - `report_pdf.py` - PDF engine selection and fallback handling
122
- - `converters.py` - Markdown-to-HTML/PDF conversion
123
- - `cli.py` - command-line entry point
172
+ - `report_pdf.py` - PDF engine selection and fallback handling
173
+ - `converters.py` - Markdown-to-HTML/PDF conversion
174
+ - `loganalysis/cli.py` - log analysis command-line entry point
175
+ - `csvchunking/chunker.py` - CSV splitting logic and `ChunkResult`
176
+ - `csvchunking/cli.py` - CSV chunking command-line entry point
124
177
 
125
178
  ## HTML/PDF conversion
126
179
 
@@ -1,67 +1,68 @@
1
- [build-system]
2
- requires = ["setuptools>=69", "wheel"]
3
- build-backend = "setuptools.build_meta"
4
-
5
- [project]
6
- name = "nm-tool-forge"
7
- version = "0.1.0"
8
- description = "Analyze MigMan log files and generate aggregated CSV, Markdown, HTML, and optional PDF reports."
9
- readme = { file = "README.md", content-type = "text/markdown" }
10
- requires-python = ">=3.10"
11
- license = "MIT"
12
- license-files = ["LICENSE"]
13
- authors = [
14
- { name = "Stefan Ewald", email = "s.ew@outlook.de" }
15
- ]
16
- keywords = ["migman", "logs", "analysis", "reporting", "csv", "markdown", "pdf"]
17
- classifiers = [
18
- "Development Status :: 4 - Beta",
19
- "Intended Audience :: Developers",
20
- "Programming Language :: Python :: 3",
21
- "Programming Language :: Python :: 3.10",
22
- "Programming Language :: Python :: 3.11",
23
- "Programming Language :: Python :: 3.12",
24
- "Programming Language :: Python :: 3.13",
25
- "Topic :: Software Development :: Libraries :: Python Modules",
26
- "Topic :: Utilities",
27
- ]
28
-
29
- dependencies = [
30
- "chardet>=5.0",
31
- ]
32
-
33
- [project.optional-dependencies]
34
- pdf = [
35
- "weasyprint>=62",
36
- ]
37
- dev = [
38
- "pytest>=8.0",
39
- "build>=1.2",
40
- "twine>=5.0",
41
- "ruff>=0.11",
42
- ]
43
-
44
- [project.urls]
45
- Homepage = "https://github.com/Jack736-ui/migman_log"
46
- Issues = "https://github.com/Jack736-ui/migman_log/issues"
47
-
48
- [project.scripts]
49
- nm-tool-forge = "loganalysis.cli:main"
50
- loganalysis = "loganalysis.cli:main"
51
-
52
- [tool.setuptools]
53
- package-dir = { "" = "src" }
54
-
55
- [tool.setuptools.packages.find]
56
- where = ["src"]
57
-
58
- [tool.pytest.ini_options]
59
- testpaths = ["tests"]
60
- addopts = "--basetemp=tests_tmp"
61
-
62
- [tool.ruff]
63
- line-length = 120
64
- target-version = "py310"
65
-
66
- [tool.ruff.lint]
1
+ [build-system]
2
+ requires = ["setuptools>=69", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "nm-tool-forge"
7
+ version = "0.2.4"
8
+ description = "Analyze MigMan log files and generate aggregated CSV, Markdown, HTML, and optional PDF reports."
9
+ readme = { file = "README.md", content-type = "text/markdown" }
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+ license-files = ["LICENSE"]
13
+ authors = [
14
+ { name = "Stefan Ewald", email = "s.ew@outlook.de" }
15
+ ]
16
+ keywords = ["migman", "logs", "analysis", "reporting", "csv", "markdown", "pdf"]
17
+ classifiers = [
18
+ "Development Status :: 4 - Beta",
19
+ "Intended Audience :: Developers",
20
+ "Programming Language :: Python :: 3",
21
+ "Programming Language :: Python :: 3.10",
22
+ "Programming Language :: Python :: 3.11",
23
+ "Programming Language :: Python :: 3.12",
24
+ "Programming Language :: Python :: 3.13",
25
+ "Topic :: Software Development :: Libraries :: Python Modules",
26
+ "Topic :: Utilities",
27
+ ]
28
+
29
+ dependencies = [
30
+ "chardet>=5.0",
31
+ ]
32
+
33
+ [project.optional-dependencies]
34
+ pdf = [
35
+ "weasyprint>=62",
36
+ ]
37
+ dev = [
38
+ "pytest>=8.0",
39
+ "build>=1.2",
40
+ "twine>=5.0",
41
+ "ruff>=0.11",
42
+ ]
43
+
44
+ [project.urls]
45
+ Homepage = "https://github.com/Jack736-ui/migman_log"
46
+ Issues = "https://github.com/Jack736-ui/migman_log/issues"
47
+
48
+ [project.scripts]
49
+ nm-tool-forge = "loganalysis.cli:main"
50
+ loganalysis = "loganalysis.cli:main"
51
+ csvchunking = "csvchunking.cli:main"
52
+
53
+ [tool.setuptools]
54
+ package-dir = { "" = "src" }
55
+
56
+ [tool.setuptools.packages.find]
57
+ where = ["src"]
58
+
59
+ [tool.pytest.ini_options]
60
+ testpaths = ["tests"]
61
+ addopts = "--basetemp=tests_tmp"
62
+
63
+ [tool.ruff]
64
+ line-length = 120
65
+ target-version = "py310"
66
+
67
+ [tool.ruff.lint]
67
68
  select = ["E", "F", "I", "B", "UP"]
@@ -0,0 +1,4 @@
1
+ from .chunker import ChunkResult, split_csv
2
+
3
+ __all__ = ["ChunkResult", "split_csv"]
4
+ __version__ = "0.2.4"
@@ -0,0 +1,4 @@
1
+ from .cli import main
2
+
3
+ if __name__ == "__main__":
4
+ main()
@@ -0,0 +1,76 @@
1
+ import csv
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+
5
+
6
@dataclass(frozen=True)
class ChunkResult:
    """Immutable summary of a single ``split_csv`` run."""

    # CSV file that was split.
    input_file: Path
    # Directory that holds the generated chunk files.
    output_dir: Path
    # Maximum number of data rows written to each chunk file.
    chunk_size: int
    # Number of data rows read from the input (the header row is not counted).
    data_rows_processed: int
    # Number of chunk files written.
    files_created: int
    # Paths of the chunk files, in the order they were written.
    output_files: tuple[Path, ...]


class _SemicolonDialect(csv.excel):
    """Excel-style dialect with ``;`` as delimiter, used when sniffing fails."""

    # A dedicated subclass keeps the fallback local: assigning to
    # ``csv.excel.delimiter`` would change the stdlib dialect process-wide.
    delimiter = ";"


def _detect_dialect(sample: str) -> type[csv.Dialect]:
    """Return the dialect sniffed from *sample*, or the semicolon fallback."""
    try:
        dialect = csv.Sniffer().sniff(sample)
    except csv.Error:
        return _SemicolonDialect
    # The sniffer reports ``doublequote=False`` whenever the sample contains no
    # doubled quotes; with no escape character set, ``csv.writer`` would then
    # fail on any field that contains the quote character. The sniffed dialect
    # is a fresh class per call, so adjusting it here has no global effect.
    dialect.doublequote = True
    return dialect


def _write_chunk(
    path: Path,
    header: list[str],
    rows: list[list[str]],
    dialect: type[csv.Dialect],
    encoding: str,
) -> None:
    """Write *header* followed by *rows* to *path* using *dialect*."""
    with open(path, "w", encoding=encoding, newline="") as out:
        writer = csv.writer(out, dialect)
        writer.writerow(header)
        writer.writerows(rows)


def split_csv(
    input_file: Path,
    chunk_size: int,
    encoding: str = "utf-8-sig",
) -> ChunkResult:
    """Split a CSV file into numbered chunk files that each repeat the header row.

    The chunks are written to a directory next to the input file that is named
    after the input file's stem. Chunk files are called
    ``<stem>_<NN><suffix>``, numbered from ``01``.

    Args:
        input_file: Path of the CSV file to split.
        chunk_size: Maximum number of data rows per chunk file; must be > 0.
        encoding: Encoding used to read the input and to write the chunks.

    Returns:
        A ``ChunkResult`` describing the run.

    Raises:
        FileNotFoundError: If *input_file* is not an existing file.
        ValueError: If *chunk_size* is not positive or the input has no header row.
    """
    source_path = Path(input_file)
    if not source_path.is_file():
        raise FileNotFoundError(f"Input file not found: {input_file}")
    if chunk_size <= 0:
        raise ValueError("chunk_size must be greater than 0")

    output_dir = source_path.parent / source_path.stem
    output_files: list[Path] = []
    data_rows = 0

    with open(source_path, encoding=encoding, newline="") as source:
        # Detect the delimiter from the first 4096 characters, then rewind.
        sample = source.read(4096)
        source.seek(0)
        dialect = _detect_dialect(sample)

        reader = csv.reader(source, dialect)
        try:
            header = next(reader)
        except StopIteration as exc:
            raise ValueError("Input file is empty.") from exc

        # Create the directory only once the input is known to have a header,
        # so an empty input does not leave an empty directory behind.
        output_dir.mkdir(exist_ok=True)

        chunk: list[list[str]] = []
        for row in reader:
            chunk.append(row)
            data_rows += 1
            if len(chunk) == chunk_size:
                out_path = output_dir / f"{source_path.stem}_{len(output_files) + 1:02d}{source_path.suffix}"
                _write_chunk(out_path, header, chunk, dialect, encoding)
                output_files.append(out_path)
                chunk = []

        # Remaining rows form a final, shorter chunk.
        if chunk:
            out_path = output_dir / f"{source_path.stem}_{len(output_files) + 1:02d}{source_path.suffix}"
            _write_chunk(out_path, header, chunk, dialect, encoding)
            output_files.append(out_path)

    return ChunkResult(
        input_file=source_path,
        output_dir=output_dir,
        chunk_size=chunk_size,
        data_rows_processed=data_rows,
        files_created=len(output_files),
        output_files=tuple(output_files),
    )
@@ -0,0 +1,31 @@
1
+ import argparse
2
+ import sys
3
+ from pathlib import Path
4
+
5
+ from .chunker import split_csv
6
+
7
+
8
+ def main() -> None:
9
+ parser = argparse.ArgumentParser(
10
+ description="Split a large CSV file into smaller chunks with a header row.",
11
+ )
12
+ parser.add_argument("input_file", help="Path to the CSV file")
13
+ parser.add_argument(
14
+ "--chunk-size",
15
+ type=int,
16
+ required=True,
17
+ help="Number of data rows per output file; must be greater than 0",
18
+ )
19
+ parser.add_argument("--encoding", default="utf-8-sig", help="Input and output encoding (Default: utf-8-sig)")
20
+ args = parser.parse_args()
21
+ try:
22
+ result = split_csv(Path(args.input_file), args.chunk_size, encoding=args.encoding)
23
+ except Exception as e:
24
+ print(f"Error: {e}", file=sys.stderr)
25
+ sys.exit(1)
26
+ print("CSV chunking completed.")
27
+ print(f"- Input: {result.input_file}")
28
+ print(f"- Output directory: {result.output_dir}")
29
+ print(f"- Chunk size: {result.chunk_size}")
30
+ print(f"- Data rows processed: {result.data_rows_processed}")
31
+ print(f"- Files created: {result.files_created}")
@@ -1,16 +1,16 @@
1
- from __future__ import annotations
2
-
3
- from .analysis import analyze_file, run_analysis
4
- from .converters import convert_report_md_to_html_pdf
5
- from .normalization import normalize_message
6
- from .parsing import iter_logical_entries
7
-
8
- __all__ = [
9
- "analyze_file",
10
- "convert_report_md_to_html_pdf",
11
- "iter_logical_entries",
12
- "normalize_message",
13
- "run_analysis",
14
- ]
15
-
16
- __version__ = "0.1.0"
1
+ from __future__ import annotations
2
+
3
+ from .analysis import analyze_file, run_analysis
4
+ from .converters import convert_report_md_to_html_pdf
5
+ from .normalization import normalize_message
6
+ from .parsing import iter_logical_entries
7
+
8
+ __all__ = [
9
+ "analyze_file",
10
+ "convert_report_md_to_html_pdf",
11
+ "iter_logical_entries",
12
+ "normalize_message",
13
+ "run_analysis",
14
+ ]
15
+
16
+ __version__ = "0.2.4"
@@ -1,8 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
- import argparse
4
- from pathlib import Path
5
- from typing import Sequence
3
+ import argparse
4
+ from collections.abc import Sequence
5
+ from pathlib import Path
6
6
 
7
7
  from .analysis import NoLogFilesError, run_analysis
8
8
  from .constants import DEFAULT_LOGS_DIR, DEFAULT_OUT_DIR, DEFAULT_TOP_EXAMPLES, EXIT_SUCCESS
@@ -16,7 +16,11 @@ def build_parser() -> argparse.ArgumentParser:
16
16
  parser = argparse.ArgumentParser(
17
17
  description="Aggregated analysis of log files (INFO/ERROR/WARNING) in logs/*.txt",
18
18
  )
19
- parser.add_argument("--logs-dir", default=DEFAULT_LOGS_DIR, help=f"Subdirectory with log files (Default: {DEFAULT_LOGS_DIR})")
19
+ parser.add_argument(
20
+ "--logs-dir",
21
+ default=DEFAULT_LOGS_DIR,
22
+ help=f"Subdirectory with log files (Default: {DEFAULT_LOGS_DIR})",
23
+ )
20
24
  parser.add_argument("--out-dir", default=DEFAULT_OUT_DIR, help=f"Output directory (Default: {DEFAULT_OUT_DIR})")
21
25
  parser.add_argument("--backup-dir", default=None, help="Backup directory (Default: <out-dir>/backup)")
22
26
  parser.add_argument(
@@ -1,10 +1,10 @@
1
- from __future__ import annotations
2
-
3
- import csv
4
- from pathlib import Path
5
- from typing import Iterable
6
-
7
- from .filesystem import ensure_dir
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ from collections.abc import Iterable
5
+ from pathlib import Path
6
+
7
+ from .filesystem import ensure_dir
8
8
 
9
9
 
10
10
  def write_csv(path: Path, rows: Iterable[tuple[str, str, int]], headers: list[str]) -> None:
@@ -1,9 +1,9 @@
1
- from __future__ import annotations
2
-
3
- from pathlib import Path
4
- from typing import Iterable
5
-
6
- from .constants import RE_ENTRY_START, RE_LINE_PREFIX, RE_TRAILING_DATASET, RE_WHITESPACE, SEVERITY_ALIASES
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterable
4
+ from pathlib import Path
5
+
6
+ from .constants import RE_ENTRY_START, RE_LINE_PREFIX, RE_TRAILING_DATASET, RE_WHITESPACE, SEVERITY_ALIASES
7
7
  from .encoding import detect_encoding
8
8
  from .models import ParsedLine
9
9
 
@@ -52,12 +52,14 @@ def build_markdown_report(
52
52
  continue
53
53
 
54
54
  lines.append("| Severity | Count | Normalized message | Examples |")
55
- lines.append("|---|---:|---|---|")
56
- for (severity, normalized_message), count in top_norm:
57
- examples_counter = analysis.norm_examples[(severity, normalized_message)]
58
- examples = [f"{message} ({amount})" for message, amount in examples_counter.most_common(config.top_examples)]
59
- examples_text = "<br>".join(examples) if examples else ""
60
- lines.append(f"| {severity} | {count} | {normalized_message} | {examples_text} |")
55
+ lines.append("|---|---:|---|---|")
56
+ for (severity, normalized_message), count in top_norm:
57
+ examples_counter = analysis.norm_examples[(severity, normalized_message)]
58
+ examples = [
59
+ f"{message} ({amount})" for message, amount in examples_counter.most_common(config.top_examples)
60
+ ]
61
+ examples_text = "<br>".join(examples) if examples else ""
62
+ lines.append(f"| {severity} | {count} | {normalized_message} | {examples_text} |")
61
63
  lines.append("")
62
64
 
63
65
  lines.append("## Overall summary (all files)")
@@ -65,12 +67,14 @@ def build_markdown_report(
65
67
  top_global = _top_counter_items(summary.global_norm, REPORT_TOP_GLOBAL)
66
68
  if top_global:
67
69
  lines.append("| Severity | Count | Normalized message | Examples |")
68
- lines.append("|---|---:|---|---|")
69
- for (severity, normalized_message), count in top_global:
70
- examples_counter = summary.global_norm_examples[(severity, normalized_message)]
71
- examples = [f"{message} ({amount})" for message, amount in examples_counter.most_common(config.top_examples)]
72
- examples_text = "<br>".join(examples) if examples else ""
73
- lines.append(f"| {severity} | {count} | {normalized_message} | {examples_text} |")
70
+ lines.append("|---|---:|---|---|")
71
+ for (severity, normalized_message), count in top_global:
72
+ examples_counter = summary.global_norm_examples[(severity, normalized_message)]
73
+ examples = [
74
+ f"{message} ({amount})" for message, amount in examples_counter.most_common(config.top_examples)
75
+ ]
76
+ examples_text = "<br>".join(examples) if examples else ""
77
+ lines.append(f"| {severity} | {count} | {normalized_message} | {examples_text} |")
74
78
  lines.append("")
75
79
  else:
76
80
  lines.append("_No messages found._")
@@ -1,10 +1,15 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import shutil
4
- from contextlib import redirect_stderr, redirect_stdout
5
- from io import StringIO
6
-
7
- from .constants import COMMON_MOJIBAKE_TOKENS, LATEX_PDF_ENGINES, LATEX_SPECIAL_CHAR_REPLACEMENTS, RE_MARKDOWN_TABLE_SEPARATOR
4
+ from contextlib import redirect_stderr, redirect_stdout
5
+ from io import StringIO
6
+
7
+ from .constants import (
8
+ COMMON_MOJIBAKE_TOKENS,
9
+ LATEX_PDF_ENGINES,
10
+ LATEX_SPECIAL_CHAR_REPLACEMENTS,
11
+ RE_MARKDOWN_TABLE_SEPARATOR,
12
+ )
8
13
 
9
14
 
10
15
  def select_pdf_engine() -> str | None:
@@ -9,17 +9,23 @@ from .report_pdf import build_pdf_safe_markdown, escape_latex_text, make_markdow
9
9
 
10
10
 
11
11
  def run_self_tests() -> None:
12
- """Run deterministic built-in assertions for quick local verification."""
13
-
14
- for raw_message, expected in NORMALIZATION_SELF_TEST_CASES:
15
- actual = normalize_message(raw_message)
16
- assert actual == expected, f"normalize_message({raw_message!r}) -> {actual!r}, expected {expected!r}"
17
-
18
- assert is_entry_start("ERROR\tLine 1: tab-separated severity")
19
- assert escape_latex_text(r"D:\DATEN_UEBERNAHME\A&B") == r"D:\textbackslash{}DATEN\_UEBERNAHME\textbackslash{}A\&B"
20
- assert make_markdown_table_line_pdf_safe("|---|---:|---|") == "|---|---:|---|"
21
- assert make_markdown_table_line_pdf_safe(r"| ERROR | D:\DATEN_1<br>foo |") == r"| ERROR | D:\textbackslash{}DATEN\_1 ; foo |"
22
- assert build_pdf_safe_markdown("plain\n| A | B |\n").endswith("\n")
12
+ """Run deterministic built-in assertions for quick local verification."""
13
+
14
+ for raw_message, expected in NORMALIZATION_SELF_TEST_CASES:
15
+ actual = normalize_message(raw_message)
16
+ assert actual == expected, f"normalize_message({raw_message!r}) -> {actual!r}, expected {expected!r}"
17
+
18
+ assert is_entry_start("ERROR\tLine 1: tab-separated severity")
19
+ assert (
20
+ escape_latex_text(r"D:\DATEN_UEBERNAHME\A&B")
21
+ == r"D:\textbackslash{}DATEN\_UEBERNAHME\textbackslash{}A\&B"
22
+ )
23
+ assert make_markdown_table_line_pdf_safe("|---|---:|---|") == "|---|---:|---|"
24
+ assert (
25
+ make_markdown_table_line_pdf_safe(r"| ERROR | D:\DATEN_1<br>foo |")
26
+ == r"| ERROR | D:\textbackslash{}DATEN\_1 ; foo |"
27
+ )
28
+ assert build_pdf_safe_markdown("plain\n| A | B |\n").endswith("\n")
23
29
 
24
30
  sample_report_markdown = """# Log Analysis Report
25
31
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nm-tool-forge
3
- Version: 0.1.0
3
+ Version: 0.2.4
4
4
  Summary: Analyze MigMan log files and generate aggregated CSV, Markdown, HTML, and optional PDF reports.
5
5
  Author-email: Stefan Ewald <s.ew@outlook.de>
6
6
  License-Expression: MIT
@@ -31,7 +31,7 @@ Dynamic: license-file
31
31
 
32
32
  # nm-tool-forge
33
33
 
34
- `nm-tool-forge` analyzes MigMan text log files with severity tokens such as `INFO`, `ERROR`, and `WARNING` and generates aggregated CSV, Markdown, HTML, and optional PDF reports.
34
+ `nm-tool-forge` analyzes MigMan text log files with severity tokens such as `INFO`, `ERROR`, and `WARNING` and generates aggregated CSV, Markdown, HTML, and optional PDF reports. The package also includes `csvchunking`, a small helper for splitting large CSV files into migration-friendly chunks.
35
35
 
36
36
  The project uses a package-ready `src` layout. The legacy `log_analysis.py` file remains available as a thin compatibility entry point for older local setups.
37
37
 
@@ -43,6 +43,7 @@ The project uses a package-ready `src` layout. The legacy `log_analysis.py` file
43
43
  - Generate Markdown summary reports
44
44
  - Optionally convert reports to HTML and PDF
45
45
  - Keep a backup copy of analyzed log files
46
+ - Split large CSV files into numbered chunks while preserving the header row
46
47
  - Run built-in self-tests from the CLI
47
48
 
48
49
  ## Installation
@@ -61,12 +62,14 @@ python -m pip install .[pdf,dev]
61
62
 
62
63
  ## Command-line usage
63
64
 
64
- After installation, both entry points are available:
65
+ After installation, the CLI entry points are available:
65
66
 
66
67
  ```powershell
67
68
  python -m loganalysis --help
69
+ python -m csvchunking --help
68
70
  loganalysis --help
69
71
  nm-tool-forge --help
72
+ csvchunking --help
70
73
  ```
71
74
 
72
75
  Typical analysis run:
@@ -93,8 +96,26 @@ Legacy compatibility call:
93
96
  python .\log_analysis.py --convert
94
97
  ```
95
98
 
99
+ CSV chunking run:
100
+
101
+ ```powershell
102
+ csvchunking "data\large_export.csv" --chunk-size 5000
103
+ ```
104
+
105
+ The command creates an output directory, named after the CSV stem, next to the input file. For example, `data\large_export.csv` is split into files such as `data\large_export\large_export_01.csv`, `data\large_export\large_export_02.csv`, and so on.
106
+
107
+ CSV chunking with an explicit encoding:
108
+
109
+ ```powershell
110
+ python -m csvchunking "data\large_export.csv" --chunk-size 5000 --encoding utf-8-sig
111
+ ```
112
+
113
+ Each chunk contains the original header row plus up to `--chunk-size` data rows. The delimiter is detected automatically; if detection fails, a semicolon (`;`) delimiter is assumed.
114
+
96
115
  ## Supported CLI options
97
116
 
117
+ Log analysis options:
118
+
98
119
  - `--logs-dir`
99
120
  - `--out-dir`
100
121
  - `--backup-dir`
@@ -102,6 +123,28 @@ python .\log_analysis.py --convert
102
123
  - `--convert`
103
124
  - `--self-test`
104
125
 
126
+ CSV chunking options:
127
+
128
+ - `input_file` - path to the CSV file to split
129
+ - `--chunk-size` - required; maximum number of data rows per output file; must be greater than zero
130
+ - `--encoding` - input and output encoding; defaults to `utf-8-sig`
131
+
132
+ ## Release process
133
+
134
+ To publish a new release, always test on TestPyPI first, then upload to PyPI only after successful Conda smoke tests:
135
+
136
+ ```bash
137
+ export TWINE_USERNAME="__token__"
138
+ export TWINE_PASSWORD="pypi-..."
139
+
140
+ bash scripts/release_testpypi.sh --bump patch
141
+ bash scripts/release_pypi.sh --yes
142
+ ```
143
+
144
+ **Notes:**
145
+ - Run and verify the TestPyPI release first, then upload the final package to PyPI.
146
+ - PyPI versions cannot be overwritten or reused.
147
+
105
148
  ## Library usage
106
149
 
107
150
  ```python
@@ -113,6 +156,7 @@ from loganalysis import (
113
156
  iter_logical_entries,
114
157
  normalize_message,
115
158
  )
159
+ from csvchunking import split_csv
116
160
 
117
161
  result = analyze_file(Path("logs/app.txt"))
118
162
  print(result["norm_counts"])
@@ -129,14 +173,21 @@ convert_report_md_to_html_pdf(
129
173
  Path("log_analyse_out/report.html"),
130
174
  Path("log_analyse_out/report.pdf"),
131
175
  )
176
+
177
+ chunk_result = split_csv(Path("data/large_export.csv"), chunk_size=5000)
178
+ print(chunk_result.output_dir)
179
+ print(chunk_result.output_files)
132
180
  ```
133
181
 
182
+ `split_csv()` returns a `ChunkResult` with the input file, output directory, chunk size, processed data-row count, created file count, and generated output file paths.
183
+
134
184
  ## Project structure
135
185
 
136
186
  ```text
137
187
  .
138
188
  ├─ pyproject.toml
139
189
  ├─ src/loganalysis/
190
+ ├─ src/csvchunking/
140
191
  ├─ tests/
141
192
  ├─ docs/
142
193
  └─ log_analysis.py
@@ -151,7 +202,9 @@ Important modules:
151
202
  - `report_html.py` - HTML/CSS rendering
152
203
  - `report_pdf.py` - PDF engine selection and fallback handling
153
204
  - `converters.py` - Markdown-to-HTML/PDF conversion
154
- - `cli.py` - command-line entry point
205
+ - `loganalysis/cli.py` - log analysis command-line entry point
206
+ - `csvchunking/chunker.py` - CSV splitting logic and `ChunkResult`
207
+ - `csvchunking/cli.py` - CSV chunking command-line entry point
155
208
 
156
209
  ## HTML/PDF conversion
157
210
 
@@ -1,6 +1,10 @@
1
1
  LICENSE
2
2
  README.md
3
3
  pyproject.toml
4
+ src/csvchunking/__init__.py
5
+ src/csvchunking/__main__.py
6
+ src/csvchunking/chunker.py
7
+ src/csvchunking/cli.py
4
8
  src/loganalysis/__init__.py
5
9
  src/loganalysis/__main__.py
6
10
  src/loganalysis/analysis.py
@@ -25,6 +29,7 @@ src/nm_tool_forge.egg-info/entry_points.txt
25
29
  src/nm_tool_forge.egg-info/requires.txt
26
30
  src/nm_tool_forge.egg-info/top_level.txt
27
31
  tests/test_analysis.py
32
+ tests/test_csvchunking.py
28
33
  tests/test_normalization.py
29
34
  tests/test_parsing.py
30
35
  tests/test_report_html.py
@@ -1,3 +1,4 @@
1
1
  [console_scripts]
2
+ csvchunking = csvchunking.cli:main
2
3
  loganalysis = loganalysis.cli:main
3
4
  nm-tool-forge = loganalysis.cli:main
@@ -18,11 +18,12 @@ def test_analyze_file_aggregates_raw_and_normalized_counts(tmp_path: Path) -> No
18
18
  result = analyze_file(log_path)
19
19
 
20
20
  assert result.total_lines == 3
21
- assert result.total_entries == 3
22
- assert result.unknown_lines == 0
23
- assert result.raw_counts[("WARNING", "Different issue")] == 1
24
- assert result.norm_counts[("ERROR", 'Conversion: X =<VALUE> The record was not found in table "Teile".')] == 2
25
- assert len(result.norm_examples[("ERROR", 'Conversion: X =<VALUE> The record was not found in table "Teile".')]) == 2
21
+ assert result.total_entries == 3
22
+ assert result.unknown_lines == 0
23
+ assert result.raw_counts[("WARNING", "Different issue")] == 1
24
+ normalized_key = ("ERROR", 'Conversion: X =<VALUE> The record was not found in table "Teile".')
25
+ assert result.norm_counts[normalized_key] == 2
26
+ assert len(result.norm_examples[normalized_key]) == 2
26
27
 
27
28
 
28
29
  def test_run_analysis_writes_outputs_and_report(tmp_path: Path) -> None:
@@ -0,0 +1,63 @@
1
+ import pytest
2
+
3
+ from csvchunking.chunker import split_csv
4
+
5
+
6
+ def make_csv(tmp_path, name, header, rows, encoding="utf-8-sig", delimiter=";"):
7
+ file = tmp_path / name
8
+ with open(file, "w", encoding=encoding, newline="") as f:
9
+ f.write(delimiter.join(header) + "\n")
10
+ for row in rows:
11
+ f.write(delimiter.join(row) + "\n")
12
+ return file
13
+
14
+
15
+ def test_regular_split(tmp_path):
16
+ header = ["col1", "col2"]
17
+ rows = [["A", "1"], ["B", "2"], ["C", "3"], ["D", "4"], ["E", "5"]]
18
+ file = make_csv(tmp_path, "sample.csv", header, rows)
19
+ result = split_csv(file, chunk_size=2)
20
+ assert result.files_created == 3
21
+ for out in result.output_files:
22
+ with open(out, encoding="utf-8-sig") as f:
23
+ lines = f.read().splitlines()
24
+ assert lines[0] == "col1;col2"
25
+ assert (result.output_dir / "sample_01.csv").exists()
26
+ assert (result.output_dir / "sample_02.csv").exists()
27
+ assert (result.output_dir / "sample_03.csv").exists()
28
+
29
+
30
+ def test_header_in_each_file(tmp_path):
31
+ header = ["foo", "bar"]
32
+ rows = [["x", "1"], ["y", "2"], ["z", "3"]]
33
+ file = make_csv(tmp_path, "test.csv", header, rows)
34
+ result = split_csv(file, chunk_size=1)
35
+ for out in result.output_files:
36
+ with open(out, encoding="utf-8-sig") as f:
37
+ assert f.readline().strip() == "foo;bar"
38
+
39
+
40
+ def test_filename_with_spaces(tmp_path):
41
+ header = ["a", "b"]
42
+ rows = [["1", "2"]]
43
+ file = make_csv(tmp_path, "Part-Storage Areas Relationships.csv", header, rows)
44
+ result = split_csv(file, chunk_size=1)
45
+ assert result.output_dir.name == "Part-Storage Areas Relationships"
46
+ assert (result.output_dir / "Part-Storage Areas Relationships_01.csv").exists()
47
+
48
+
49
+ def test_invalid_chunk_size(tmp_path):
50
+ header = ["a", "b"]
51
+ rows = [["1", "2"]]
52
+ file = make_csv(tmp_path, "fail.csv", header, rows)
53
+ with pytest.raises(ValueError):
54
+ split_csv(file, chunk_size=0)
55
+ with pytest.raises(ValueError):
56
+ split_csv(file, chunk_size=-1)
57
+
58
+
59
+ def test_empty_file(tmp_path):
60
+ file = tmp_path / "empty.csv"
61
+ file.write_text("")
62
+ with pytest.raises(ValueError):
63
+ split_csv(file, chunk_size=1)
@@ -16,7 +16,9 @@ def test_build_and_parse_markdown_report_roundtrip(tmp_path: Path) -> None:
16
16
  unknown_lines=0,
17
17
  raw_counts=Counter({("ERROR", 'Conversion: X =3100110. 138 The record was not found in table "Teile".'): 2}),
18
18
  norm_counts=Counter({normalized_key: 2}),
19
- norm_examples={normalized_key: Counter({'Conversion: X =3100110. 138 The record was not found in table "Teile".': 2})},
19
+ norm_examples={
20
+ normalized_key: Counter({'Conversion: X =3100110. 138 The record was not found in table "Teile".': 2})
21
+ },
20
22
  backup_path=tmp_path / "backup" / "demo.txt.bak",
21
23
  )
22
24
  summary = AnalysisSummary(
File without changes
File without changes