nm-tool-forge 0.2.4__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/PKG-INFO +1 -1
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/pyproject.toml +1 -1
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/csvchunking/__init__.py +1 -1
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/csvchunking/chunker.py +44 -27
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/__init__.py +1 -1
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/nm_tool_forge.egg-info/PKG-INFO +1 -1
- nm_tool_forge-0.2.5/tests/test_csvchunking.py +153 -0
- nm_tool_forge-0.2.4/tests/test_csvchunking.py +0 -63
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/LICENSE +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/README.md +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/setup.cfg +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/csvchunking/__main__.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/csvchunking/cli.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/__main__.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/analysis.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/cli.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/constants.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/converters.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/csv_export.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/encoding.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/filesystem.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/models.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/normalization.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/parsing.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/report_html.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/report_markdown.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/report_models.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/report_pdf.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/loganalysis/selftest.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/nm_tool_forge.egg-info/SOURCES.txt +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/nm_tool_forge.egg-info/dependency_links.txt +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/nm_tool_forge.egg-info/entry_points.txt +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/nm_tool_forge.egg-info/requires.txt +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/src/nm_tool_forge.egg-info/top_level.txt +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/tests/test_analysis.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/tests/test_normalization.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/tests/test_parsing.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/tests/test_report_html.py +0 -0
- {nm_tool_forge-0.2.4 → nm_tool_forge-0.2.5}/tests/test_report_markdown.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "nm-tool-forge"
|
|
7
|
-
version = "0.2.4"
|
|
7
|
+
version = "0.2.5"
|
|
8
8
|
description = "Analyze MigMan log files and generate aggregated CSV, Markdown, HTML, and optional PDF reports."
|
|
9
9
|
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -1,33 +1,50 @@
|
|
|
1
|
-
import csv
|
|
2
|
-
|
|
3
|
-
from
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
1
|
+
import csv
|
|
2
|
+
import re
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass(frozen=True)
|
|
8
|
+
class ChunkResult:
|
|
8
9
|
input_file: Path
|
|
9
10
|
output_dir: Path
|
|
10
|
-
chunk_size: int
|
|
11
|
-
data_rows_processed: int
|
|
12
|
-
files_created: int
|
|
13
|
-
output_files: tuple[Path, ...]
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
11
|
+
chunk_size: int
|
|
12
|
+
data_rows_processed: int
|
|
13
|
+
files_created: int
|
|
14
|
+
output_files: tuple[Path, ...]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def cleanup_existing_chunks(output_dir: Path, input_file: Path) -> None:
|
|
18
|
+
output_dir = Path(output_dir)
|
|
19
|
+
if not output_dir.exists():
|
|
20
|
+
return
|
|
21
|
+
|
|
22
|
+
input_file = Path(input_file)
|
|
23
|
+
pattern = re.compile(
|
|
24
|
+
rf"^{re.escape(input_file.stem)}_\d{{2,}}{re.escape(input_file.suffix)}$"
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
for existing_file in output_dir.iterdir():
|
|
28
|
+
if existing_file.is_file() and pattern.fullmatch(existing_file.name):
|
|
29
|
+
existing_file.unlink()
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def split_csv(
|
|
33
|
+
input_file: Path,
|
|
34
|
+
chunk_size: int,
|
|
35
|
+
encoding: str = "utf-8-sig",
|
|
36
|
+
) -> ChunkResult:
|
|
37
|
+
if not Path(input_file).is_file():
|
|
38
|
+
raise FileNotFoundError(f"Input file not found: {input_file}")
|
|
39
|
+
if chunk_size <= 0:
|
|
40
|
+
raise ValueError("chunk_size must be greater than 0")
|
|
25
41
|
|
|
26
42
|
input_file = Path(input_file)
|
|
27
43
|
output_dir = input_file.parent / input_file.stem
|
|
28
44
|
output_dir.mkdir(exist_ok=True)
|
|
45
|
+
cleanup_existing_chunks(output_dir, input_file)
|
|
29
46
|
|
|
30
|
-
# Detect the delimiter automatically.
|
|
47
|
+
# Detect the delimiter automatically.
|
|
31
48
|
with open(input_file, encoding=encoding, newline="") as f:
|
|
32
49
|
sample = f.read(4096)
|
|
33
50
|
f.seek(0)
|
|
@@ -38,10 +55,10 @@ def split_csv(
|
|
|
38
55
|
dialect = csv.excel
|
|
39
56
|
dialect.delimiter = ";"
|
|
40
57
|
reader = csv.reader(f, dialect)
|
|
41
|
-
try:
|
|
42
|
-
header = next(reader)
|
|
43
|
-
except StopIteration as exc:
|
|
44
|
-
raise ValueError("Input file is empty.") from exc
|
|
58
|
+
try:
|
|
59
|
+
header = next(reader)
|
|
60
|
+
except StopIteration as exc:
|
|
61
|
+
raise ValueError("Input file is empty.") from exc
|
|
45
62
|
chunk = []
|
|
46
63
|
file_count = 0
|
|
47
64
|
data_rows = 0
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from csvchunking.chunker import split_csv
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def make_csv(tmp_path, name, header, rows, encoding="utf-8-sig", delimiter=";"):
|
|
7
|
+
file = tmp_path / name
|
|
8
|
+
with open(file, "w", encoding=encoding, newline="") as f:
|
|
9
|
+
f.write(delimiter.join(header) + "\n")
|
|
10
|
+
for row in rows:
|
|
11
|
+
f.write(delimiter.join(row) + "\n")
|
|
12
|
+
return file
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_regular_split(tmp_path):
|
|
16
|
+
header = ["col1", "col2"]
|
|
17
|
+
rows = [["A", "1"], ["B", "2"], ["C", "3"], ["D", "4"], ["E", "5"]]
|
|
18
|
+
file = make_csv(tmp_path, "sample.csv", header, rows)
|
|
19
|
+
result = split_csv(file, chunk_size=2)
|
|
20
|
+
assert result.files_created == 3
|
|
21
|
+
for out in result.output_files:
|
|
22
|
+
with open(out, encoding="utf-8-sig") as f:
|
|
23
|
+
lines = f.read().splitlines()
|
|
24
|
+
assert lines[0] == "col1;col2"
|
|
25
|
+
assert (result.output_dir / "sample_01.csv").exists()
|
|
26
|
+
assert (result.output_dir / "sample_02.csv").exists()
|
|
27
|
+
assert (result.output_dir / "sample_03.csv").exists()
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_header_in_each_file(tmp_path):
|
|
31
|
+
header = ["foo", "bar"]
|
|
32
|
+
rows = [["x", "1"], ["y", "2"], ["z", "3"]]
|
|
33
|
+
file = make_csv(tmp_path, "test.csv", header, rows)
|
|
34
|
+
result = split_csv(file, chunk_size=1)
|
|
35
|
+
for out in result.output_files:
|
|
36
|
+
with open(out, encoding="utf-8-sig") as f:
|
|
37
|
+
assert f.readline().strip() == "foo;bar"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_filename_with_spaces(tmp_path):
|
|
41
|
+
header = ["a", "b"]
|
|
42
|
+
rows = [["1", "2"]]
|
|
43
|
+
file = make_csv(tmp_path, "Part-Storage Areas Relationships.csv", header, rows)
|
|
44
|
+
result = split_csv(file, chunk_size=1)
|
|
45
|
+
assert result.output_dir.name == "Part-Storage Areas Relationships"
|
|
46
|
+
assert (result.output_dir / "Part-Storage Areas Relationships_01.csv").exists()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_cleanup_removes_stale_matching_chunk_files(tmp_path):
|
|
50
|
+
header = ["col1", "col2"]
|
|
51
|
+
rows = [["A", "1"], ["B", "2"], ["C", "3"], ["D", "4"]]
|
|
52
|
+
file = make_csv(tmp_path, "sample.csv", header, rows)
|
|
53
|
+
output_dir = tmp_path / "sample"
|
|
54
|
+
output_dir.mkdir()
|
|
55
|
+
for name in ("sample_01.csv", "sample_02.csv", "sample_03.csv"):
|
|
56
|
+
(output_dir / name).write_text("old chunk\n", encoding="utf-8-sig")
|
|
57
|
+
|
|
58
|
+
result = split_csv(file, chunk_size=2)
|
|
59
|
+
|
|
60
|
+
assert result.files_created == 2
|
|
61
|
+
assert (result.output_dir / "sample_01.csv").exists()
|
|
62
|
+
assert (result.output_dir / "sample_02.csv").exists()
|
|
63
|
+
assert not (result.output_dir / "sample_03.csv").exists()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def test_cleanup_keeps_non_matching_csv_files_and_subdirectories(tmp_path):
|
|
67
|
+
header = ["col1", "col2"]
|
|
68
|
+
rows = [["A", "1"], ["B", "2"]]
|
|
69
|
+
file = make_csv(tmp_path, "sample.csv", header, rows)
|
|
70
|
+
output_dir = tmp_path / "sample"
|
|
71
|
+
output_dir.mkdir()
|
|
72
|
+
preserved_files = [
|
|
73
|
+
"notes.csv",
|
|
74
|
+
"sample_backup.csv",
|
|
75
|
+
"sample_old.csv",
|
|
76
|
+
"other_01.csv",
|
|
77
|
+
"sample_1.csv",
|
|
78
|
+
]
|
|
79
|
+
for name in preserved_files:
|
|
80
|
+
(output_dir / name).write_text("keep\n", encoding="utf-8-sig")
|
|
81
|
+
matching_subdir = output_dir / "sample_99.csv"
|
|
82
|
+
matching_subdir.mkdir()
|
|
83
|
+
(matching_subdir / "nested.txt").write_text("keep nested\n", encoding="utf-8-sig")
|
|
84
|
+
|
|
85
|
+
result = split_csv(file, chunk_size=1)
|
|
86
|
+
|
|
87
|
+
for name in preserved_files:
|
|
88
|
+
assert (result.output_dir / name).exists()
|
|
89
|
+
assert matching_subdir.is_dir()
|
|
90
|
+
assert (matching_subdir / "nested.txt").exists()
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def test_cleanup_filename_with_spaces_uses_exact_chunk_pattern(tmp_path):
|
|
94
|
+
header = ["a", "b"]
|
|
95
|
+
rows = [["1", "2"], ["3", "4"], ["5", "6"], ["7", "8"]]
|
|
96
|
+
filename = "Part-Storage Areas Relationships.csv"
|
|
97
|
+
file = make_csv(tmp_path, filename, header, rows)
|
|
98
|
+
output_dir = tmp_path / "Part-Storage Areas Relationships"
|
|
99
|
+
output_dir.mkdir()
|
|
100
|
+
for name in (
|
|
101
|
+
"Part-Storage Areas Relationships_01.csv",
|
|
102
|
+
"Part-Storage Areas Relationships_02.csv",
|
|
103
|
+
"Part-Storage Areas Relationships_99.csv",
|
|
104
|
+
):
|
|
105
|
+
(output_dir / name).write_text("old chunk\n", encoding="utf-8-sig")
|
|
106
|
+
backup_file = output_dir / "Part-Storage Areas Relationships_backup.csv"
|
|
107
|
+
backup_file.write_text("keep\n", encoding="utf-8-sig")
|
|
108
|
+
|
|
109
|
+
result = split_csv(file, chunk_size=2)
|
|
110
|
+
|
|
111
|
+
assert result.output_dir == output_dir
|
|
112
|
+
assert (result.output_dir / "Part-Storage Areas Relationships_01.csv").exists()
|
|
113
|
+
assert (result.output_dir / "Part-Storage Areas Relationships_02.csv").exists()
|
|
114
|
+
assert not (result.output_dir / "Part-Storage Areas Relationships_99.csv").exists()
|
|
115
|
+
assert backup_file.exists()
|
|
116
|
+
assert "old chunk" not in (
|
|
117
|
+
result.output_dir / "Part-Storage Areas Relationships_01.csv"
|
|
118
|
+
).read_text(encoding="utf-8-sig")
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def test_cleanup_repeated_run_removes_extra_chunks(tmp_path):
|
|
122
|
+
header = ["col1", "col2"]
|
|
123
|
+
first_rows = [["A", "1"], ["B", "2"], ["C", "3"], ["D", "4"], ["E", "5"]]
|
|
124
|
+
file = make_csv(tmp_path, "sample.csv", header, first_rows)
|
|
125
|
+
first_result = split_csv(file, chunk_size=2)
|
|
126
|
+
assert first_result.files_created == 3
|
|
127
|
+
assert (first_result.output_dir / "sample_03.csv").exists()
|
|
128
|
+
|
|
129
|
+
second_rows = [["A", "1"], ["B", "2"]]
|
|
130
|
+
make_csv(tmp_path, "sample.csv", header, second_rows)
|
|
131
|
+
second_result = split_csv(file, chunk_size=2)
|
|
132
|
+
|
|
133
|
+
assert second_result.files_created == 1
|
|
134
|
+
assert (second_result.output_dir / "sample_01.csv").exists()
|
|
135
|
+
assert not (second_result.output_dir / "sample_02.csv").exists()
|
|
136
|
+
assert not (second_result.output_dir / "sample_03.csv").exists()
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def test_invalid_chunk_size(tmp_path):
|
|
140
|
+
header = ["a", "b"]
|
|
141
|
+
rows = [["1", "2"]]
|
|
142
|
+
file = make_csv(tmp_path, "fail.csv", header, rows)
|
|
143
|
+
with pytest.raises(ValueError):
|
|
144
|
+
split_csv(file, chunk_size=0)
|
|
145
|
+
with pytest.raises(ValueError):
|
|
146
|
+
split_csv(file, chunk_size=-1)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def test_empty_file(tmp_path):
|
|
150
|
+
file = tmp_path / "empty.csv"
|
|
151
|
+
file.write_text("")
|
|
152
|
+
with pytest.raises(ValueError):
|
|
153
|
+
split_csv(file, chunk_size=1)
|
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
|
|
3
|
-
from csvchunking.chunker import split_csv
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def make_csv(tmp_path, name, header, rows, encoding="utf-8-sig", delimiter=";"):
|
|
7
|
-
file = tmp_path / name
|
|
8
|
-
with open(file, "w", encoding=encoding, newline="") as f:
|
|
9
|
-
f.write(delimiter.join(header) + "\n")
|
|
10
|
-
for row in rows:
|
|
11
|
-
f.write(delimiter.join(row) + "\n")
|
|
12
|
-
return file
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def test_regular_split(tmp_path):
|
|
16
|
-
header = ["col1", "col2"]
|
|
17
|
-
rows = [["A", "1"], ["B", "2"], ["C", "3"], ["D", "4"], ["E", "5"]]
|
|
18
|
-
file = make_csv(tmp_path, "sample.csv", header, rows)
|
|
19
|
-
result = split_csv(file, chunk_size=2)
|
|
20
|
-
assert result.files_created == 3
|
|
21
|
-
for out in result.output_files:
|
|
22
|
-
with open(out, encoding="utf-8-sig") as f:
|
|
23
|
-
lines = f.read().splitlines()
|
|
24
|
-
assert lines[0] == "col1;col2"
|
|
25
|
-
assert (result.output_dir / "sample_01.csv").exists()
|
|
26
|
-
assert (result.output_dir / "sample_02.csv").exists()
|
|
27
|
-
assert (result.output_dir / "sample_03.csv").exists()
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def test_header_in_each_file(tmp_path):
|
|
31
|
-
header = ["foo", "bar"]
|
|
32
|
-
rows = [["x", "1"], ["y", "2"], ["z", "3"]]
|
|
33
|
-
file = make_csv(tmp_path, "test.csv", header, rows)
|
|
34
|
-
result = split_csv(file, chunk_size=1)
|
|
35
|
-
for out in result.output_files:
|
|
36
|
-
with open(out, encoding="utf-8-sig") as f:
|
|
37
|
-
assert f.readline().strip() == "foo;bar"
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def test_filename_with_spaces(tmp_path):
|
|
41
|
-
header = ["a", "b"]
|
|
42
|
-
rows = [["1", "2"]]
|
|
43
|
-
file = make_csv(tmp_path, "Part-Storage Areas Relationships.csv", header, rows)
|
|
44
|
-
result = split_csv(file, chunk_size=1)
|
|
45
|
-
assert result.output_dir.name == "Part-Storage Areas Relationships"
|
|
46
|
-
assert (result.output_dir / "Part-Storage Areas Relationships_01.csv").exists()
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def test_invalid_chunk_size(tmp_path):
|
|
50
|
-
header = ["a", "b"]
|
|
51
|
-
rows = [["1", "2"]]
|
|
52
|
-
file = make_csv(tmp_path, "fail.csv", header, rows)
|
|
53
|
-
with pytest.raises(ValueError):
|
|
54
|
-
split_csv(file, chunk_size=0)
|
|
55
|
-
with pytest.raises(ValueError):
|
|
56
|
-
split_csv(file, chunk_size=-1)
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
def test_empty_file(tmp_path):
|
|
60
|
-
file = tmp_path / "empty.csv"
|
|
61
|
-
file.write_text("")
|
|
62
|
-
with pytest.raises(ValueError):
|
|
63
|
-
split_csv(file, chunk_size=1)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|