csv-inspector-tool 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- csv_inspector_tool-0.1.0/LICENSE +9 -0
- csv_inspector_tool-0.1.0/PKG-INFO +35 -0
- csv_inspector_tool-0.1.0/README.md +20 -0
- csv_inspector_tool-0.1.0/csv_inspector/__init__.py +5 -0
- csv_inspector_tool-0.1.0/csv_inspector/cli.py +42 -0
- csv_inspector_tool-0.1.0/csv_inspector/reader.py +131 -0
- csv_inspector_tool-0.1.0/csv_inspector_tool.egg-info/PKG-INFO +35 -0
- csv_inspector_tool-0.1.0/csv_inspector_tool.egg-info/SOURCES.txt +12 -0
- csv_inspector_tool-0.1.0/csv_inspector_tool.egg-info/dependency_links.txt +1 -0
- csv_inspector_tool-0.1.0/csv_inspector_tool.egg-info/entry_points.txt +2 -0
- csv_inspector_tool-0.1.0/csv_inspector_tool.egg-info/top_level.txt +1 -0
- csv_inspector_tool-0.1.0/pyproject.toml +27 -0
- csv_inspector_tool-0.1.0/setup.cfg +4 -0
- csv_inspector_tool-0.1.0/tests/test_reader.py +105 -0
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Sanchita Karki
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
6
|
+
|
|
7
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
8
|
+
|
|
9
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: csv-inspector-tool
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Read CSV files and report title, headers, and per-column row counts.
|
|
5
|
+
Author-email: Sanchita Karki <karkisanchu06@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Sann842/csv-inspector.git
|
|
8
|
+
Keywords: csv,data,inspector
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.8
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Dynamic: license-file
|
|
15
|
+
|
|
16
|
+
# csv-inspector
|
|
17
|
+
|
|
18
|
+
A Python package that reads a CSV file and reports:
|
|
19
|
+
-- Table Title - derived from the file name
|
|
20
|
+
-- Headers - every column name found in the first row
|
|
21
|
+
-- Row Counts per Column - total rows, non-empty rows, empty rows, and fill-rate %
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# installation
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install .
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# running test
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
python -m pytest tests/
|
|
35
|
+
```
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# csv-inspector
|
|
2
|
+
|
|
3
|
+
A Python package that reads a CSV file and reports:
|
|
4
|
+
-- Table Title - derived from the file name
|
|
5
|
+
-- Headers - every column name found in the first row
|
|
6
|
+
-- Row Counts per Column - total rows, non-empty rows, empty rows, and fill-rate %
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install .
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# running tests
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
python -m pytest tests/
|
|
20
|
+
```
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Command-line interface: python -m csv_inspector.cli <file> [file …]
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from .reader import inspect
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def main(argv=None) -> None:
    """Run the ``csv-inspector`` command-line interface.

    Each FILE is inspected in turn and its report printed to stdout. A file
    that fails is reported on stderr and the remaining files are still
    processed.

    Parameters
    ----------
    argv : sequence of str, optional
        Arguments to parse, without the program name. ``None`` (the default)
        lets :mod:`argparse` read ``sys.argv[1:]``.

    Raises
    ------
    SystemExit
        Always. The status is ``0`` when every file was inspected and ``1``
        when at least one failed; :mod:`argparse` exits with ``2`` on a usage
        error.
    """
    parser = argparse.ArgumentParser(
        prog="csv-inspector",
        description="Inspect CSV files: show title, headers, and row counts per column.",
    )
    parser.add_argument("files", nargs="+", metavar="FILE", help="One or more CSV files to inspect.")
    parser.add_argument("--encoding", default="utf-8", help="File encoding (default: utf-8).")
    parser.add_argument(
        "--delimiter",
        default=None,
        help="Column delimiter (write \\t for a tab). Omit to auto-detect.",
    )

    args = parser.parse_args(argv)

    # A shell delivers "\t" as backslash + "t"; the csv module needs a real
    # one-character tab, so translate that spelling here.
    delimiter = "\t" if args.delimiter == r"\t" else args.delimiter

    exit_code = 0
    for path in args.files:
        try:
            report = inspect(path, encoding=args.encoding, delimiter=delimiter)
            print(report)
        except FileNotFoundError:
            print(f"ERROR: file not found — {path}", file=sys.stderr)
            exit_code = 1
        except Exception as exc:  # noqa: BLE001
            # Top-level boundary: report the failure and carry on with the
            # next file instead of aborting the whole run.
            print(f"ERROR: {path} — {exc}", file=sys.stderr)
            exit_code = 1

    sys.exit(exit_code)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
if __name__ == "__main__":
|
|
42
|
+
main()
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Core logic for reading and analysing CSV files.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import csv
|
|
6
|
+
import os
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import List, Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class ColumnStats:
    """Counts collected for one column (header) of a CSV file."""

    name: str
    non_empty_count: int  # rows whose cell holds a value
    empty_count: int  # rows whose cell is blank or missing
    total_count: int  # number of data rows; the header row is not counted

    @property
    def fill_rate(self) -> float:
        """Return the percentage of rows with a value, rounded to 2 places.

        A column with no rows reports ``0.0``.
        """
        if self.total_count:
            ratio = self.non_empty_count / self.total_count
            return round(ratio * 100, 2)
        return 0.0

    def __str__(self) -> str:
        """Render the stats as a single report line."""
        pieces = [
            f" {self.name!r:<30} ",
            f"total={self.total_count} ",
            f"non-empty={self.non_empty_count} ",
            f"empty={self.empty_count} ",
            f"fill={self.fill_rate}%",
        ]
        return "".join(pieces)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class TableReport:
    """Inspection result for a single CSV file."""

    file_path: str
    title: str  # file name with the extension removed
    headers: List[str]
    total_rows: int  # data rows only; the header row is not counted
    column_stats: List[ColumnStats] = field(default_factory=list)

    def __str__(self) -> str:
        """Render the report as a block of text framed by two rule lines."""
        rule = "=" * 60
        summary = [
            rule,
            f"Table : {self.title}",
            f"File : {self.file_path}",
            f"Rows : {self.total_rows}",
            f"Columns: {len(self.headers)}",
            "",
            "Headers & row counts:",
        ]
        per_column = [str(stat) for stat in self.column_stats]
        return "\n".join(summary + per_column + [rule])
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def inspect(
    file_path: str,
    encoding: str = "utf-8",
    delimiter: Optional[str] = None,
) -> TableReport:
    """
    Read a CSV file and return a :class:`TableReport`.

    The first record is taken as the header row. Every later record that is
    not completely blank counts as one data row. For each header, the cell in
    the same position counts as non-empty when it holds something other than
    whitespace, and as empty otherwise (including when the record is too
    short to have that cell). Cells beyond the last header are ignored.
    Columns are tracked by position, so repeated header names each get their
    own statistics.

    Parameters
    ----------
    file_path : str
        Path to the CSV file.
    encoding : str
        File encoding (default ``"utf-8"``).
    delimiter : str or None
        Column delimiter. ``None`` means auto-detect via
        :class:`csv.Sniffer` on the first 4096 characters, choosing among
        ``,`` ``;`` tab and ``|``, and falling back to ``,`` when detection
        fails.

    Returns
    -------
    TableReport
        ``file_path`` holds the absolute path and ``title`` the file name
        without its extension.

    Raises
    ------
    FileNotFoundError
        If ``file_path`` does not exist.
    """
    file_path = os.path.abspath(file_path)
    title = os.path.splitext(os.path.basename(file_path))[0]

    # newline="" passes line endings to the csv module untouched. Feeding it
    # the file object (rather than str.splitlines() output) keeps records
    # intact when a cell contains a form feed, U+2028 or another character
    # that splitlines() treats as a line boundary, and avoids loading the
    # whole file into memory.
    with open(file_path, newline="", encoding=encoding) as fh:
        # Auto-detect delimiter when not supplied
        if delimiter is None:
            sample = fh.read(4096)
            fh.seek(0)  # rewind so the header row is parsed as well
            try:
                delimiter = csv.Sniffer().sniff(sample, delimiters=",;\t|").delimiter
            except csv.Error:
                delimiter = ","  # fall back to comma

        records = csv.reader(fh, delimiter=delimiter)
        headers = next(records, None) or []
        width = len(headers)

        # Positional counters: name-keyed dicts would merge duplicate headers.
        non_empty = [0] * width
        total_rows = 0

        for record in records:
            if not record:
                continue  # completely blank line, not a data row
            total_rows += 1
            for idx, cell in enumerate(record[:width]):
                if cell.strip():
                    non_empty[idx] += 1

    column_stats = [
        ColumnStats(
            name=name,
            non_empty_count=filled,
            # Anything not filled is empty, including cells missing from short rows.
            empty_count=total_rows - filled,
            total_count=total_rows,
        )
        for name, filled in zip(headers, non_empty)
    ]

    return TableReport(
        file_path=file_path,
        title=title,
        headers=list(headers),
        total_rows=total_rows,
        column_stats=column_stats,
    )
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: csv-inspector-tool
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Read CSV files and report title, headers, and per-column row counts.
|
|
5
|
+
Author-email: Sanchita Karki <karkisanchu06@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Sann842/csv-inspector.git
|
|
8
|
+
Keywords: csv,data,inspector
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.8
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Dynamic: license-file
|
|
15
|
+
|
|
16
|
+
# csv-inspector
|
|
17
|
+
|
|
18
|
+
A Python package that reads a CSV file and reports:
|
|
19
|
+
-- Table Title - derived from the file name
|
|
20
|
+
-- Headers - every column name found in the first row
|
|
21
|
+
-- Row Counts per Column - total rows, non-empty rows, empty rows, and fill-rate %
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# installation
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install .
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# running test
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
python -m pytest tests/
|
|
35
|
+
```
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
csv_inspector/__init__.py
|
|
5
|
+
csv_inspector/cli.py
|
|
6
|
+
csv_inspector/reader.py
|
|
7
|
+
csv_inspector_tool.egg-info/PKG-INFO
|
|
8
|
+
csv_inspector_tool.egg-info/SOURCES.txt
|
|
9
|
+
csv_inspector_tool.egg-info/dependency_links.txt
|
|
10
|
+
csv_inspector_tool.egg-info/entry_points.txt
|
|
11
|
+
csv_inspector_tool.egg-info/top_level.txt
|
|
12
|
+
tests/test_reader.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
csv_inspector
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "csv-inspector-tool"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Read CSV files and report title, headers, and per-column row counts."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
license-files = ["LICENSE"]
|
|
12
|
+
requires-python = ">=3.8"
|
|
13
|
+
authors = [
|
|
14
|
+
{ name = "Sanchita Karki", email = "karkisanchu06@gmail.com" }
|
|
15
|
+
]
|
|
16
|
+
keywords = ["csv", "data", "inspector"]
|
|
17
|
+
classifiers = [
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Operating System :: OS Independent",
|
|
20
|
+
]
|
|
21
|
+
dependencies = []
|
|
22
|
+
|
|
23
|
+
[project.scripts]
|
|
24
|
+
csv-inspector = "csv_inspector.cli:main"
|
|
25
|
+
|
|
26
|
+
[project.urls]
|
|
27
|
+
Homepage = "https://github.com/Sann842/csv-inspector.git"
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import textwrap
|
|
3
|
+
import tempfile
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from csv_inspector import inspect, TableReport, ColumnStats
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def write_csv(content: str) -> str:
    """Write dedented *content* to a new temporary ``.csv`` file.

    The file is created with ``delete=False``, so the caller is responsible
    for removing it.

    Returns the path of the file.
    """
    # The with-block closes the handle even if the write raises.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".csv", delete=False, encoding="utf-8"
    ) as tmp:
        tmp.write(textwrap.dedent(content))
    return tmp.name
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
# Basic smoke test
|
|
20
|
+
def test_basic():
    """Three data rows with one blank score: totals, headers, title and stats."""
    csv_path = write_csv("id,name,score\n1,Alice,95\n2,Bob,88\n3,Carol,\n")
    try:
        result = inspect(csv_path)
        assert isinstance(result, TableReport)
        assert result.total_rows == 3
        assert result.headers == ["id", "name", "score"]
        stem, _ext = os.path.splitext(os.path.basename(csv_path))
        assert result.title == stem

        stats = {col.name: col for col in result.column_stats}
        assert stats["id"].non_empty_count == 3
        assert stats["score"].empty_count == 1
        assert stats["score"].fill_rate == pytest.approx(66.67, abs=0.01)
    finally:
        os.unlink(csv_path)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Single-column CSV
|
|
43
|
+
def test_single_column():
    """A one-column file with a trailing blank line yields two data rows."""
    csv_path = write_csv("fruit\napple\nbanana\n\n")
    try:
        result = inspect(csv_path)
        assert result.headers == ["fruit"]
        assert result.total_rows == 2
        only_column = result.column_stats[0]
        assert only_column.empty_count == 0
    finally:
        os.unlink(csv_path)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# Auto-detect semicolon delimiter
|
|
55
|
+
def test_semicolon_delimiter():
    """A semicolon-separated file is split correctly without a delimiter hint."""
    csv_path = write_csv("a;b;c\n1;2;3\n4;5;6\n")
    try:
        result = inspect(csv_path)
        assert result.headers == ["a", "b", "c"]
        assert result.total_rows == 2
    finally:
        os.unlink(csv_path)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# Explicit delimiter
|
|
66
|
+
def test_explicit_tab_delimiter():
    """Passing ``delimiter="\\t"`` splits a tab-separated file into columns."""
    csv_path = write_csv("x\ty\n10\t20\n30\t40\n")
    try:
        result = inspect(csv_path, delimiter="\t")
        assert result.headers == ["x", "y"]
        assert result.total_rows == 2
    finally:
        os.unlink(csv_path)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# File not found
|
|
77
|
+
def test_missing_file():
    """Inspecting a path that does not exist raises ``FileNotFoundError``."""
    missing = "/nonexistent/path/file.csv"
    with pytest.raises(FileNotFoundError):
        inspect(missing)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# Header-only file (no data rows)
|
|
83
|
+
def test_header_only():
    """A file with only a header row reports zero rows and a 0.0 fill rate."""
    csv_path = write_csv("col1,col2,col3\n")
    try:
        result = inspect(csv_path)
        assert result.total_rows == 0
        for column in result.column_stats:
            assert column.non_empty_count == 0
            assert column.fill_rate == 0.0
    finally:
        os.unlink(csv_path)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
# __str__ sanity check
|
|
96
|
+
def test_str_output():
    """The rendered report mentions each header and the row count."""
    csv_path = write_csv("name,age\nAlice,30\nBob,\n")
    try:
        result = inspect(csv_path)
        rendered = str(result)
        for expected in ("name", "age", str(result.total_rows)):
            assert expected in rendered
    finally:
        os.unlink(csv_path)
|