csv-inspector-tool 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Sanchita Karki
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6
+
7
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
8
+
9
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,35 @@
1
+ Metadata-Version: 2.4
2
+ Name: csv-inspector-tool
3
+ Version: 0.1.0
4
+ Summary: Read CSV files and report title, headers, and per-column row counts.
5
+ Author-email: Sanchita Karki <karkisanchu06@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/Sann842/csv-inspector.git
8
+ Keywords: csv,data,inspector
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.8
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Dynamic: license-file
15
+
16
+ # csv-inspector
17
+
18
+ A Python package that reads a CSV file and reports:
19
+ -- Table Title - derived from the file name
20
+ -- Headers - every column name found in the first row
21
+ -- Row Counts per Column - total rows, non-empty rows, empty rows, and fill-rate %
22
+
23
+
24
+ # installation
25
+
26
+ ```bash
27
+ pip install .
28
+ ```
29
+
30
+
31
+ # running test
32
+
33
+ ```bash
34
+ python -m pytest tests/
35
+ ```
@@ -0,0 +1,20 @@
1
+ # csv-inspector
2
+
3
+ A Python package that reads a CSV file and reports:
4
+ - Table Title - derived from the file name
5
+ - Headers - every column name found in the first row
6
+ - Row Counts per Column - total rows, non-empty rows, empty rows, and fill-rate %
7
+
8
+
9
+ # Installation
10
+
11
+ ```bash
12
+ pip install .
13
+ ```
14
+
15
+
16
+ # Running the tests
17
+
18
+ ```bash
19
+ python -m pytest tests/
20
+ ```
@@ -0,0 +1,5 @@
1
+ """csv_inspector — inspect CSV files from Python or the command line."""
2
+
3
+ from .reader import ColumnStats, TableReport, inspect
4
+
5
+ __all__ = ["inspect", "TableReport", "ColumnStats"]
@@ -0,0 +1,42 @@
1
+ """
2
+ Command-line interface: csv-inspector <file> [file …] (or: python -m csv_inspector.cli <file> [file …])
3
+ """
4
+
5
+ import argparse
6
+ import sys
7
+
8
+ from .reader import inspect
9
+
10
+
11
def main(argv=None) -> None:
    """
    Run the ``csv-inspector`` command line.

    Each FILE argument is inspected in turn and its report is printed to
    stdout.  A file that cannot be inspected is reported on stderr and the
    remaining files are still processed.  The function always ends by calling
    :func:`sys.exit` with ``0`` if every file succeeded and ``1`` otherwise.

    Parameters
    ----------
    argv : list of str or None
        Arguments to parse.  ``None`` (the default) lets argparse read
        ``sys.argv[1:]``, which is what the console-script entry point
        relies on; passing a list makes the function callable from tests or
        other Python code.
    """
    parser = argparse.ArgumentParser(
        prog="csv-inspector",
        description="Inspect CSV files: show title, headers, and row counts per column.",
    )
    parser.add_argument("files", nargs="+", metavar="FILE", help="One or more CSV files to inspect.")
    parser.add_argument("--encoding", default="utf-8", help="File encoding (default: utf-8).")
    parser.add_argument(
        "--delimiter",
        default=None,
        help="Column delimiter (write \\t for a tab). Omit to auto-detect.",
    )

    args = parser.parse_args(argv)

    # A real tab is awkward to type in most shells; accept the two-character
    # escape "\t" as well, since the csv module only takes one character.
    delimiter = args.delimiter
    if delimiter == "\\t":
        delimiter = "\t"

    exit_code = 0
    for path in args.files:
        try:
            report = inspect(path, encoding=args.encoding, delimiter=delimiter)
            print(report)
        except FileNotFoundError:
            print(f"ERROR: file not found — {path}", file=sys.stderr)
            exit_code = 1
        except Exception as exc:  # noqa: BLE001
            # Top-level boundary: one unreadable file must not stop the
            # remaining files from being reported.
            print(f"ERROR: {path} — {exc}", file=sys.stderr)
            exit_code = 1

    sys.exit(exit_code)


if __name__ == "__main__":
    main()
@@ -0,0 +1,131 @@
1
+ """
2
+ Core logic for reading and analysing CSV files.
3
+ """
4
+
5
+ import csv
6
+ import os
7
+ from dataclasses import dataclass, field
8
+ from typing import List, Optional
9
+
10
+
11
@dataclass
class ColumnStats:
    """Per-column counters gathered from the data rows of one CSV file."""
    name: str              # column header text
    non_empty_count: int   # data rows whose cell holds a value
    empty_count: int       # data rows whose cell is blank or missing
    total_count: int       # all data rows (the header row is not counted)

    @property
    def fill_rate(self) -> float:
        """Share of data rows with a value, as a percentage rounded to 2 places.

        Returns ``0.0`` when there are no data rows at all.
        """
        if self.total_count == 0:
            return 0.0
        ratio = self.non_empty_count / self.total_count
        return round(ratio * 100, 2)

    def __str__(self) -> str:
        """Render the counters as a single report line."""
        pieces = [
            f" {self.name!r:<30} ",
            f"total={self.total_count} ",
            f"non-empty={self.non_empty_count} ",
            f"empty={self.empty_count} ",
            f"fill={self.fill_rate}%",
        ]
        return "".join(pieces)
34
+
35
+
36
@dataclass
class TableReport:
    """Everything reported about a single CSV file."""
    file_path: str     # path of the CSV file this report describes
    title: str         # file name with the extension removed
    headers: List[str]
    total_rows: int    # number of data rows; the header row is not counted
    column_stats: List[ColumnStats] = field(default_factory=list)

    def __str__(self) -> str:
        """Render a summary followed by one line per column, framed by rules."""
        rule = "=" * 60
        summary = [
            rule,
            f"Table : {self.title}",
            f"File : {self.file_path}",
            f"Rows : {self.total_rows}",
            f"Columns: {len(self.headers)}",
            "",
            "Headers & row counts:",
        ]
        per_column = [str(stat) for stat in self.column_stats]
        return "\n".join(summary + per_column + [rule])
60
+
61
+
62
def inspect(
    file_path: str,
    encoding: str = "utf-8",
    delimiter: Optional[str] = None,
) -> TableReport:
    """
    Read a CSV file and return a :class:`TableReport`.

    The first row of the file is taken as the header row; every following
    non-blank row counts as one data row.  A cell is "empty" when it is
    missing from its row or contains only whitespace.  Columns are counted by
    position, so repeated header names each get their own statistics.

    Parameters
    ----------
    file_path : str
        Path to the CSV file.  It is converted to an absolute path, and that
        absolute path is what the returned report stores.
    encoding : str
        File encoding (default ``"utf-8"``).
    delimiter : str or None
        Column delimiter.  ``None`` means auto-detect via
        :class:`csv.Sniffer` on the first 4096 characters (candidates:
        ``,`` ``;`` tab ``|``), falling back to ``,`` when detection fails.

    Returns
    -------
    TableReport

    Raises
    ------
    FileNotFoundError
        If ``file_path`` does not exist (raised by :func:`open`).
    """
    file_path = os.path.abspath(file_path)
    title = os.path.splitext(os.path.basename(file_path))[0]

    # newline="" passes line endings through untouched so the csv module can
    # handle quoted fields that contain line breaks.  The file object is fed
    # to csv.reader directly rather than via str.splitlines(), which would
    # also split on characters such as form feed or U+2028 inside a cell.
    with open(file_path, newline="", encoding=encoding) as fh:
        # Auto-detect delimiter when not supplied
        if delimiter is None:
            sample = fh.read(4096)
            fh.seek(0)  # rewind so the header row is parsed from the start
            try:
                delimiter = csv.Sniffer().sniff(sample, delimiters=",;\t|").delimiter
            except csv.Error:
                delimiter = ","  # fall back to comma

        rows = csv.reader(fh, delimiter=delimiter)
        headers = next(rows, None) or []
        column_count = len(headers)

        # One counter per column *position*; keying by header name would make
        # duplicate names share (and double-increment) a single counter.
        non_empty = [0] * column_count
        total_rows = 0

        for row in rows:
            if not row:
                continue  # completely blank line, not a data row
            total_rows += 1
            # zip() stops at the shorter side: cells beyond the header width
            # are ignored and missing trailing cells simply stay uncounted.
            for index, cell in zip(range(column_count), row):
                if cell.strip():
                    non_empty[index] += 1

    column_stats = [
        ColumnStats(
            name=name,
            non_empty_count=filled,
            empty_count=total_rows - filled,  # blank or missing cells
            total_count=total_rows,
        )
        for name, filled in zip(headers, non_empty)
    ]

    return TableReport(
        file_path=file_path,
        title=title,
        headers=list(headers),
        total_rows=total_rows,
        column_stats=column_stats,
    )
@@ -0,0 +1,35 @@
1
+ Metadata-Version: 2.4
2
+ Name: csv-inspector-tool
3
+ Version: 0.1.0
4
+ Summary: Read CSV files and report title, headers, and per-column row counts.
5
+ Author-email: Sanchita Karki <karkisanchu06@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/Sann842/csv-inspector.git
8
+ Keywords: csv,data,inspector
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.8
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Dynamic: license-file
15
+
16
+ # csv-inspector
17
+
18
+ A Python package that reads a CSV file and reports:
19
+ -- Table Title - derived from the file name
20
+ -- Headers - every column name found in the first row
21
+ -- Row Counts per Column - total rows, non-empty rows, empty rows, and fill-rate %
22
+
23
+
24
+ # installation
25
+
26
+ ```bash
27
+ pip install .
28
+ ```
29
+
30
+
31
+ # running test
32
+
33
+ ```bash
34
+ python -m pytest tests/
35
+ ```
@@ -0,0 +1,12 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ csv_inspector/__init__.py
5
+ csv_inspector/cli.py
6
+ csv_inspector/reader.py
7
+ csv_inspector_tool.egg-info/PKG-INFO
8
+ csv_inspector_tool.egg-info/SOURCES.txt
9
+ csv_inspector_tool.egg-info/dependency_links.txt
10
+ csv_inspector_tool.egg-info/entry_points.txt
11
+ csv_inspector_tool.egg-info/top_level.txt
12
+ tests/test_reader.py
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ csv-inspector = csv_inspector.cli:main
@@ -0,0 +1,27 @@
1
[build-system]
# The [project] table uses PEP 639 metadata (an SPDX string in `license`
# plus `license-files`), which setuptools only accepts from version 77 on.
requires = ["setuptools>=77.0.3", "wheel"]
build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "csv-inspector-tool"
7
+ version = "0.1.0"
8
+ description = "Read CSV files and report title, headers, and per-column row counts."
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ license-files = ["LICENSE"]
12
+ requires-python = ">=3.8"
13
+ authors = [
14
+ { name = "Sanchita Karki", email = "karkisanchu06@gmail.com" }
15
+ ]
16
+ keywords = ["csv", "data", "inspector"]
17
+ classifiers = [
18
+ "Programming Language :: Python :: 3",
19
+ "Operating System :: OS Independent",
20
+ ]
21
+ dependencies = []
22
+
23
+ [project.scripts]
24
+ csv-inspector = "csv_inspector.cli:main"
25
+
26
+ [project.urls]
27
+ Homepage = "https://github.com/Sann842/csv-inspector.git"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,105 @@
1
+ import os
2
+ import textwrap
3
+ import tempfile
4
+ import pytest
5
+
6
+ from csv_inspector import inspect, TableReport, ColumnStats
7
+
8
+
9
def write_csv(content: str) -> str:
    """Dedent *content*, save it to a new temporary ``.csv`` file, return its path.

    The file is created with ``delete=False``, so the caller must remove it.
    """
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".csv", delete=False, encoding="utf-8"
    ) as handle:
        handle.write(textwrap.dedent(content))
        created = handle.name
    return created
17
+
18
+
19
+ # Basic smoke test
20
def test_basic():
    """Three data rows with one blank ``score`` cell: check shape and counts."""
    csv_path = write_csv("""\
        id,name,score
        1,Alice,95
        2,Bob,88
        3,Carol,
    """)
    try:
        result = inspect(csv_path)
        expected_title, _extension = os.path.splitext(os.path.basename(csv_path))

        assert isinstance(result, TableReport)
        assert result.total_rows == 3
        assert result.headers == ["id", "name", "score"]
        assert result.title == expected_title

        stats = {}
        for column in result.column_stats:
            stats[column.name] = column
        assert stats["id"].non_empty_count == 3
        score = stats["score"]
        assert score.empty_count == 1
        assert score.fill_rate == pytest.approx(66.67, abs=0.01)
    finally:
        os.unlink(csv_path)
40
+
41
+
42
+ # Single-column CSV
43
def test_single_column():
    """One column, two values, then a blank line that must not count as a row."""
    csv_path = write_csv("fruit\napple\nbanana\n\n")
    try:
        result = inspect(csv_path)
        assert result.headers == ["fruit"]
        assert result.total_rows == 2
        only_column = result.column_stats[0]
        assert only_column.empty_count == 0
    finally:
        os.unlink(csv_path)
52
+
53
+
54
+ # Auto-detect semicolon delimiter
55
def test_semicolon_delimiter():
    """With no delimiter given, a ``;``-separated file is still split into columns."""
    csv_path = write_csv("a;b;c\n1;2;3\n4;5;6\n")
    try:
        result = inspect(csv_path)
        expected_headers = ["a", "b", "c"]
        assert result.headers == expected_headers
        assert result.total_rows == 2
    finally:
        os.unlink(csv_path)
63
+
64
+
65
+ # Explicit delimiter
66
def test_explicit_tab_delimiter():
    """Passing ``delimiter="\\t"`` explicitly parses a tab-separated file."""
    csv_path = write_csv("x\ty\n10\t20\n30\t40\n")
    try:
        result = inspect(csv_path, delimiter="\t")
        expected_headers = ["x", "y"]
        assert result.headers == expected_headers
        assert result.total_rows == 2
    finally:
        os.unlink(csv_path)
74
+
75
+
76
+ # File not found
77
def test_missing_file():
    """Inspecting a path that does not exist raises ``FileNotFoundError``."""
    missing_path = "/nonexistent/path/file.csv"
    with pytest.raises(FileNotFoundError):
        inspect(missing_path)
80
+
81
+
82
+ # Empty file (header only)
83
def test_header_only():
    """A file holding just the header row has zero data rows and 0 % fill everywhere."""
    csv_path = write_csv("col1,col2,col3\n")
    try:
        result = inspect(csv_path)
        assert result.total_rows == 0
        for column in result.column_stats:
            assert column.non_empty_count == 0
            assert column.fill_rate == 0.0
    finally:
        os.unlink(csv_path)
93
+
94
+
95
+ # __str__ sanity check
96
def test_str_output():
    """The printable report mentions every header and the data-row count."""
    csv_path = write_csv("name,age\nAlice,30\nBob,\n")
    try:
        result = inspect(csv_path)
        rendered = str(result)
        assert "name" in rendered
        assert "age" in rendered
        row_count_text = str(result.total_rows)
        assert row_count_text in rendered
    finally:
        os.unlink(csv_path)