compareexcel 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- compareexcel/__init__.py +20 -0
- compareexcel/cli.py +81 -0
- compareexcel/core.py +150 -0
- compareexcel/report.py +17 -0
- compareexcel-0.2.1.dist-info/METADATA +111 -0
- compareexcel-0.2.1.dist-info/RECORD +10 -0
- compareexcel-0.2.1.dist-info/WHEEL +5 -0
- compareexcel-0.2.1.dist-info/entry_points.txt +3 -0
- compareexcel-0.2.1.dist-info/licenses/LICENSE +21 -0
- compareexcel-0.2.1.dist-info/top_level.txt +1 -0
compareexcel/__init__.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Compare Excel workbooks for formatting and alignment differences."""
|
|
2
|
+
|
|
3
|
+
from compareexcel.core import (
|
|
4
|
+
compare_data_alignment,
|
|
5
|
+
compare_formatting,
|
|
6
|
+
compare_header_alignment,
|
|
7
|
+
get_column_formats,
|
|
8
|
+
)
|
|
9
|
+
from compareexcel.report import write_report
|
|
10
|
+
|
|
11
|
+
# Must match the distribution version declared in the wheel metadata (0.2.1).
__version__ = "0.2.1"
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"__version__",
|
|
15
|
+
"compare_data_alignment",
|
|
16
|
+
"compare_formatting",
|
|
17
|
+
"compare_header_alignment",
|
|
18
|
+
"get_column_formats",
|
|
19
|
+
"write_report",
|
|
20
|
+
]
|
compareexcel/cli.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Command-line interface for comparing two Excel files."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
from openpyxl import load_workbook
|
|
9
|
+
|
|
10
|
+
from compareexcel.core import (
|
|
11
|
+
compare_data_alignment,
|
|
12
|
+
compare_formatting,
|
|
13
|
+
compare_header_alignment,
|
|
14
|
+
)
|
|
15
|
+
from compareexcel.report import write_report
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _ensure_utf8_stdio():
    """Best-effort switch of ``sys.stdout`` and ``sys.stderr`` to UTF-8.

    Streams without a ``reconfigure`` method are left untouched. Any error
    raised while reconfiguring is deliberately ignored so that console
    setup can never abort the command.
    """
    reconfigurable = [
        stream
        for stream in (sys.stdout, sys.stderr)
        if hasattr(stream, "reconfigure")
    ]
    for stream in reconfigurable:
        try:
            # Unencodable characters are replaced instead of raising.
            stream.reconfigure(encoding="utf-8", errors="replace")
        except Exception:
            continue
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def main(argv=None):
    """Compare two workbooks from the command line and print a summary.

    Args:
        argv: Optional list of command-line arguments. When ``None``
            (the default), ``argparse`` reads ``sys.argv[1:]``.

    Raises:
        SystemExit: Raised by ``argparse`` on invalid arguments, including
            a ``--sheet`` name that is missing from either workbook.
    """
    _ensure_utf8_stdio()

    parser = argparse.ArgumentParser(
        description="Compare two Excel workbooks for formatting and alignment differences.",
    )
    parser.add_argument("file1")
    parser.add_argument("file2")
    parser.add_argument("--sheet")
    # NOTE: --alignment-only is parsed but not acted on anywhere below.
    parser.add_argument("--alignment-only", "--ao", action="store_true")
    parser.add_argument("--output", help="Output Excel report file")

    args = parser.parse_args(argv)

    wb1 = load_workbook(args.file1)
    wb2 = load_workbook(args.file2)

    if args.sheet:
        # Report a missing sheet as a usage error instead of a KeyError
        # traceback from the workbook lookup.
        missing_in = [
            path
            for path, workbook in ((args.file1, wb1), (args.file2, wb2))
            if args.sheet not in workbook.sheetnames
        ]
        if missing_in:
            parser.error(
                f"sheet {args.sheet!r} not found in: {', '.join(missing_in)}"
            )
        sheets = [args.sheet]
    else:
        # Keep the first workbook's sheet order so the output is the same
        # on every run (set intersection has no stable order).
        in_second = set(wb2.sheetnames)
        sheets = [name for name in wb1.sheetnames if name in in_second]

    all_format_diffs = []
    all_data_align_diffs = []
    all_header_align_diffs = []

    for sheet in sheets:
        ws1 = wb1[sheet]
        ws2 = wb2[sheet]

        fmt = compare_formatting(ws1, ws2)
        da = compare_data_alignment(ws1, ws2)
        ha = compare_header_alignment(ws1, ws2)

        # Tag every diff row with the sheet it came from.
        for row in (*fmt, *da, *ha):
            row["Sheet"] = sheet

        all_format_diffs.extend(fmt)
        all_data_align_diffs.extend(da)
        all_header_align_diffs.extend(ha)

    print("\n=== SUMMARY ===")
    print(f"Formatting diffs: {len(all_format_diffs)}")
    print(f"Data alignment diffs: {len(all_data_align_diffs)}")
    print(f"Header alignment diffs: {len(all_header_align_diffs)}")

    if args.output:
        write_report(
            args.output,
            all_format_diffs,
            all_data_align_diffs,
            all_header_align_diffs,
        )
        print(f"\nReport written to: {args.output}")
|
compareexcel/core.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""Worksheet comparison: headers, number formats, and cell alignment."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _header_to_column_map(ws):
    """Map each header label in row 1 of ``ws`` to its column index.

    Labels are the stripped string form of the cell value. Cells whose
    value is ``None`` or whose label is empty after stripping are skipped.
    Falsy but real values such as ``0`` or ``False`` are kept as headers.
    When a label occurs more than once, the last column with it wins.

    Args:
        ws: Worksheet-like object exposing ``iter_rows(min_row, max_row)``
            whose cells have ``value`` and ``column`` attributes.

    Returns:
        dict mapping header label to column index, in worksheet order.
    """
    # Fall back to an empty row if the sheet yields no rows at all.
    header_cells = next(iter(ws.iter_rows(min_row=1, max_row=1)), ())

    mapping = {}
    for cell in header_cells:
        if cell.value is None:
            continue
        label = str(cell.value).strip()
        if label:
            mapping[label] = cell.column
    return mapping
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _alignment_tuple(cell):
    """Return a cell's alignment settings as a comparable tuple.

    The tuple is ``(horizontal, vertical, text_rotation, wrap_text,
    shrink_to_fit, indent)``. Unset values fall back to ``"general"``,
    ``"bottom"``, ``0``, ``False``, ``False`` and ``0`` respectively; the
    same defaults are returned when ``cell`` or its alignment is ``None``.
    """
    defaults = ("general", "bottom", 0, False, False, 0)
    if cell is None:
        return defaults

    alignment = cell.alignment
    if alignment is None:
        return defaults

    horizontal = alignment.horizontal or "general"
    vertical = alignment.vertical or "bottom"
    rotation = alignment.text_rotation or 0
    wrap = bool(alignment.wrap_text)
    shrink = bool(alignment.shrink_to_fit)
    indent = alignment.indent or 0
    return (horizontal, vertical, rotation, wrap, shrink, indent)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _sample_alignments(ws, col_idx, sample_size=5):
    """Collect alignment tuples from the first non-empty data cells of a column.

    Rows are scanned from row 2 through ``ws.max_row``. Cells whose value
    is ``None`` or ``""`` are skipped, and scanning stops once
    ``sample_size`` tuples have been gathered.

    Returns:
        list of alignment tuples, in row order.
    """
    samples = []
    last_row = ws.max_row
    row = 2
    while row <= last_row:
        cell = ws.cell(row, col_idx)
        row += 1
        if cell.value in (None, ""):
            continue
        samples.append(_alignment_tuple(cell))
        if len(samples) >= sample_size:
            break
    return samples
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def get_column_formats(ws):
    """Return the number format of each headed column in ``ws``.

    For every header in row 1, the format is taken from the first cell
    below it (row 2 through ``ws.max_row``) whose value is neither
    ``None`` nor ``""``. Columns with no such cell, or whose format is
    falsy, are reported as ``"No data"``.

    Returns:
        dict mapping header label to a number-format string.
    """
    formats = {}
    for header, col_idx in _header_to_column_map(ws).items():
        data_cells = (ws.cell(row, col_idx) for row in range(2, ws.max_row + 1))
        first_format = next(
            (cell.number_format for cell in data_cells if cell.value not in (None, "")),
            None,
        )
        formats[header] = first_format or "No data"
    return formats
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _analyze_format(fmt):
    """Break an Excel number-format string into comparable properties.

    Args:
        fmt: Number-format string, or a falsy value / ``"No data"`` when
            the column has no format to analyse.

    Returns:
        ``{"type": "unknown", "raw": fmt}`` when there is nothing to
        analyse; otherwise a dict with:

        * ``raw``: the format string as given.
        * ``type``: ``"date"``, ``"currency"``, ``"numeric"`` or ``"text"``.
        * ``decimals``: number of digit placeholders (``0``, ``#``, ``?``)
          directly after the decimal point in the first format section.
        * ``thousands``: whether the first section has a comma between
          two digit placeholders.
        * ``currency``: the currency symbol found in the format, or ``None``.
    """
    import re  # local import keeps this block self-contained

    if not fmt or fmt == "No data":
        return {"type": "unknown", "raw": fmt}

    currency_symbols = ("$", "€", "£")
    lowered = fmt.lower()
    info = {"raw": fmt}

    if any(token in lowered for token in ("yy", "mm", "dd")):
        info["type"] = "date"
    elif any(symbol in fmt for symbol in currency_symbols):
        info["type"] = "currency"
    elif "0" in lowered or "#" in lowered:
        info["type"] = "numeric"
    else:
        info["type"] = "text"

    # Only the first ';'-separated section is inspected, so later sections
    # (negative, zero, text) do not distort the counts.
    first_section = fmt.split(";", 1)[0]

    # Count only digit placeholders after the decimal point; trailing
    # characters such as '%', '_)' or 'E+00' are not decimals.
    decimal_match = re.search(r"\.([0#?]+)", first_section)
    info["decimals"] = len(decimal_match.group(1)) if decimal_match else 0

    # A literal comma (e.g. in "mmm d, yyyy") is not a thousands separator;
    # it must sit between two digit placeholders.
    info["thousands"] = re.search(r"[0#?],[0#?]", first_section) is not None

    # When several symbols are present, the last one in the list wins.
    info["currency"] = next(
        (symbol for symbol in reversed(currency_symbols) if symbol in fmt),
        None,
    )

    return info
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _diff_formats(f1, f2):
    """Describe how two number formats differ, property by property.

    Both formats are analysed with ``_analyze_format``. Each property
    whose value differs produces one ``"<key>: <old> → <new>"`` string.

    Returns:
        list of difference strings, ordered by the keys of the first
        analysis followed by any keys found only in the second.
    """
    a1 = _analyze_format(f1)
    a2 = _analyze_format(f2)

    # dict.fromkeys de-duplicates while keeping insertion order, so the
    # output order is the same on every run (a set union is not).
    ordered_keys = dict.fromkeys((*a1, *a2))

    return [
        f"{k}: {a1.get(k)} → {a2.get(k)}"
        for k in ordered_keys
        if a1.get(k) != a2.get(k)
    ]
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def compare_formatting(ws1, ws2):
    """Compare column number formats between two worksheets.

    Only columns whose header appears in both sheets are compared;
    columns present in just one sheet are ignored.

    Returns:
        list of dicts with keys ``Column``, ``File1_Format``,
        ``File2_Format`` and ``Differences``, one per column whose format
        differs, in the order the columns appear in ``ws1``.
    """
    f1 = get_column_formats(ws1)
    f2 = get_column_formats(ws2)

    diffs = []

    # Walk ws1's columns in order rather than a set intersection, so the
    # rows come out in the same order on every run.
    for col, fmt1 in f1.items():
        if col not in f2:
            continue
        fmt2 = f2[col]
        if fmt1 == fmt2:
            continue
        diffs.append({
            "Column": col,
            "File1_Format": fmt1,
            "File2_Format": fmt2,
            "Differences": "; ".join(_diff_formats(fmt1, fmt2)),
        })

    return diffs
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def compare_data_alignment(ws1, ws2):
    """Compare sampled data-cell alignment for columns shared by two sheets.

    For each header present in both sheets, the distinct alignment tuples
    sampled by ``_sample_alignments`` (with its default sample size) are
    compared as sets, so the order and repetition of samples do not matter.

    Returns:
        list of dicts with keys ``Column``, ``File1_Alignment`` and
        ``File2_Alignment``, one per column whose sampled alignments
        differ, in the order the columns appear in ``ws1``.
    """
    m1 = _header_to_column_map(ws1)
    m2 = _header_to_column_map(ws2)

    diffs = []

    # Walk ws1's headers in order so the rows are the same on every run.
    for col, idx1 in m1.items():
        if col not in m2:
            continue
        a1 = set(_sample_alignments(ws1, idx1))
        a2 = set(_sample_alignments(ws2, m2[col]))

        if a1 != a2:
            diffs.append({
                "Column": col,
                # Sorted so the rendered text does not depend on set order.
                "File1_Alignment": str(sorted(a1)),
                "File2_Alignment": str(sorted(a2)),
            })

    return diffs
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def compare_header_alignment(ws1, ws2):
    """Compare header-cell (row 1) alignment for columns shared by two sheets.

    Returns:
        list of dicts with keys ``Column``, ``File1_Header_Alignment`` and
        ``File2_Header_Alignment``, one per shared header whose alignment
        tuple differs, in the order the columns appear in ``ws1``.
    """
    m1 = _header_to_column_map(ws1)
    m2 = _header_to_column_map(ws2)

    diffs = []

    # Walk ws1's headers in order so the rows are the same on every run.
    for col, idx1 in m1.items():
        if col not in m2:
            continue
        a1 = _alignment_tuple(ws1.cell(1, idx1))
        a2 = _alignment_tuple(ws2.cell(1, m2[col]))

        if a1 != a2:
            diffs.append({
                "Column": col,
                "File1_Header_Alignment": str(a1),
                "File2_Header_Alignment": str(a2),
            })

    return diffs
|
compareexcel/report.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Write comparison results to an Excel workbook."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def write_report(output_file, formatting, data_align, header_align):
    """Write the comparison results to an Excel workbook.

    Each non-empty result list becomes its own sheet
    (``Formatting_Diffs``, ``Data_Alignment_Diffs``,
    ``Header_Alignment_Diffs``). When all three lists are empty, a single
    ``Summary`` sheet stating that no differences were found is written.

    Args:
        output_file: Path of the workbook to create.
        formatting: Row dicts for number-format differences.
        data_align: Row dicts for data-cell alignment differences.
        header_align: Row dicts for header alignment differences.
    """
    sections = (
        ("Formatting_Diffs", formatting),
        ("Data_Alignment_Diffs", data_align),
        ("Header_Alignment_Diffs", header_align),
    )

    with pd.ExcelWriter(output_file, engine="openpyxl") as writer:
        wrote_any = False
        for sheet_name, rows in sections:
            if rows:
                pd.DataFrame(rows).to_excel(writer, sheet_name=sheet_name, index=False)
                wrote_any = True

        if not wrote_any:
            # A workbook with no sheets cannot be saved, so an all-empty
            # result still needs one sheet to produce a valid report.
            pd.DataFrame({"Result": ["No differences found"]}).to_excel(
                writer, sheet_name="Summary", index=False
            )
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: compareexcel
|
|
3
|
+
Version: 0.2.1
|
|
4
|
+
Summary: Compare two Excel workbooks for column number formats and cell alignment differences, with optional Excel report output.
|
|
5
|
+
Author: Excel Compare contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/abhidotnet/compareexcel
|
|
8
|
+
Project-URL: Issues, https://github.com/abhidotnet/compareexcel/issues
|
|
9
|
+
Keywords: excel,xlsx,compare,openpyxl,pandas
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Environment :: Console
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Office/Business :: Financial :: Spreadsheet
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: pandas>=2.0
|
|
23
|
+
Requires-Dist: openpyxl>=3.1
|
|
24
|
+
Dynamic: license-file
|
|
25
|
+
|
|
26
|
+
# Excel Compare (compareExcel)
|
|
27
|
+
|
|
28
|
+
**compareExcel** is a small command-line tool and Python library that compares two Excel (`.xlsx`) workbooks. It reports differences in **column number formats**, **data cell alignment** (sampled from non-empty cells), and **header row alignment** for columns that exist in both files.
|
|
29
|
+
|
|
30
|
+
Install from PyPI with `pip install compareexcel`. The distribution and import package name is **`compareexcel`**. Installed console scripts are **`compareexcel`** and **`compareExcel`** (same entry point).
|
|
31
|
+
|
|
32
|
+
## Requirements
|
|
33
|
+
|
|
34
|
+
- Python 3.10+
|
|
35
|
+
- Dependencies: `pandas`, `openpyxl` (declared in `pyproject.toml`)
|
|
36
|
+
|
|
37
|
+
## Installation
|
|
38
|
+
|
|
39
|
+
From the repository root (`compareexcel/`):
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install .
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Editable install while developing:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install -e .
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Command-line usage
|
|
52
|
+
|
|
53
|
+
```text
|
|
54
|
+
compareExcel FILE1 FILE2 [--sheet SHEET_NAME] [--output REPORT.xlsx] [--alignment-only]
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
| Argument / option | Description |
|
|
58
|
+
|-------------------|-------------|
|
|
59
|
+
| `FILE1`, `FILE2` | Paths to the two Excel files to compare. |
|
|
60
|
+
| `--sheet` | Compare only this sheet; default is all sheets that exist in **both** workbooks. |
|
|
61
|
+
| `--output` | Write a multi-sheet Excel report (`Formatting_Diffs`, `Data_Alignment_Diffs`, `Header_Alignment_Diffs`). |
|
|
62
|
+
| `--alignment-only` / `--ao` | Reserved for future use (currently parsed only). |
|
|
63
|
+
|
|
64
|
+
Example:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
compareExcel workbook_a.xlsx workbook_b.xlsx --output diff_report.xlsx
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
A short summary is always printed to the console; use `--output` to persist detailed rows to an `.xlsx` file.
|
|
71
|
+
|
|
72
|
+
## Library usage
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from openpyxl import load_workbook
|
|
76
|
+
from compareexcel import (
|
|
77
|
+
compare_formatting,
|
|
78
|
+
compare_data_alignment,
|
|
79
|
+
compare_header_alignment,
|
|
80
|
+
write_report,
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
wb1 = load_workbook("a.xlsx")
|
|
84
|
+
wb2 = load_workbook("b.xlsx")
|
|
85
|
+
sheet = wb1.sheetnames[0]
|
|
86
|
+
fmt = compare_formatting(wb1[sheet], wb2[sheet])
|
|
87
|
+
data_align = compare_data_alignment(wb1[sheet], wb2[sheet])
|
|
88
|
+
header_align = compare_header_alignment(wb1[sheet], wb2[sheet])
|
|
89
|
+
|
|
90
|
+
write_report("out.xlsx", fmt, data_align, header_align)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Package layout
|
|
94
|
+
|
|
95
|
+
```text
|
|
96
|
+
compareexcel/
|
|
97
|
+
├── src/
|
|
98
|
+
│ └── compareexcel/
|
|
99
|
+
│ ├── __init__.py # Public API and version
|
|
100
|
+
│ ├── cli.py # Entry point and argument parsing
|
|
101
|
+
│ ├── core.py # Comparison logic
|
|
102
|
+
│ └── report.py # Excel report writer
|
|
103
|
+
├── pyproject.toml
|
|
104
|
+
├── README.md
|
|
105
|
+
├── LICENSE
|
|
106
|
+
└── .gitignore
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## License
|
|
110
|
+
|
|
111
|
+
See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
compareexcel/__init__.py,sha256=oJsYarXfE9XByZ2gz2QvBLMDFzf4sTkiTSOW1L5OVKc,449
|
|
2
|
+
compareexcel/cli.py,sha256=uKT9nQWjiz8wy-7scaozlc6A6371AcACZqsAos5qHAc,2233
|
|
3
|
+
compareexcel/core.py,sha256=pfFEpIZuXOPVrnji2-jCcbeieLf-UV0_6mE-gJdYRKo,3740
|
|
4
|
+
compareexcel/report.py,sha256=NPjBjyJrFz31yhvrL0gwvcm8TgoENwCExO1L2DVeWWk,627
|
|
5
|
+
compareexcel-0.2.1.dist-info/licenses/LICENSE,sha256=VBlKcwWPZXlJFE9C7XI2hvL0HXjmK44CI9RqeADM4cQ,1083
|
|
6
|
+
compareexcel-0.2.1.dist-info/METADATA,sha256=TVSLc92opOh1uO8AjCiz-jMEQiy683I1RlTm9Ei8vQI,3546
|
|
7
|
+
compareexcel-0.2.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
8
|
+
compareexcel-0.2.1.dist-info/entry_points.txt,sha256=AQPNdzbZvMI2BCnWljYSnXBHC3XHyIBED0bJpL2K74Y,92
|
|
9
|
+
compareexcel-0.2.1.dist-info/top_level.txt,sha256=51FWEmNAH_R98xZ6lWiGMR7JWgpxMWy_A4jipFAGcNg,13
|
|
10
|
+
compareexcel-0.2.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Excel Compare contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
compareexcel
|