diffmonkey 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffmonkey-1.0.0/LICENSE +21 -0
- diffmonkey-1.0.0/PKG-INFO +153 -0
- diffmonkey-1.0.0/README.md +98 -0
- diffmonkey-1.0.0/pyproject.toml +48 -0
- diffmonkey-1.0.0/setup.cfg +4 -0
- diffmonkey-1.0.0/src/diffmonkey/__init__.py +49 -0
- diffmonkey-1.0.0/src/diffmonkey/cli.py +168 -0
- diffmonkey-1.0.0/src/diffmonkey/comparators.py +259 -0
- diffmonkey-1.0.0/src/diffmonkey/compare.py +253 -0
- diffmonkey-1.0.0/src/diffmonkey/formatters/__init__.py +13 -0
- diffmonkey-1.0.0/src/diffmonkey/formatters/csv_out.py +55 -0
- diffmonkey-1.0.0/src/diffmonkey/formatters/html.py +93 -0
- diffmonkey-1.0.0/src/diffmonkey/formatters/markdown.py +94 -0
- diffmonkey-1.0.0/src/diffmonkey/matching.py +141 -0
- diffmonkey-1.0.0/src/diffmonkey/models.py +185 -0
- diffmonkey-1.0.0/src/diffmonkey/readers.py +117 -0
- diffmonkey-1.0.0/src/diffmonkey.egg-info/PKG-INFO +153 -0
- diffmonkey-1.0.0/src/diffmonkey.egg-info/SOURCES.txt +29 -0
- diffmonkey-1.0.0/src/diffmonkey.egg-info/dependency_links.txt +1 -0
- diffmonkey-1.0.0/src/diffmonkey.egg-info/entry_points.txt +2 -0
- diffmonkey-1.0.0/src/diffmonkey.egg-info/requires.txt +14 -0
- diffmonkey-1.0.0/src/diffmonkey.egg-info/top_level.txt +1 -0
- diffmonkey-1.0.0/tests/test_cli.py +240 -0
- diffmonkey-1.0.0/tests/test_comparators.py +505 -0
- diffmonkey-1.0.0/tests/test_compare.py +453 -0
- diffmonkey-1.0.0/tests/test_compare_properties.py +237 -0
- diffmonkey-1.0.0/tests/test_formatters.py +462 -0
- diffmonkey-1.0.0/tests/test_matching.py +374 -0
- diffmonkey-1.0.0/tests/test_models.py +436 -0
- diffmonkey-1.0.0/tests/test_readers.py +216 -0
- diffmonkey-1.0.0/tests/test_review_fixes.py +146 -0
diffmonkey-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 RexBytes
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: diffmonkey
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Type-aware, key-based structural diffing of tabular datasets with human- and machine-readable reports.
|
|
5
|
+
Author-email: RexBytes <pythonic@rexbytes.com>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 RexBytes
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: Homepage, https://github.com/RexBytes/diffmonkey
|
|
29
|
+
Project-URL: Issues, https://github.com/RexBytes/diffmonkey/issues
|
|
30
|
+
Keywords: diff,csv,tabular,compare,dataset,changes,reconciliation
|
|
31
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
32
|
+
Classifier: Intended Audience :: Developers
|
|
33
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
34
|
+
Classifier: Operating System :: OS Independent
|
|
35
|
+
Classifier: Programming Language :: Python :: 3
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
38
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
39
|
+
Classifier: Topic :: Utilities
|
|
40
|
+
Requires-Python: >=3.11
|
|
41
|
+
Description-Content-Type: text/markdown
|
|
42
|
+
License-File: LICENSE
|
|
43
|
+
Requires-Dist: cleanmonkey
|
|
44
|
+
Requires-Dist: typemonkey
|
|
45
|
+
Requires-Dist: datemonkey
|
|
46
|
+
Provides-Extra: excel
|
|
47
|
+
Requires-Dist: openpyxl>=3.0; extra == "excel"
|
|
48
|
+
Provides-Extra: dsv
|
|
49
|
+
Requires-Dist: dsvmonkey; extra == "dsv"
|
|
50
|
+
Provides-Extra: dev
|
|
51
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
52
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
53
|
+
Requires-Dist: hypothesis>=6.0; extra == "dev"
|
|
54
|
+
Dynamic: license-file
|
|
55
|
+
|
|
56
|
+
# diffmonkey
|
|
57
|
+
|
|
58
|
+
Type-aware, key-based structural diffing of tabular datasets — answer "what
|
|
59
|
+
changed between last month's export and this month's?" in one call, with
|
|
60
|
+
human- and machine-readable reports.
|
|
61
|
+
|
|
62
|
+
diffmonkey matches rows by a key column (or composite key), compares the
|
|
63
|
+
remaining columns *with type awareness* (numbers by value, dates by calendar
|
|
64
|
+
date, booleans by truth, strings whitespace-normalised, nulls unified), and
|
|
65
|
+
buckets the result into **added / removed / changed / unchanged** with summary
|
|
66
|
+
statistics. It is built on the rexbytes ecosystem — [`typemonkey`] for type
|
|
67
|
+
inference and number parsing, [`datemonkey`] for date parsing, [`cleanmonkey`]
|
|
68
|
+
for whitespace and invisible-character normalisation — so it does not reinvent
|
|
69
|
+
those wheels.
|
|
70
|
+
|
|
71
|
+
In scope: structural comparison, change detection, change reporting. Out of
|
|
72
|
+
scope: merge/reconciliation, text diffing, schema migration, version control.
|
|
73
|
+
|
|
74
|
+
## Install
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
pip install diffmonkey # CSV/TSV/pipe input built in
|
|
78
|
+
pip install "diffmonkey[excel]" # add .xlsx reading (openpyxl)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
Requires Python 3.11+.
|
|
82
|
+
|
|
83
|
+
## Quick start
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from diffmonkey import compare
|
|
87
|
+
|
|
88
|
+
old = [{"id": "1", "name": "Widget", "price": "1,234"},
|
|
89
|
+
{"id": "2", "name": "Gadget", "price": "50"}]
|
|
90
|
+
new = [{"id": "1", "name": "Widget", "price": "1234"}, # price reformatted, not changed
|
|
91
|
+
{"id": "3", "name": "Gizmo", "price": "9"}] # id 2 removed, id 3 added
|
|
92
|
+
|
|
93
|
+
result = compare(old, new, key="id")
|
|
94
|
+
|
|
95
|
+
print(result.summary.one_line())
|
|
96
|
+
# 1 added, 1 removed, 0 changed (of 2 current), 0 unchanged
|
|
97
|
+
|
|
98
|
+
print(result.to_markdown()) # human report
|
|
99
|
+
result.to_dict() # machine-readable
|
|
100
|
+
result.write_csv("changes.csv")
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
`"1,234"` vs `"1234"` is **not** reported as a change — type-aware numeric
|
|
104
|
+
comparison sees one number. The same applies to `"01/02/2025"` vs `"2025-01-02"`
|
|
105
|
+
(dates, with a `locale` hint), `" foo "` vs `"foo"` (whitespace), and
|
|
106
|
+
`None`/`""`/`"NA"` (nulls).
|
|
107
|
+
|
|
108
|
+
## CLI
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
diffmonkey compare old.csv new.csv --key id
|
|
112
|
+
diffmonkey compare old.csv new.csv --key region,sku --ignore updated_at --format markdown
|
|
113
|
+
diffmonkey compare old.xlsx new.xlsx --key id --format json -o diff.json
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Exit code is `0` when the datasets are identical and `1` when they differ —
|
|
117
|
+
handy in CI and scripts. Exit code `2` signals a usage or I/O error.
|
|
118
|
+
|
|
119
|
+
## Key options
|
|
120
|
+
|
|
121
|
+
| Option | Purpose |
|
|
122
|
+
|---|---|
|
|
123
|
+
| `key` | Identity column, or list for a composite key |
|
|
124
|
+
| `columns` / `ignore` | Restrict / exclude columns from comparison |
|
|
125
|
+
| `column_map={"old":"new"}` | Handle renamed columns (avoids false add+remove) |
|
|
126
|
+
| `rel_tol` / `abs_tol` | Floating-point tolerance for numeric columns |
|
|
127
|
+
| `locale="us"` / `"eu"` | Disambiguate slash dates and number separators |
|
|
128
|
+
| `null_equivalent` | Treat all null spellings as one value (default on) |
|
|
129
|
+
| `type_aware` / `date_aware` | Toggle type/date-aware comparison |
|
|
130
|
+
| `include_unchanged` | Retain unchanged rows in the result |
|
|
131
|
+
| `on_duplicate` / `on_missing_key` | Policies for messy keys |
|
|
132
|
+
|
|
133
|
+
## Output formats
|
|
134
|
+
|
|
135
|
+
- `result.to_dict()` — JSON-serialisable structure
|
|
136
|
+
- `result.to_markdown()` — report for PRs, chat, email
|
|
137
|
+
- `result.to_html()` — standalone HTML diff report
|
|
138
|
+
- `result.to_csv()` / `result.write_csv(path)` — one row per field change
|
|
139
|
+
|
|
140
|
+
## Using with AI assistants
|
|
141
|
+
|
|
142
|
+
See [`SKILL.md`](./SKILL.md) for LLM-oriented usage (decision tree, worked
|
|
143
|
+
examples, anti-patterns). See [`LIMITATIONS.md`](./LIMITATIONS.md) for the
|
|
144
|
+
deliberate design tradeoffs (date/locale ambiguity, null vocabulary, duplicate
|
|
145
|
+
handling) so behaviour that looks surprising is not mistaken for a bug.
|
|
146
|
+
|
|
147
|
+
## License
|
|
148
|
+
|
|
149
|
+
MIT — see [`LICENSE`](./LICENSE).
|
|
150
|
+
|
|
151
|
+
[`typemonkey`]: https://pypi.org/project/typemonkey/
|
|
152
|
+
[`datemonkey`]: https://pypi.org/project/datemonkey/
|
|
153
|
+
[`cleanmonkey`]: https://pypi.org/project/cleanmonkey/
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# diffmonkey
|
|
2
|
+
|
|
3
|
+
Type-aware, key-based structural diffing of tabular datasets — answer "what
|
|
4
|
+
changed between last month's export and this month's?" in one call, with
|
|
5
|
+
human- and machine-readable reports.
|
|
6
|
+
|
|
7
|
+
diffmonkey matches rows by a key column (or composite key), compares the
|
|
8
|
+
remaining columns *with type awareness* (numbers by value, dates by calendar
|
|
9
|
+
date, booleans by truth, strings whitespace-normalised, nulls unified), and
|
|
10
|
+
buckets the result into **added / removed / changed / unchanged** with summary
|
|
11
|
+
statistics. It is built on the rexbytes ecosystem — [`typemonkey`] for type
|
|
12
|
+
inference and number parsing, [`datemonkey`] for date parsing, [`cleanmonkey`]
|
|
13
|
+
for whitespace and invisible-character normalisation — so it does not reinvent
|
|
14
|
+
those wheels.
|
|
15
|
+
|
|
16
|
+
In scope: structural comparison, change detection, change reporting. Out of
|
|
17
|
+
scope: merge/reconciliation, text diffing, schema migration, version control.
|
|
18
|
+
|
|
19
|
+
## Install
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
pip install diffmonkey # CSV/TSV/pipe input built in
|
|
23
|
+
pip install "diffmonkey[excel]" # add .xlsx reading (openpyxl)
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Requires Python 3.11+.
|
|
27
|
+
|
|
28
|
+
## Quick start
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from diffmonkey import compare
|
|
32
|
+
|
|
33
|
+
old = [{"id": "1", "name": "Widget", "price": "1,234"},
|
|
34
|
+
{"id": "2", "name": "Gadget", "price": "50"}]
|
|
35
|
+
new = [{"id": "1", "name": "Widget", "price": "1234"}, # price reformatted, not changed
|
|
36
|
+
{"id": "3", "name": "Gizmo", "price": "9"}] # id 2 removed, id 3 added
|
|
37
|
+
|
|
38
|
+
result = compare(old, new, key="id")
|
|
39
|
+
|
|
40
|
+
print(result.summary.one_line())
|
|
41
|
+
# 1 added, 1 removed, 0 changed (of 2 current), 0 unchanged
|
|
42
|
+
|
|
43
|
+
print(result.to_markdown()) # human report
|
|
44
|
+
result.to_dict() # machine-readable
|
|
45
|
+
result.write_csv("changes.csv")
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
`"1,234"` vs `"1234"` is **not** reported as a change — type-aware numeric
|
|
49
|
+
comparison sees one number. The same applies to `"01/02/2025"` vs `"2025-01-02"`
|
|
50
|
+
(dates, with a `locale` hint), `" foo "` vs `"foo"` (whitespace), and
|
|
51
|
+
`None`/`""`/`"NA"` (nulls).
|
|
52
|
+
|
|
53
|
+
## CLI
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
diffmonkey compare old.csv new.csv --key id
|
|
57
|
+
diffmonkey compare old.csv new.csv --key region,sku --ignore updated_at --format markdown
|
|
58
|
+
diffmonkey compare old.xlsx new.xlsx --key id --format json -o diff.json
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Exit code is `0` when the datasets are identical and `1` when they differ —
|
|
62
|
+
handy in CI and scripts. Exit code `2` signals a usage or I/O error.
|
|
63
|
+
|
|
64
|
+
## Key options
|
|
65
|
+
|
|
66
|
+
| Option | Purpose |
|
|
67
|
+
|---|---|
|
|
68
|
+
| `key` | Identity column, or list for a composite key |
|
|
69
|
+
| `columns` / `ignore` | Restrict / exclude columns from comparison |
|
|
70
|
+
| `column_map={"old":"new"}` | Handle renamed columns (avoids false add+remove) |
|
|
71
|
+
| `rel_tol` / `abs_tol` | Floating-point tolerance for numeric columns |
|
|
72
|
+
| `locale="us"` / `"eu"` | Disambiguate slash dates and number separators |
|
|
73
|
+
| `null_equivalent` | Treat all null spellings as one value (default on) |
|
|
74
|
+
| `type_aware` / `date_aware` | Toggle type/date-aware comparison |
|
|
75
|
+
| `include_unchanged` | Retain unchanged rows in the result |
|
|
76
|
+
| `on_duplicate` / `on_missing_key` | Policies for messy keys |
|
|
77
|
+
|
|
78
|
+
## Output formats
|
|
79
|
+
|
|
80
|
+
- `result.to_dict()` — JSON-serialisable structure
|
|
81
|
+
- `result.to_markdown()` — report for PRs, chat, email
|
|
82
|
+
- `result.to_html()` — standalone HTML diff report
|
|
83
|
+
- `result.to_csv()` / `result.write_csv(path)` — one row per field change
|
|
84
|
+
|
|
85
|
+
## Using with AI assistants
|
|
86
|
+
|
|
87
|
+
See [`SKILL.md`](./SKILL.md) for LLM-oriented usage (decision tree, worked
|
|
88
|
+
examples, anti-patterns). See [`LIMITATIONS.md`](./LIMITATIONS.md) for the
|
|
89
|
+
deliberate design tradeoffs (date/locale ambiguity, null vocabulary, duplicate
|
|
90
|
+
handling) so behaviour that looks surprising is not mistaken for a bug.
|
|
91
|
+
|
|
92
|
+
## License
|
|
93
|
+
|
|
94
|
+
MIT — see [`LICENSE`](./LICENSE).
|
|
95
|
+
|
|
96
|
+
[`typemonkey`]: https://pypi.org/project/typemonkey/
|
|
97
|
+
[`datemonkey`]: https://pypi.org/project/datemonkey/
|
|
98
|
+
[`cleanmonkey`]: https://pypi.org/project/cleanmonkey/
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "diffmonkey"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "Type-aware, key-based structural diffing of tabular datasets with human- and machine-readable reports."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
authors = [{ name = "RexBytes", email = "pythonic@rexbytes.com" }]
|
|
12
|
+
requires-python = ">=3.11"
|
|
13
|
+
keywords = ["diff", "csv", "tabular", "compare", "dataset", "changes", "reconciliation"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 5 - Production/Stable",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Operating System :: OS Independent",
|
|
19
|
+
"Programming Language :: Python :: 3",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Topic :: Software Development :: Libraries",
|
|
23
|
+
"Topic :: Utilities",
|
|
24
|
+
]
|
|
25
|
+
dependencies = [
|
|
26
|
+
"cleanmonkey",
|
|
27
|
+
"typemonkey",
|
|
28
|
+
"datemonkey",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.optional-dependencies]
|
|
32
|
+
excel = ["openpyxl>=3.0"]
|
|
33
|
+
dsv = ["dsvmonkey"]
|
|
34
|
+
dev = ["pytest>=7.0", "pytest-cov", "hypothesis>=6.0"]
|
|
35
|
+
|
|
36
|
+
[project.scripts]
|
|
37
|
+
diffmonkey = "diffmonkey.cli:main"
|
|
38
|
+
|
|
39
|
+
[project.urls]
|
|
40
|
+
Homepage = "https://github.com/RexBytes/diffmonkey"
|
|
41
|
+
Issues = "https://github.com/RexBytes/diffmonkey/issues"
|
|
42
|
+
|
|
43
|
+
[tool.setuptools.packages.find]
|
|
44
|
+
where = ["src"]
|
|
45
|
+
|
|
46
|
+
[tool.pytest.ini_options]
|
|
47
|
+
testpaths = ["tests"]
|
|
48
|
+
pythonpath = ["src"]
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""diffmonkey — type-aware, key-based structural diffing of tabular datasets.
|
|
2
|
+
|
|
3
|
+
Public API::
|
|
4
|
+
|
|
5
|
+
from diffmonkey import compare
|
|
6
|
+
result = compare(old_rows, new_rows, key="id")
|
|
7
|
+
print(result.summary.one_line())
|
|
8
|
+
print(result.to_markdown())
|
|
9
|
+
|
|
10
|
+
``compare`` matches rows by a key column (or composite key), compares the
|
|
11
|
+
remaining columns with type awareness (numbers by value, dates by calendar
|
|
12
|
+
date, booleans by truth, strings whitespace-normalised, nulls unified), and
|
|
13
|
+
returns a :class:`DiffResult` bucketed into added / removed / changed /
|
|
14
|
+
unchanged with summary statistics and multiple report formats.
|
|
15
|
+
|
|
16
|
+
See ``LIMITATIONS.md`` for deliberate design tradeoffs and ``SKILL.md`` for
|
|
17
|
+
LLM-oriented usage guidance.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
from .compare import compare
|
|
23
|
+
from .models import (
|
|
24
|
+
DiffMonkeyError,
|
|
25
|
+
DiffResult,
|
|
26
|
+
DiffSummary,
|
|
27
|
+
DuplicateKeyError,
|
|
28
|
+
FieldChange,
|
|
29
|
+
MissingKeyError,
|
|
30
|
+
RowDiff,
|
|
31
|
+
)
|
|
32
|
+
from .readers import read_csv, read_excel, read_table
|
|
33
|
+
|
|
34
|
+
__version__ = "1.0.0"
|
|
35
|
+
|
|
36
|
+
__all__ = [
|
|
37
|
+
"compare",
|
|
38
|
+
"DiffResult",
|
|
39
|
+
"DiffSummary",
|
|
40
|
+
"RowDiff",
|
|
41
|
+
"FieldChange",
|
|
42
|
+
"DiffMonkeyError",
|
|
43
|
+
"DuplicateKeyError",
|
|
44
|
+
"MissingKeyError",
|
|
45
|
+
"read_table",
|
|
46
|
+
"read_csv",
|
|
47
|
+
"read_excel",
|
|
48
|
+
"__version__",
|
|
49
|
+
]
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""Command-line interface — a thin wrapper around :func:`diffmonkey.compare`.
|
|
2
|
+
|
|
3
|
+
This module exists only to parse arguments, read input files
|
|
4
|
+
(:mod:`diffmonkey.readers`), call the library, render the chosen format and
|
|
5
|
+
return a process exit code. It contains no comparison logic. Exit codes:
|
|
6
|
+
``0`` = no differences, ``1`` = differences found, ``2`` = usage/IO error.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import json
|
|
13
|
+
import sys
|
|
14
|
+
from typing import Sequence
|
|
15
|
+
|
|
16
|
+
from . import __version__
|
|
17
|
+
from .compare import compare
|
|
18
|
+
from .models import DiffMonkeyError
|
|
19
|
+
from .readers import read_table
|
|
20
|
+
|
|
21
|
+
EXIT_NO_DIFF = 0
|
|
22
|
+
EXIT_DIFF = 1
|
|
23
|
+
EXIT_ERROR = 2
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _split_csv_opt(value: str | None) -> list[str] | None:
|
|
27
|
+
if value is None:
|
|
28
|
+
return None
|
|
29
|
+
return [part.strip() for part in value.split(",") if part.strip()]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _parse_column_map(pairs: Sequence[str] | None) -> dict[str, str] | None:
|
|
33
|
+
if not pairs:
|
|
34
|
+
return None
|
|
35
|
+
mapping: dict[str, str] = {}
|
|
36
|
+
for pair in pairs:
|
|
37
|
+
if "=" not in pair:
|
|
38
|
+
# ValueError (not argparse.ArgumentTypeError): this runs inside
|
|
39
|
+
# main(), not as an argparse type= callable, so it must be caught
|
|
40
|
+
# by main()'s handler and turned into exit code 2.
|
|
41
|
+
raise ValueError(f"--map expects OLD=NEW, got {pair!r}")
|
|
42
|
+
old, new = pair.split("=", 1)
|
|
43
|
+
mapping[old.strip()] = new.strip()
|
|
44
|
+
return mapping
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the ``diffmonkey`` argument parser.

    The parser has a top-level ``--version`` action and one required
    sub-command, ``compare``, whose options are registered below in the same
    order they appear in ``--help``.
    """
    root = argparse.ArgumentParser(
        prog="diffmonkey",
        description="Type-aware, key-based structural diff of two tabular files.",
    )
    root.add_argument(
        "--version",
        action="version",
        version=f"diffmonkey {__version__}",
    )
    subcommands = root.add_subparsers(dest="command", required=True)

    compare_cmd = subcommands.add_parser(
        "compare",
        help="Compare two files by key column(s).",
    )

    # Positional inputs and the key that identifies a row.
    compare_cmd.add_argument("old", help="Baseline file (CSV/TSV/Excel).")
    compare_cmd.add_argument("new", help="Current file to compare against the baseline.")
    compare_cmd.add_argument(
        "-k",
        "--key",
        required=True,
        help="Identity column, or comma-separated columns for a composite key.",
    )

    # Column selection and renaming.
    compare_cmd.add_argument(
        "--columns",
        help="Comma-separated columns to compare (default: all).",
    )
    compare_cmd.add_argument("--ignore", help="Comma-separated columns to exclude.")
    compare_cmd.add_argument(
        "--map",
        action="append",
        metavar="OLD=NEW",
        help="Rename an old column to a new name (repeatable).",
    )

    # Report format and destination.
    compare_cmd.add_argument(
        "--format",
        choices=["summary", "markdown", "html", "csv", "json"],
        default="summary",
        help="Output format (default: summary).",
    )
    compare_cmd.add_argument(
        "-o",
        "--output",
        help="Write report to this file instead of stdout.",
    )

    # Numeric tolerances and locale hint.
    compare_cmd.add_argument(
        "--rel-tol",
        type=float,
        default=1e-9,
        help="Numeric relative tolerance.",
    )
    compare_cmd.add_argument(
        "--abs-tol",
        type=float,
        default=0.0,
        help="Numeric absolute tolerance.",
    )
    compare_cmd.add_argument(
        "--locale",
        choices=["us", "eu"],
        help="Number/date locale hint.",
    )

    # Plain on/off switches; each defaults to False.
    switches = (
        ("--no-type-aware", "Compare every column as a normalised string."),
        ("--no-null-equivalent", "Do not treat different null spellings as equal."),
        ("--include-unchanged", "Include unchanged rows in json/markdown output."),
    )
    for flag, help_text in switches:
        compare_cmd.add_argument(flag, action="store_true", help=help_text)

    # Policies for duplicate or missing keys; both default to "warn".
    policies = (
        (
            "--on-duplicate",
            ["warn", "first", "last", "error"],
            "Duplicate-key policy (default: warn).",
        ),
        (
            "--on-missing-key",
            ["warn", "skip", "error"],
            "Missing-key policy (default: warn).",
        ),
    )
    for flag, allowed, help_text in policies:
        compare_cmd.add_argument(flag, choices=allowed, default="warn", help=help_text)

    compare_cmd.add_argument("--delimiter", help="Force a delimiter for DSV inputs.")
    return root
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _render(result, fmt: str, *, include_unchanged: bool) -> str:
|
|
101
|
+
if fmt == "summary":
|
|
102
|
+
text = result.summary.one_line()
|
|
103
|
+
if result.warnings:
|
|
104
|
+
text += "\n" + "\n".join(f"warning: {w}" for w in result.warnings)
|
|
105
|
+
return text + "\n"
|
|
106
|
+
if fmt == "markdown":
|
|
107
|
+
return result.to_markdown()
|
|
108
|
+
if fmt == "html":
|
|
109
|
+
return result.to_html()
|
|
110
|
+
if fmt == "csv":
|
|
111
|
+
return result.to_csv()
|
|
112
|
+
if fmt == "json":
|
|
113
|
+
return json.dumps(result.to_dict(), indent=2, default=str) + "\n"
|
|
114
|
+
raise ValueError(f"unknown format {fmt!r}") # pragma: no cover
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def main(argv: Sequence[str] | None = None) -> int:
    """Run the command-line interface and return a process exit code.

    Args:
        argv: Argument list to parse; ``None`` lets argparse fall back to
            ``sys.argv``.

    Returns:
        ``EXIT_NO_DIFF`` when the comparison reports no changes,
        ``EXIT_DIFF`` when it reports changes, and ``EXIT_ERROR`` when
        reading input, comparing, or writing the report fails.
    """
    args = build_parser().parse_args(argv)

    if args.command != "compare":
        return EXIT_ERROR  # pragma: no cover - argparse enforces a command

    # --- read both inputs -------------------------------------------------
    try:
        reader_options = {}
        if args.delimiter:
            reader_options["delimiter"] = args.delimiter
        old_rows = read_table(args.old, **reader_options)
        new_rows = read_table(args.new, **reader_options)
    except (OSError, RuntimeError, ValueError, TypeError) as exc:
        print(f"diffmonkey: error reading input: {exc}", file=sys.stderr)
        return EXIT_ERROR

    # --- compare ----------------------------------------------------------
    try:
        # Built inside the try: _parse_column_map raises ValueError on a
        # malformed --map value and that must become exit code 2.
        options = {
            "key": _split_csv_opt(args.key),
            "columns": _split_csv_opt(args.columns),
            "ignore": _split_csv_opt(args.ignore),
            "column_map": _parse_column_map(args.map),
            "rel_tol": args.rel_tol,
            "abs_tol": args.abs_tol,
            "locale": args.locale,
            "type_aware": not args.no_type_aware,
            "null_equivalent": not args.no_null_equivalent,
            "include_unchanged": args.include_unchanged,
            "on_duplicate": args.on_duplicate,
            "on_missing_key": args.on_missing_key,
        }
        result = compare(old_rows, new_rows, **options)
    except (DiffMonkeyError, ValueError, TypeError) as exc:
        print(f"diffmonkey: {exc}", file=sys.stderr)
        return EXIT_ERROR

    # --- render and emit --------------------------------------------------
    report = _render(result, args.format, include_unchanged=args.include_unchanged)
    if not args.output:
        sys.stdout.write(report)
    else:
        try:
            # newline="" writes the rendered text without newline translation.
            with open(args.output, "w", encoding="utf-8", newline="") as out_file:
                out_file.write(report)
        except OSError as exc:
            print(f"diffmonkey: error writing output: {exc}", file=sys.stderr)
            return EXIT_ERROR

    if result.has_changes():
        return EXIT_DIFF
    return EXIT_NO_DIFF
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
if __name__ == "__main__": # pragma: no cover
|
|
168
|
+
raise SystemExit(main())
|