diffmonkey 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. diffmonkey-1.0.0/LICENSE +21 -0
  2. diffmonkey-1.0.0/PKG-INFO +153 -0
  3. diffmonkey-1.0.0/README.md +98 -0
  4. diffmonkey-1.0.0/pyproject.toml +48 -0
  5. diffmonkey-1.0.0/setup.cfg +4 -0
  6. diffmonkey-1.0.0/src/diffmonkey/__init__.py +49 -0
  7. diffmonkey-1.0.0/src/diffmonkey/cli.py +168 -0
  8. diffmonkey-1.0.0/src/diffmonkey/comparators.py +259 -0
  9. diffmonkey-1.0.0/src/diffmonkey/compare.py +253 -0
  10. diffmonkey-1.0.0/src/diffmonkey/formatters/__init__.py +13 -0
  11. diffmonkey-1.0.0/src/diffmonkey/formatters/csv_out.py +55 -0
  12. diffmonkey-1.0.0/src/diffmonkey/formatters/html.py +93 -0
  13. diffmonkey-1.0.0/src/diffmonkey/formatters/markdown.py +94 -0
  14. diffmonkey-1.0.0/src/diffmonkey/matching.py +141 -0
  15. diffmonkey-1.0.0/src/diffmonkey/models.py +185 -0
  16. diffmonkey-1.0.0/src/diffmonkey/readers.py +117 -0
  17. diffmonkey-1.0.0/src/diffmonkey.egg-info/PKG-INFO +153 -0
  18. diffmonkey-1.0.0/src/diffmonkey.egg-info/SOURCES.txt +29 -0
  19. diffmonkey-1.0.0/src/diffmonkey.egg-info/dependency_links.txt +1 -0
  20. diffmonkey-1.0.0/src/diffmonkey.egg-info/entry_points.txt +2 -0
  21. diffmonkey-1.0.0/src/diffmonkey.egg-info/requires.txt +14 -0
  22. diffmonkey-1.0.0/src/diffmonkey.egg-info/top_level.txt +1 -0
  23. diffmonkey-1.0.0/tests/test_cli.py +240 -0
  24. diffmonkey-1.0.0/tests/test_comparators.py +505 -0
  25. diffmonkey-1.0.0/tests/test_compare.py +453 -0
  26. diffmonkey-1.0.0/tests/test_compare_properties.py +237 -0
  27. diffmonkey-1.0.0/tests/test_formatters.py +462 -0
  28. diffmonkey-1.0.0/tests/test_matching.py +374 -0
  29. diffmonkey-1.0.0/tests/test_models.py +436 -0
  30. diffmonkey-1.0.0/tests/test_readers.py +216 -0
  31. diffmonkey-1.0.0/tests/test_review_fixes.py +146 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 RexBytes
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,153 @@
1
+ Metadata-Version: 2.4
2
+ Name: diffmonkey
3
+ Version: 1.0.0
4
+ Summary: Type-aware, key-based structural diffing of tabular datasets with human- and machine-readable reports.
5
+ Author-email: RexBytes <pythonic@rexbytes.com>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 RexBytes
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/RexBytes/diffmonkey
29
+ Project-URL: Issues, https://github.com/RexBytes/diffmonkey/issues
30
+ Keywords: diff,csv,tabular,compare,dataset,changes,reconciliation
31
+ Classifier: Development Status :: 5 - Production/Stable
32
+ Classifier: Intended Audience :: Developers
33
+ Classifier: License :: OSI Approved :: MIT License
34
+ Classifier: Operating System :: OS Independent
35
+ Classifier: Programming Language :: Python :: 3
36
+ Classifier: Programming Language :: Python :: 3.11
37
+ Classifier: Programming Language :: Python :: 3.12
38
+ Classifier: Topic :: Software Development :: Libraries
39
+ Classifier: Topic :: Utilities
40
+ Requires-Python: >=3.11
41
+ Description-Content-Type: text/markdown
42
+ License-File: LICENSE
43
+ Requires-Dist: cleanmonkey
44
+ Requires-Dist: typemonkey
45
+ Requires-Dist: datemonkey
46
+ Provides-Extra: excel
47
+ Requires-Dist: openpyxl>=3.0; extra == "excel"
48
+ Provides-Extra: dsv
49
+ Requires-Dist: dsvmonkey; extra == "dsv"
50
+ Provides-Extra: dev
51
+ Requires-Dist: pytest>=7.0; extra == "dev"
52
+ Requires-Dist: pytest-cov; extra == "dev"
53
+ Requires-Dist: hypothesis>=6.0; extra == "dev"
54
+ Dynamic: license-file
55
+
56
+ # diffmonkey
57
+
58
+ Type-aware, key-based structural diffing of tabular datasets — answer "what
59
+ changed between last month's export and this month's?" in one call, with
60
+ human- and machine-readable reports.
61
+
62
+ diffmonkey matches rows by a key column (or composite key), compares the
63
+ remaining columns *with type awareness* (numbers by value, dates by calendar
64
+ date, booleans by truth, strings whitespace-normalised, nulls unified), and
65
+ buckets the result into **added / removed / changed / unchanged** with summary
66
+ statistics. It is built on the rexbytes ecosystem — [`typemonkey`] for type
67
+ inference and number parsing, [`datemonkey`] for date parsing, [`cleanmonkey`]
68
+ for whitespace and invisible-character normalisation — so it does not reinvent
69
+ those wheels.
70
+
71
+ In scope: structural comparison, change detection, change reporting. Out of
72
+ scope: merge/reconciliation, text diffing, schema migration, version control.
73
+
74
+ ## Install
75
+
76
+ ```bash
77
+ pip install diffmonkey # CSV/TSV/pipe input built in
78
+ pip install "diffmonkey[excel]" # add .xlsx reading (openpyxl)
79
+ ```
80
+
81
+ Requires Python 3.11+.
82
+
83
+ ## Quick start
84
+
85
+ ```python
86
+ from diffmonkey import compare
87
+
88
+ old = [{"id": "1", "name": "Widget", "price": "1,234"},
89
+ {"id": "2", "name": "Gadget", "price": "50"}]
90
+ new = [{"id": "1", "name": "Widget", "price": "1234"}, # price reformatted, not changed
91
+ {"id": "3", "name": "Gizmo", "price": "9"}] # id 2 removed, id 3 added
92
+
93
+ result = compare(old, new, key="id")
94
+
95
+ print(result.summary.one_line())
96
+ # 1 added, 1 removed, 0 changed (of 2 current), 0 unchanged
97
+
98
+ print(result.to_markdown()) # human report
99
+ result.to_dict() # machine-readable
100
+ result.write_csv("changes.csv")
101
+ ```
102
+
103
+ `"1,234"` vs `"1234"` is **not** reported as a change — type-aware numeric
104
+ comparison sees one number. The same applies to `"01/02/2025"` vs `"2025-01-02"`
105
+ (dates, with a `locale` hint), `" foo "` vs `"foo"` (whitespace), and
106
+ `None`/`""`/`"NA"` (nulls).
107
+
108
+ ## CLI
109
+
110
+ ```bash
111
+ diffmonkey compare old.csv new.csv --key id
112
+ diffmonkey compare old.csv new.csv --key region,sku --ignore updated_at --format markdown
113
+ diffmonkey compare old.xlsx new.xlsx --key id --format json -o diff.json
114
+ ```
115
+
116
+ Exit code is `0` when the datasets are identical, `1` when they differ, and
117
+ `2` on a usage or I/O error — handy in CI and scripts.
118
+
119
+ ## Key options
120
+
121
+ | Option | Purpose |
122
+ |---|---|
123
+ | `key` | Identity column, or list for a composite key |
124
+ | `columns` / `ignore` | Restrict / exclude columns from comparison |
125
+ | `column_map={"old":"new"}` | Handle renamed columns (avoids false add+remove) |
126
+ | `rel_tol` / `abs_tol` | Floating-point tolerance for numeric columns |
127
+ | `locale="us"` / `"eu"` | Disambiguate slash dates and number separators |
128
+ | `null_equivalent` | Treat all null spellings as one value (default on) |
129
+ | `type_aware` / `date_aware` | Toggle type/date-aware comparison |
130
+ | `include_unchanged` | Retain unchanged rows in the result |
131
+ | `on_duplicate` / `on_missing_key` | Policies for messy keys |
132
+
133
+ ## Output formats
134
+
135
+ - `result.to_dict()` — JSON-serialisable structure
136
+ - `result.to_markdown()` — report for PRs, chat, email
137
+ - `result.to_html()` — standalone HTML diff report
138
+ - `result.to_csv()` / `result.write_csv(path)` — one row per field change
139
+
140
+ ## Using with AI assistants
141
+
142
+ See [`SKILL.md`](./SKILL.md) for LLM-oriented usage (decision tree, worked
143
+ examples, anti-patterns). See [`LIMITATIONS.md`](./LIMITATIONS.md) for the
144
+ deliberate design tradeoffs (date/locale ambiguity, null vocabulary, duplicate
145
+ handling) so behaviour that looks surprising is not mistaken for a bug.
146
+
147
+ ## License
148
+
149
+ MIT — see [`LICENSE`](./LICENSE).
150
+
151
+ [`typemonkey`]: https://pypi.org/project/typemonkey/
152
+ [`datemonkey`]: https://pypi.org/project/datemonkey/
153
+ [`cleanmonkey`]: https://pypi.org/project/cleanmonkey/
@@ -0,0 +1,98 @@
1
+ # diffmonkey
2
+
3
+ Type-aware, key-based structural diffing of tabular datasets — answer "what
4
+ changed between last month's export and this month's?" in one call, with
5
+ human- and machine-readable reports.
6
+
7
+ diffmonkey matches rows by a key column (or composite key), compares the
8
+ remaining columns *with type awareness* (numbers by value, dates by calendar
9
+ date, booleans by truth, strings whitespace-normalised, nulls unified), and
10
+ buckets the result into **added / removed / changed / unchanged** with summary
11
+ statistics. It is built on the rexbytes ecosystem — [`typemonkey`] for type
12
+ inference and number parsing, [`datemonkey`] for date parsing, [`cleanmonkey`]
13
+ for whitespace and invisible-character normalisation — so it does not reinvent
14
+ those wheels.
15
+
16
+ In scope: structural comparison, change detection, change reporting. Out of
17
+ scope: merge/reconciliation, text diffing, schema migration, version control.
18
+
19
+ ## Install
20
+
21
+ ```bash
22
+ pip install diffmonkey # CSV/TSV/pipe input built in
23
+ pip install "diffmonkey[excel]" # add .xlsx reading (openpyxl)
24
+ ```
25
+
26
+ Requires Python 3.11+.
27
+
28
+ ## Quick start
29
+
30
+ ```python
31
+ from diffmonkey import compare
32
+
33
+ old = [{"id": "1", "name": "Widget", "price": "1,234"},
34
+ {"id": "2", "name": "Gadget", "price": "50"}]
35
+ new = [{"id": "1", "name": "Widget", "price": "1234"}, # price reformatted, not changed
36
+ {"id": "3", "name": "Gizmo", "price": "9"}] # id 2 removed, id 3 added
37
+
38
+ result = compare(old, new, key="id")
39
+
40
+ print(result.summary.one_line())
41
+ # 1 added, 1 removed, 0 changed (of 2 current), 0 unchanged
42
+
43
+ print(result.to_markdown()) # human report
44
+ result.to_dict() # machine-readable
45
+ result.write_csv("changes.csv")
46
+ ```
47
+
48
+ `"1,234"` vs `"1234"` is **not** reported as a change — type-aware numeric
49
+ comparison sees one number. The same applies to `"01/02/2025"` vs `"2025-01-02"`
50
+ (dates, with a `locale` hint), `" foo "` vs `"foo"` (whitespace), and
51
+ `None`/`""`/`"NA"` (nulls).
52
+
53
+ ## CLI
54
+
55
+ ```bash
56
+ diffmonkey compare old.csv new.csv --key id
57
+ diffmonkey compare old.csv new.csv --key region,sku --ignore updated_at --format markdown
58
+ diffmonkey compare old.xlsx new.xlsx --key id --format json -o diff.json
59
+ ```
60
+
61
+ Exit code is `0` when the datasets are identical, `1` when they differ, and
62
+ `2` on a usage or I/O error — handy in CI and scripts.
63
+
64
+ ## Key options
65
+
66
+ | Option | Purpose |
67
+ |---|---|
68
+ | `key` | Identity column, or list for a composite key |
69
+ | `columns` / `ignore` | Restrict / exclude columns from comparison |
70
+ | `column_map={"old":"new"}` | Handle renamed columns (avoids false add+remove) |
71
+ | `rel_tol` / `abs_tol` | Floating-point tolerance for numeric columns |
72
+ | `locale="us"` / `"eu"` | Disambiguate slash dates and number separators |
73
+ | `null_equivalent` | Treat all null spellings as one value (default on) |
74
+ | `type_aware` / `date_aware` | Toggle type/date-aware comparison |
75
+ | `include_unchanged` | Retain unchanged rows in the result |
76
+ | `on_duplicate` / `on_missing_key` | Policies for messy keys |
77
+
78
+ ## Output formats
79
+
80
+ - `result.to_dict()` — JSON-serialisable structure
81
+ - `result.to_markdown()` — report for PRs, chat, email
82
+ - `result.to_html()` — standalone HTML diff report
83
+ - `result.to_csv()` / `result.write_csv(path)` — one row per field change
84
+
85
+ ## Using with AI assistants
86
+
87
+ See [`SKILL.md`](./SKILL.md) for LLM-oriented usage (decision tree, worked
88
+ examples, anti-patterns). See [`LIMITATIONS.md`](./LIMITATIONS.md) for the
89
+ deliberate design tradeoffs (date/locale ambiguity, null vocabulary, duplicate
90
+ handling) so behaviour that looks surprising is not mistaken for a bug.
91
+
92
+ ## License
93
+
94
+ MIT — see [`LICENSE`](./LICENSE).
95
+
96
+ [`typemonkey`]: https://pypi.org/project/typemonkey/
97
+ [`datemonkey`]: https://pypi.org/project/datemonkey/
98
+ [`cleanmonkey`]: https://pypi.org/project/cleanmonkey/
@@ -0,0 +1,48 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "diffmonkey"
7
+ version = "1.0.0"
8
+ description = "Type-aware, key-based structural diffing of tabular datasets with human- and machine-readable reports."
9
+ readme = "README.md"
10
+ license = { file = "LICENSE" }
11
+ authors = [{ name = "RexBytes", email = "pythonic@rexbytes.com" }]
12
+ requires-python = ">=3.11"
13
+ keywords = ["diff", "csv", "tabular", "compare", "dataset", "changes", "reconciliation"]
14
+ classifiers = [
15
+ "Development Status :: 5 - Production/Stable",
16
+ "Intended Audience :: Developers",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: OS Independent",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Topic :: Software Development :: Libraries",
23
+ "Topic :: Utilities",
24
+ ]
25
+ dependencies = [
26
+ "cleanmonkey",
27
+ "typemonkey",
28
+ "datemonkey",
29
+ ]
30
+
31
+ [project.optional-dependencies]
32
+ excel = ["openpyxl>=3.0"]
33
+ dsv = ["dsvmonkey"]
34
+ dev = ["pytest>=7.0", "pytest-cov", "hypothesis>=6.0"]
35
+
36
+ [project.scripts]
37
+ diffmonkey = "diffmonkey.cli:main"
38
+
39
+ [project.urls]
40
+ Homepage = "https://github.com/RexBytes/diffmonkey"
41
+ Issues = "https://github.com/RexBytes/diffmonkey/issues"
42
+
43
+ [tool.setuptools.packages.find]
44
+ where = ["src"]
45
+
46
+ [tool.pytest.ini_options]
47
+ testpaths = ["tests"]
48
+ pythonpath = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,49 @@
1
+ """diffmonkey — type-aware, key-based structural diffing of tabular datasets.
2
+
3
+ Public API::
4
+
5
+ from diffmonkey import compare
6
+ result = compare(old_rows, new_rows, key="id")
7
+ print(result.summary.one_line())
8
+ print(result.to_markdown())
9
+
10
+ ``compare`` matches rows by a key column (or composite key), compares the
11
+ remaining columns with type awareness (numbers by value, dates by calendar
12
+ date, booleans by truth, strings whitespace-normalised, nulls unified), and
13
+ returns a :class:`DiffResult` bucketed into added / removed / changed /
14
+ unchanged with summary statistics and multiple report formats.
15
+
16
+ See ``LIMITATIONS.md`` for deliberate design tradeoffs and ``SKILL.md`` for
17
+ LLM-oriented usage guidance.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ from .compare import compare
23
+ from .models import (
24
+ DiffMonkeyError,
25
+ DiffResult,
26
+ DiffSummary,
27
+ DuplicateKeyError,
28
+ FieldChange,
29
+ MissingKeyError,
30
+ RowDiff,
31
+ )
32
+ from .readers import read_csv, read_excel, read_table
33
+
34
+ __version__ = "1.0.0"
35
+
36
+ __all__ = [
37
+ "compare",
38
+ "DiffResult",
39
+ "DiffSummary",
40
+ "RowDiff",
41
+ "FieldChange",
42
+ "DiffMonkeyError",
43
+ "DuplicateKeyError",
44
+ "MissingKeyError",
45
+ "read_table",
46
+ "read_csv",
47
+ "read_excel",
48
+ "__version__",
49
+ ]
@@ -0,0 +1,168 @@
1
+ """Command-line interface — a thin wrapper around :func:`diffmonkey.compare`.
2
+
3
+ This module exists only to parse arguments, read input files
4
+ (:mod:`diffmonkey.readers`), call the library, render the chosen format and
5
+ return a process exit code. It contains no comparison logic. Exit codes:
6
+ ``0`` = no differences, ``1`` = differences found, ``2`` = usage/IO error.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ import json
13
+ import sys
14
+ from typing import Sequence
15
+
16
+ from . import __version__
17
+ from .compare import compare
18
+ from .models import DiffMonkeyError
19
+ from .readers import read_table
20
+
21
+ EXIT_NO_DIFF = 0
22
+ EXIT_DIFF = 1
23
+ EXIT_ERROR = 2
24
+
25
+
26
+ def _split_csv_opt(value: str | None) -> list[str] | None:
27
+ if value is None:
28
+ return None
29
+ return [part.strip() for part in value.split(",") if part.strip()]
30
+
31
+
32
+ def _parse_column_map(pairs: Sequence[str] | None) -> dict[str, str] | None:
33
+ if not pairs:
34
+ return None
35
+ mapping: dict[str, str] = {}
36
+ for pair in pairs:
37
+ if "=" not in pair:
38
+ # ValueError (not argparse.ArgumentTypeError): this runs inside
39
+ # main(), not as an argparse type= callable, so it must be caught
40
+ # by main()'s handler and turned into exit code 2.
41
+ raise ValueError(f"--map expects OLD=NEW, got {pair!r}")
42
+ old, new = pair.split("=", 1)
43
+ mapping[old.strip()] = new.strip()
44
+ return mapping
45
+
46
+
47
def build_parser() -> argparse.ArgumentParser:
    """Construct the ``diffmonkey`` command-line parser.

    The root parser carries ``--version`` and requires a sub-command.
    ``compare`` is the only sub-command defined here and owns every
    comparison, policy and output option.
    """
    root = argparse.ArgumentParser(
        prog="diffmonkey",
        description="Type-aware, key-based structural diff of two tabular files.",
    )
    root.add_argument(
        "--version",
        action="version",
        version=f"diffmonkey {__version__}",
    )
    commands = root.add_subparsers(dest="command", required=True)

    compare_cmd = commands.add_parser("compare", help="Compare two files by key column(s).")
    add = compare_cmd.add_argument

    # Positional inputs.
    add("old", help="Baseline file (CSV/TSV/Excel).")
    add("new", help="Current file to compare against the baseline.")

    # Row identity and column selection.
    add(
        "-k",
        "--key",
        required=True,
        help="Identity column, or comma-separated columns for a composite key.",
    )
    add("--columns", help="Comma-separated columns to compare (default: all).")
    add("--ignore", help="Comma-separated columns to exclude.")
    add(
        "--map",
        action="append",
        metavar="OLD=NEW",
        help="Rename an old column to a new name (repeatable).",
    )

    # Report destination and shape.
    add(
        "--format",
        choices=["summary", "markdown", "html", "csv", "json"],
        default="summary",
        help="Output format (default: summary).",
    )
    add("-o", "--output", help="Write report to this file instead of stdout.")

    # Value-comparison tuning.
    add("--rel-tol", type=float, default=1e-9, help="Numeric relative tolerance.")
    add("--abs-tol", type=float, default=0.0, help="Numeric absolute tolerance.")
    add("--locale", choices=["us", "eu"], help="Number/date locale hint.")
    add(
        "--no-type-aware",
        action="store_true",
        help="Compare every column as a normalised string.",
    )
    add(
        "--no-null-equivalent",
        action="store_true",
        help="Do not treat different null spellings as equal.",
    )
    add(
        "--include-unchanged",
        action="store_true",
        help="Include unchanged rows in json/markdown output.",
    )

    # Policies for messy keys.
    add(
        "--on-duplicate",
        choices=["warn", "first", "last", "error"],
        default="warn",
        help="Duplicate-key policy (default: warn).",
    )
    add(
        "--on-missing-key",
        choices=["warn", "skip", "error"],
        default="warn",
        help="Missing-key policy (default: warn).",
    )

    # Input parsing.
    add("--delimiter", help="Force a delimiter for DSV inputs.")
    return root
98
+
99
+
100
+ def _render(result, fmt: str, *, include_unchanged: bool) -> str:
101
+ if fmt == "summary":
102
+ text = result.summary.one_line()
103
+ if result.warnings:
104
+ text += "\n" + "\n".join(f"warning: {w}" for w in result.warnings)
105
+ return text + "\n"
106
+ if fmt == "markdown":
107
+ return result.to_markdown()
108
+ if fmt == "html":
109
+ return result.to_html()
110
+ if fmt == "csv":
111
+ return result.to_csv()
112
+ if fmt == "json":
113
+ return json.dumps(result.to_dict(), indent=2, default=str) + "\n"
114
+ raise ValueError(f"unknown format {fmt!r}") # pragma: no cover
115
+
116
+
117
def main(argv: Sequence[str] | None = None) -> int:
    """Run the CLI and return a process exit code.

    Args:
        argv: Argument list to parse; ``None`` lets argparse fall back to
            ``sys.argv[1:]``.

    Returns:
        ``EXIT_NO_DIFF`` when the comparison found no changes, ``EXIT_DIFF``
        when it did, and ``EXIT_ERROR`` when reading input, comparing, or
        writing the report failed (the reason is printed to stderr).
    """
    args = build_parser().parse_args(argv)

    if args.command != "compare":
        return EXIT_ERROR  # pragma: no cover - argparse enforces a command

    # Only forward a delimiter when one was given, so the reader's own
    # default applies otherwise.
    reader_opts = {"delimiter": args.delimiter} if args.delimiter else {}
    try:
        old_rows = read_table(args.old, **reader_opts)
        new_rows = read_table(args.new, **reader_opts)
    except (OSError, RuntimeError, ValueError, TypeError) as exc:
        print(f"diffmonkey: error reading input: {exc}", file=sys.stderr)
        return EXIT_ERROR

    try:
        # Option parsing stays inside the try: a malformed --map raises
        # ValueError, which must be reported as EXIT_ERROR like any other
        # comparison failure.
        options = {
            "key": _split_csv_opt(args.key),
            "columns": _split_csv_opt(args.columns),
            "ignore": _split_csv_opt(args.ignore),
            "column_map": _parse_column_map(args.map),
            "rel_tol": args.rel_tol,
            "abs_tol": args.abs_tol,
            "locale": args.locale,
            "type_aware": not args.no_type_aware,
            "null_equivalent": not args.no_null_equivalent,
            "include_unchanged": args.include_unchanged,
            "on_duplicate": args.on_duplicate,
            "on_missing_key": args.on_missing_key,
        }
        result = compare(old_rows, new_rows, **options)
    except (DiffMonkeyError, ValueError, TypeError) as exc:
        print(f"diffmonkey: {exc}", file=sys.stderr)
        return EXIT_ERROR

    report = _render(result, args.format, include_unchanged=args.include_unchanged)
    if not args.output:
        sys.stdout.write(report)
    else:
        try:
            # newline="" writes the report's own line endings untranslated.
            with open(args.output, "w", encoding="utf-8", newline="") as out:
                out.write(report)
        except OSError as exc:
            print(f"diffmonkey: error writing output: {exc}", file=sys.stderr)
            return EXIT_ERROR

    if result.has_changes():
        return EXIT_DIFF
    return EXIT_NO_DIFF
165
+
166
+
167
+ if __name__ == "__main__": # pragma: no cover
168
+ raise SystemExit(main())