endnote-utils 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
endnote_utils/cli.py ADDED
@@ -0,0 +1,186 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import logging
5
+ import sys
6
+ from pathlib import Path
7
+ from typing import List, Optional, Tuple
8
+
9
+ from .core import (
10
+ DEFAULT_FIELDNAMES,
11
+ export_files_with_report, # generic writer: csv/json/xlsx
12
+ )
13
+
14
# Output formats accepted by the --format option.
SUPPORTED_FORMATS = ("csv", "json", "xlsx")

# File suffix (leading dot included) -> format name; consulted when the format
# has to be inferred from the --out path. Each supported format currently uses
# its own name as the extension, so the map is derived from the tuple above.
EXT_TO_FORMAT = {f".{name}": name for name in SUPPORTED_FORMATS}
16
+
17
+
18
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the EndNote XML export command.

    Exactly one of ``--xml`` / ``--folder`` is required by the parser itself.
    The output options (``--csv``, ``--out``, ``--format``) are all optional at
    this level; nothing here enforces that one of them is present.

    Returns:
        A configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(
        description="Export EndNote XML (file or folder) to CSV/JSON/XLSX with a TXT report."
    )

    # Input source: a single file or a folder, never both.
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--xml", help="Path to a single EndNote XML file.")
    source.add_argument("--folder", help="Path to a folder containing *.xml files.")

    # Every remaining option, listed in the order it should appear in --help.
    option_table = (
        # Output selection (legacy CSV flag plus the generic flags).
        ("--csv", {
            "help": "(Legacy) Output CSV path. Prefer --out for csv/json/xlsx.",
        }),
        ("--out", {
            "help": "Generic output path; format inferred from file extension if --format not provided. "
                    "Supported extensions: .csv, .json, .xlsx",
        }),
        ("--format", {
            "choices": SUPPORTED_FORMATS,
            "help": "Output format. If omitted, inferred from --out extension or --csv.",
        }),
        # Report controls.
        ("--report", {
            "help": "Path to TXT report (default: <output>_report.txt).",
        }),
        ("--no-report", {
            "action": "store_true",
            "help": "Disable writing the TXT report (by default, a report is always generated).",
        }),
        # Text/CSV formatting options.
        ("--delimiter", {
            "default": ",",
            "help": "CSV delimiter (default: ',').",
        }),
        ("--quoting", {
            "default": "minimal",
            "choices": ["minimal", "all", "nonnumeric", "none"],
            "help": "CSV quoting mode (default: minimal).",
        }),
        ("--no-header", {
            "action": "store_true",
            "help": "Do not write CSV header row.",
        }),
        ("--encoding", {
            "default": "utf-8",
            "help": "Output text encoding (default: utf-8).",
        }),
        # Filters / limits.
        ("--ref-type", {
            "default": None,
            "help": "Filter by ref_type name.",
        }),
        ("--year", {
            "default": None,
            "help": "Filter by year.",
        }),
        ("--max-records", {
            "type": int,
            "default": None,
            "help": "Max records per file (testing).",
        }),
        # Deduplication and statistics.
        ("--dedupe", {
            "choices": ["none", "doi", "title-year"],
            "default": "none",
            "help": "Deduplicate records by key. Default: none.",
        }),
        ("--dedupe-keep", {
            "choices": ["first", "last"],
            "default": "first",
            "help": "When duplicates found, keep the first or last occurrence. Default: first.",
        }),
        ("--stats", {
            "action": "store_true",
            "help": "Compute summary stats and include them in the TXT report.",
        }),
        ("--stats-json", {
            "help": "Optional JSON file path to write detailed stats (when --stats is used).",
        }),
        ("--top-authors", {
            "type": int,
            "default": 10,
            "help": "How many top authors to list in the report/stats JSON. Default: 10.",
        }),
        # Verbosity.
        ("--verbose", {
            "action": "store_true",
            "help": "Verbose logging.",
        }),
    )
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)

    return parser
86
+
87
+
88
+ def _resolve_inputs(args: argparse.Namespace) -> List[Path]:
89
+ if args.xml:
90
+ xml_path = Path(args.xml)
91
+ if not xml_path.is_file():
92
+ raise FileNotFoundError(xml_path)
93
+ return [xml_path]
94
+
95
+ folder = Path(args.folder)
96
+ if not folder.is_dir():
97
+ raise FileNotFoundError(folder)
98
+ inputs = sorted(p for p in folder.glob("*.xml") if p.is_file())
99
+ if not inputs:
100
+ raise FileNotFoundError(f"No *.xml files found in folder: {folder}")
101
+ return inputs
102
+
103
+
104
+ def _resolve_output_and_format(args: argparse.Namespace) -> tuple[Path, str, Optional[Path]]:
105
+ """
106
+ Decide final out_path, out_format, and report_path using:
107
+ - Prefer --out/--format if provided
108
+ - Fallback to --csv (legacy) which implies CSV
109
+ - If --no-report, return report_path=None
110
+ """
111
+ target_path: Optional[Path] = None
112
+ out_format: Optional[str] = None
113
+
114
+ if args.out:
115
+ target_path = Path(args.out)
116
+ out_format = args.format
117
+ if not out_format:
118
+ # infer from extension
119
+ out_format = EXT_TO_FORMAT.get(target_path.suffix.lower())
120
+ if not out_format:
121
+ raise SystemExit(
122
+ "Cannot infer output format from extension. "
123
+ "Use --format {csv,json,xlsx} or set a supported extension."
124
+ )
125
+ elif args.csv:
126
+ target_path = Path(args.csv)
127
+ out_format = args.format or "csv"
128
+ if out_format != "csv":
129
+ # user asked for non-csv but used --csv path
130
+ raise SystemExit("When using --csv, --format must be 'csv'. Use --out for json/xlsx.")
131
+ else:
132
+ raise SystemExit("You must provide either --out (preferred) or --csv (legacy).")
133
+
134
+ # Report path defaults next to chosen output file (unless disabled)
135
+ if args.no_report:
136
+ report_path: Optional[Path] = None
137
+ else:
138
+ report_path = Path(args.report) if args.report else target_path.with_name(target_path.stem + "_report.txt")
139
+
140
+ return target_path, out_format, report_path
141
+
142
+
143
def main(argv: Optional[List[str]] = None) -> None:
    """Command-line entry point: parse options, run the export, log the result.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, so existing ``main()`` callers are
            unaffected; passing a list allows programmatic invocation.

    Exit behaviour:
        * ``sys.exit(1)`` when a ``FileNotFoundError`` is raised.
        * ``sys.exit(2)`` on any other ``Exception``.
        * ``SystemExit`` raised while resolving the output options is not
          intercepted here and propagates to the caller.
    """
    args = build_parser().parse_args(argv)
    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(levelname)s: %(message)s",
        stream=sys.stderr,
    )

    try:
        inputs = _resolve_inputs(args)
        out_path, out_format, report_path = _resolve_output_and_format(args)

        total, final_out, final_report = export_files_with_report(
            inputs=inputs,
            out_path=out_path,
            out_format=out_format,
            fieldnames=DEFAULT_FIELDNAMES,
            delimiter=args.delimiter,
            quoting=args.quoting,
            include_header=not args.no_header,
            encoding=args.encoding,
            ref_type=args.ref_type,
            year=args.year,
            max_records_per_file=args.max_records,
            dedupe=args.dedupe,
            dedupe_keep=args.dedupe_keep,
            stats=args.stats,
            stats_json=Path(args.stats_json) if args.stats_json else None,
            top_authors=args.top_authors,
            report_path=report_path,  # may be None → core should skip writing report
        )

        logging.info("Exported %d record(s) → %s", total, final_out)
        if report_path is None:
            logging.info("Report disabled by --no-report.")
        else:
            logging.info("Report → %s", final_report)

    except FileNotFoundError as e:
        logging.error("File/folder not found: %s", e)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report and exit non-zero. The one-line message
        # alone is often not enough to diagnose a failure (e.g. a bare
        # KeyError), so attach the traceback when --verbose was requested.
        logging.error("Unexpected error: %s", e, exc_info=args.verbose)
        sys.exit(2)