endnote-utils 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- endnote_utils/cli.py +186 -0
- endnote_utils/core.py +673 -0
- endnote_utils-0.2.0.dist-info/METADATA +223 -0
- endnote_utils-0.2.0.dist-info/RECORD +8 -0
- endnote_utils-0.2.0.dist-info/top_level.txt +1 -0
- endnote-utils/cli.py +0 -54
- endnote-utils/core.py +0 -209
- endnote_utils-0.1.3.dist-info/METADATA +0 -145
- endnote_utils-0.1.3.dist-info/RECORD +0 -8
- endnote_utils-0.1.3.dist-info/top_level.txt +0 -1
- {endnote-utils → endnote_utils}/__init__.py +0 -0
- {endnote_utils-0.1.3.dist-info → endnote_utils-0.2.0.dist-info}/WHEEL +0 -0
- {endnote_utils-0.1.3.dist-info → endnote_utils-0.2.0.dist-info}/entry_points.txt +0 -0
endnote_utils/cli.py
ADDED
@@ -0,0 +1,186 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
import argparse
|
4
|
+
import logging
|
5
|
+
import sys
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import List, Optional, Tuple
|
8
|
+
|
9
|
+
from .core import (
|
10
|
+
DEFAULT_FIELDNAMES,
|
11
|
+
export_files_with_report, # generic writer: csv/json/xlsx
|
12
|
+
)
|
13
|
+
|
14
|
+
# Output formats accepted by --format, in the order they are offered.
SUPPORTED_FORMATS = ("csv", "json", "xlsx")
# Maps a lower-case file extension (with leading dot) to its format name.
EXT_TO_FORMAT = {"." + name: name for name in SUPPORTED_FORMATS}
|
16
|
+
|
17
|
+
|
18
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the EndNote XML exporter.

    Returns:
        A parser that requires exactly one of ``--xml`` / ``--folder`` and
        accepts the output, report, CSV-formatting, filter, deduplication,
        statistics and verbosity options registered below.
    """
    parser = argparse.ArgumentParser(
        description="Export EndNote XML (file or folder) to CSV/JSON/XLSX with a TXT report."
    )

    # Exactly one input source has to be supplied.
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--xml", help="Path to a single EndNote XML file.")
    source.add_argument("--folder", help="Path to a folder containing *.xml files.")

    # Remaining options, registered in this order so --help output is stable.
    option_table = [
        # Output selection: legacy --csv plus the generic --out/--format pair.
        ("--csv", dict(help="(Legacy) Output CSV path. Prefer --out for csv/json/xlsx.")),
        ("--out", dict(
            help=(
                "Generic output path; format inferred from file extension if --format not provided. "
                "Supported extensions: .csv, .json, .xlsx"
            ),
        )),
        ("--format", dict(
            choices=SUPPORTED_FORMATS,
            help="Output format. If omitted, inferred from --out extension or --csv.",
        )),
        # Report controls.
        ("--report", dict(help="Path to TXT report (default: <output>_report.txt).")),
        ("--no-report", dict(
            action="store_true",
            help="Disable writing the TXT report (by default, a report is always generated).",
        )),
        # CSV formatting options and output encoding.
        ("--delimiter", dict(default=",", help="CSV delimiter (default: ',').")),
        ("--quoting", dict(
            default="minimal",
            choices=["minimal", "all", "nonnumeric", "none"],
            help="CSV quoting mode (default: minimal).",
        )),
        ("--no-header", dict(action="store_true", help="Do not write CSV header row.")),
        ("--encoding", dict(default="utf-8", help="Output text encoding (default: utf-8).")),
        # Filters / limits.
        ("--ref-type", dict(default=None, help="Filter by ref_type name.")),
        ("--year", dict(default=None, help="Filter by year.")),
        ("--max-records", dict(type=int, default=None, help="Max records per file (testing).")),
        # Deduplication and statistics.
        ("--dedupe", dict(
            choices=["none", "doi", "title-year"],
            default="none",
            help="Deduplicate records by key. Default: none.",
        )),
        ("--dedupe-keep", dict(
            choices=["first", "last"],
            default="first",
            help="When duplicates found, keep the first or last occurrence. Default: first.",
        )),
        ("--stats", dict(
            action="store_true",
            help="Compute summary stats and include them in the TXT report.",
        )),
        ("--stats-json", dict(
            help="Optional JSON file path to write detailed stats (when --stats is used).",
        )),
        ("--top-authors", dict(
            type=int,
            default=10,
            help="How many top authors to list in the report/stats JSON. Default: 10.",
        )),
        # Verbosity.
        ("--verbose", dict(action="store_true", help="Verbose logging.")),
    ]
    for flag, spec in option_table:
        parser.add_argument(flag, **spec)

    return parser
|
86
|
+
|
87
|
+
|
88
|
+
def _resolve_inputs(args: argparse.Namespace) -> List[Path]:
    """Return the XML input files selected on the command line.

    Args:
        args: Parsed arguments; ``args.xml`` is used when truthy, otherwise
            ``args.folder`` is scanned.

    Returns:
        A one-element list for ``--xml``, or the sorted ``*.xml`` files found
        directly inside the folder for ``--folder``.

    Raises:
        FileNotFoundError: If the XML file or the folder does not exist, or
            the folder contains no ``*.xml`` files.
    """
    if not args.xml:
        directory = Path(args.folder)
        if not directory.is_dir():
            raise FileNotFoundError(directory)

        # Non-recursive scan; entries that are not regular files are ignored.
        found = [entry for entry in directory.glob("*.xml") if entry.is_file()]
        found.sort()
        if found:
            return found
        raise FileNotFoundError(f"No *.xml files found in folder: {directory}")

    single = Path(args.xml)
    if single.is_file():
        return [single]
    raise FileNotFoundError(single)
|
102
|
+
|
103
|
+
|
104
|
+
def _resolve_output_and_format(args: argparse.Namespace) -> tuple[Path, str, Optional[Path]]:
    """Work out the output path, output format and report path.

    Resolution rules:
      * ``--out`` takes precedence; its format is ``--format`` when given,
        otherwise it is looked up from the file extension.
      * ``--csv`` (legacy) is used when ``--out`` is absent and always means
        CSV; any other ``--format`` is rejected.
      * The report path is ``None`` with ``--no-report``, else ``--report``,
        else ``<output stem>_report.txt`` next to the output file.

    Returns:
        ``(out_path, out_format, report_path)``.

    Raises:
        SystemExit: If neither ``--out`` nor ``--csv`` is given, the format
            cannot be inferred, or ``--csv`` is combined with a non-CSV format.
    """
    if not (args.out or args.csv):
        raise SystemExit("You must provide either --out (preferred) or --csv (legacy).")

    if args.out:
        destination = Path(args.out)
        # An explicit --format wins; the extension lookup is only a fallback.
        fmt = args.format or EXT_TO_FORMAT.get(destination.suffix.lower())
        if not fmt:
            raise SystemExit(
                "Cannot infer output format from extension. "
                "Use --format {csv,json,xlsx} or set a supported extension."
            )
    else:
        destination = Path(args.csv)
        fmt = args.format or "csv"
        if fmt != "csv":
            # The legacy flag cannot carry a non-CSV format.
            raise SystemExit("When using --csv, --format must be 'csv'. Use --out for json/xlsx.")

    report: Optional[Path]
    if args.no_report:
        report = None
    elif args.report:
        report = Path(args.report)
    else:
        report = destination.with_name(f"{destination.stem}_report.txt")

    return destination, fmt, report
|
141
|
+
|
142
|
+
|
143
|
+
def main(argv: Optional[List[str]] = None) -> None:
    """Command-line entry point: parse arguments, run the export, log the result.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the installed console script
            relies on; passing a list allows programmatic invocation.

    Exit behaviour:
        * ``sys.exit(1)`` when a ``FileNotFoundError`` is raised.
        * ``sys.exit(2)`` on any other ``Exception``; with ``--verbose`` the
          traceback is logged as well so the failure can be diagnosed.
        * ``SystemExit`` raised by argparse or by
          ``_resolve_output_and_format`` is not intercepted and propagates.
    """
    args = build_parser().parse_args(argv)
    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(levelname)s: %(message)s",
        stream=sys.stderr,
    )

    try:
        inputs = _resolve_inputs(args)
        out_path, out_format, report_path = _resolve_output_and_format(args)

        total, final_out, final_report = export_files_with_report(
            inputs=inputs,
            out_path=out_path,
            out_format=out_format,
            fieldnames=DEFAULT_FIELDNAMES,
            delimiter=args.delimiter,
            quoting=args.quoting,
            include_header=not args.no_header,
            encoding=args.encoding,
            ref_type=args.ref_type,
            year=args.year,
            max_records_per_file=args.max_records,
            dedupe=args.dedupe,
            dedupe_keep=args.dedupe_keep,
            stats=args.stats,
            stats_json=Path(args.stats_json) if args.stats_json else None,
            top_authors=args.top_authors,
            report_path=report_path,  # may be None → core should skip writing report
        )
    except FileNotFoundError as e:
        logging.error("File/folder not found: %s", e)
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: keep the one-line message for normal runs, but
        # attach the traceback when the user asked for verbose output.
        logging.error("Unexpected error: %s", e, exc_info=args.verbose)
        sys.exit(2)
    else:
        logging.info("Exported %d record(s) → %s", total, final_out)
        if report_path is None:
            logging.info("Report disabled by --no-report.")
        else:
            logging.info("Report → %s", final_report)
|