poco-harmonizer 0.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- poco_harmonizer-0.0.2/PKG-INFO +430 -0
- poco_harmonizer-0.0.2/README.md +412 -0
- poco_harmonizer-0.0.2/poco_harmonizer.egg-info/PKG-INFO +430 -0
- poco_harmonizer-0.0.2/poco_harmonizer.egg-info/SOURCES.txt +72 -0
- poco_harmonizer-0.0.2/poco_harmonizer.egg-info/dependency_links.txt +1 -0
- poco_harmonizer-0.0.2/poco_harmonizer.egg-info/entry_points.txt +3 -0
- poco_harmonizer-0.0.2/poco_harmonizer.egg-info/requires.txt +11 -0
- poco_harmonizer-0.0.2/poco_harmonizer.egg-info/top_level.txt +1 -0
- poco_harmonizer-0.0.2/pyproject.toml +39 -0
- poco_harmonizer-0.0.2/refharmonizer/__init__.py +10 -0
- poco_harmonizer-0.0.2/refharmonizer/api/__init__.py +6 -0
- poco_harmonizer-0.0.2/refharmonizer/api/app.py +1020 -0
- poco_harmonizer-0.0.2/refharmonizer/api/sessions.py +66 -0
- poco_harmonizer-0.0.2/refharmonizer/cli.py +121 -0
- poco_harmonizer-0.0.2/refharmonizer/core/__init__.py +7 -0
- poco_harmonizer-0.0.2/refharmonizer/core/adapters/__init__.py +12 -0
- poco_harmonizer-0.0.2/refharmonizer/core/adapters/base.py +36 -0
- poco_harmonizer-0.0.2/refharmonizer/core/adapters/csljson.py +69 -0
- poco_harmonizer-0.0.2/refharmonizer/core/adapters/endnote_xml.py +282 -0
- poco_harmonizer-0.0.2/refharmonizer/core/adapters/zotero_local_api.py +69 -0
- poco_harmonizer-0.0.2/refharmonizer/core/api_clients/__init__.py +6 -0
- poco_harmonizer-0.0.2/refharmonizer/core/api_clients/cache.py +94 -0
- poco_harmonizer-0.0.2/refharmonizer/core/api_clients/crossref.py +173 -0
- poco_harmonizer-0.0.2/refharmonizer/core/api_clients/openalex.py +105 -0
- poco_harmonizer-0.0.2/refharmonizer/core/api_clients/openlibrary.py +144 -0
- poco_harmonizer-0.0.2/refharmonizer/core/audit.py +68 -0
- poco_harmonizer-0.0.2/refharmonizer/core/backup.py +18 -0
- poco_harmonizer-0.0.2/refharmonizer/core/detect.py +153 -0
- poco_harmonizer-0.0.2/refharmonizer/core/diff.py +76 -0
- poco_harmonizer-0.0.2/refharmonizer/core/discover.py +187 -0
- poco_harmonizer-0.0.2/refharmonizer/core/enrich.py +1051 -0
- poco_harmonizer-0.0.2/refharmonizer/core/invariants.py +112 -0
- poco_harmonizer-0.0.2/refharmonizer/core/manifest.py +84 -0
- poco_harmonizer-0.0.2/refharmonizer/core/matcher.py +173 -0
- poco_harmonizer-0.0.2/refharmonizer/core/model.py +98 -0
- poco_harmonizer-0.0.2/refharmonizer/core/normalize.py +200 -0
- poco_harmonizer-0.0.2/refharmonizer/core/patch.py +153 -0
- poco_harmonizer-0.0.2/refharmonizer/core/patch_apply.py +58 -0
- poco_harmonizer-0.0.2/refharmonizer/core/pipeline.py +230 -0
- poco_harmonizer-0.0.2/refharmonizer/core/prefs_store.py +65 -0
- poco_harmonizer-0.0.2/refharmonizer/core/preview.py +96 -0
- poco_harmonizer-0.0.2/refharmonizer/core/render.py +94 -0
- poco_harmonizer-0.0.2/refharmonizer/core/ris.py +167 -0
- poco_harmonizer-0.0.2/refharmonizer/core/titlecase.py +147 -0
- poco_harmonizer-0.0.2/refharmonizer/core/tune.py +425 -0
- poco_harmonizer-0.0.2/refharmonizer/data/journals_ltwa.json +25 -0
- poco_harmonizer-0.0.2/refharmonizer/data/sample_library.json +255 -0
- poco_harmonizer-0.0.2/refharmonizer/launch.py +50 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/__init__.py +0 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/conftest.py +47 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_api_category_previews.py +161 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_author_completion.py +190 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_cache_reparse.py +37 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_cli_export.py +32 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_discovery.py +278 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_endnote_adapter.py +73 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_enrich_completion.py +170 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_entity_decoding.py +47 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_invariants.py +57 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_manual_edit.py +149 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_matcher.py +94 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_pipeline.py +99 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_prefs_store.py +48 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_review_preview.py +63 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_ris_export.py +226 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_tune_preferences.py +516 -0
- poco_harmonizer-0.0.2/refharmonizer/tests/test_v1_features.py +117 -0
- poco_harmonizer-0.0.2/refharmonizer/webui/assets/index-CerAaclY.js +9 -0
- poco_harmonizer-0.0.2/refharmonizer/webui/assets/index-jCF1P51-.css +1 -0
- poco_harmonizer-0.0.2/refharmonizer/webui/assets/localApi-BGCbXgya.js +3 -0
- poco_harmonizer-0.0.2/refharmonizer/webui/favicon.svg +1 -0
- poco_harmonizer-0.0.2/refharmonizer/webui/icons.svg +24 -0
- poco_harmonizer-0.0.2/refharmonizer/webui/index.html +20 -0
- poco_harmonizer-0.0.2/setup.cfg +4 -0
|
@@ -0,0 +1,430 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: poco-harmonizer
|
|
3
|
+
Version: 0.0.2
|
|
4
|
+
Summary: PoCo — trust-first, deterministic reference library harmonizer (no LLM).
|
|
5
|
+
License: Apache-2.0
|
|
6
|
+
Requires-Python: >=3.9
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: fastapi>=0.110
|
|
9
|
+
Requires-Dist: uvicorn[standard]>=0.29
|
|
10
|
+
Requires-Dist: python-multipart>=0.0.9
|
|
11
|
+
Requires-Dist: pydantic>=2.6
|
|
12
|
+
Requires-Dist: habanero>=1.2.6
|
|
13
|
+
Requires-Dist: lxml>=5.0
|
|
14
|
+
Requires-Dist: httpx>=0.27
|
|
15
|
+
Provides-Extra: dev
|
|
16
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
17
|
+
Requires-Dist: hypothesis>=6.100; extra == "dev"
|
|
18
|
+
|
|
19
|
+
# PoCo - Polish & Complete
|
|
20
|
+
|
|
21
|
+
PoCo is a local-first tool for cleaning and completing Zotero and EndNote
|
|
22
|
+
reference libraries.
|
|
23
|
+
|
|
24
|
+
It helps you turn an inconsistent reference export into a cleaner copy: consistent
|
|
25
|
+
journal names, safer DOI formatting, completed author names, page ranges,
|
|
26
|
+
publication details, book metadata, and a transparent audit trail of what changed.
|
|
27
|
+
|
|
28
|
+
The important part: **you stay in control**. PoCo shows every proposed change
|
|
29
|
+
before export, lets you reject or edit suggestions, and never modifies your
|
|
30
|
+
original library file.
|
|
31
|
+
|
|
32
|
+
PoCo does not use an LLM or generative AI. The engine is deterministic: each
|
|
33
|
+
suggestion comes from a scoped rule, your chosen preferences, a local lookup
|
|
34
|
+
table, or source-backed metadata from public services such as CrossRef, OpenAlex,
|
|
35
|
+
and Open Library.
|
|
36
|
+
|
|
37
|
+
## Quick Start
|
|
38
|
+
|
|
39
|
+
PoCo runs on your own computer and opens in your browser. You install it once
|
|
40
|
+
from a terminal, then start it any time by typing `poco`. The steps below set up
|
|
41
|
+
everything from scratch — no prior tools needed.
|
|
42
|
+
|
|
43
|
+
> **If `poco-harmonizer` is not yet available on PyPI** (for example, before PoCo's first PyPI release), replace `pipx install poco-harmonizer`
|
|
44
|
+
> in the steps below with:
|
|
45
|
+
> `pipx install git+https://github.com/Rkn12345/poco-reference-harmonizer.git`
|
|
46
|
+
|
|
47
|
+
### Windows
|
|
48
|
+
|
|
49
|
+
1. **Install Python.** Download it from
|
|
50
|
+
[python.org/downloads](https://www.python.org/downloads/) and run the
|
|
51
|
+
installer. On the first screen, **tick "Add python.exe to PATH"**, then click
|
|
52
|
+
*Install Now*. (This checkbox is easy to miss and everything else depends on
|
|
53
|
+
it.)
|
|
54
|
+
2. **Open a terminal.** Click Start, type `PowerShell`, and press Enter.
|
|
55
|
+
3. **Install pipx** (a small helper that installs apps like PoCo cleanly). Paste
|
|
56
|
+
these lines one at a time:
|
|
57
|
+
```powershell
|
|
58
|
+
py -m pip install --user pipx
|
|
59
|
+
py -m pipx ensurepath
|
|
60
|
+
```
|
|
61
|
+
Then **close PowerShell and open it again** so it picks up the change.
|
|
62
|
+
4. **Install PoCo:**
|
|
63
|
+
```powershell
|
|
64
|
+
pipx install poco-harmonizer
|
|
65
|
+
```
|
|
66
|
+
5. **Start PoCo:**
|
|
67
|
+
```powershell
|
|
68
|
+
poco
|
|
69
|
+
```
|
|
70
|
+
Your browser opens with PoCo running. To stop it, go back to PowerShell and
|
|
71
|
+
press `Ctrl+C`.
|
|
72
|
+
|
|
73
|
+
### macOS
|
|
74
|
+
|
|
75
|
+
1. **Open the Terminal.** Press `Cmd+Space`, type `Terminal`, and press Enter.
|
|
76
|
+
2. **Install Homebrew** if you don't already have it — it sets up Python and pipx
|
|
77
|
+
for you. Paste this line and follow the prompts (skip if you already have
|
|
78
|
+
Homebrew):
|
|
79
|
+
```bash
|
|
80
|
+
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
|
|
81
|
+
```
|
|
82
|
+
3. **Install pipx:**
|
|
83
|
+
```bash
|
|
84
|
+
brew install pipx
|
|
85
|
+
pipx ensurepath
|
|
86
|
+
```
|
|
87
|
+
Then **close the Terminal and open it again**.
|
|
88
|
+
4. **Install PoCo:**
|
|
89
|
+
```bash
|
|
90
|
+
pipx install poco-harmonizer
|
|
91
|
+
```
|
|
92
|
+
5. **Start PoCo:**
|
|
93
|
+
```bash
|
|
94
|
+
poco
|
|
95
|
+
```
|
|
96
|
+
Your browser opens with PoCo running. To stop it, go back to the Terminal and
|
|
97
|
+
press `Ctrl+C`.
|
|
98
|
+
|
|
99
|
+
After the first setup, you only ever need the last step — just type `poco` to
|
|
100
|
+
start it again.
|
|
101
|
+
|
|
102
|
+
## Why You Can Trust It
|
|
103
|
+
|
|
104
|
+
PoCo is built so you don't have to take any of the claims above on faith — each
|
|
105
|
+
one is checkable:
|
|
106
|
+
|
|
107
|
+
- **It's open source (Apache-2.0).** The whole engine and rule set are in this
|
|
108
|
+
repository. Nothing about how a change is decided is hidden.
|
|
109
|
+
- **No LLM, no guessing.** Every change traces to a named rule, your preference,
|
|
110
|
+
a local table, or a cited public source — see [`refharmonizer/core/`](refharmonizer/core/).
|
|
111
|
+
- **Your originals are never touched.** PoCo reads your file and writes a *new*
|
|
112
|
+
copy; the input bytes are hashed into the run manifest so you can prove it.
|
|
113
|
+
- **Nothing is exported without your confirmation.** Review is a hard gate, and
|
|
114
|
+
editing any decision resets the confirmation (see [Confirm](#5-confirm)).
|
|
115
|
+
- **It runs on your machine.** No accounts, analytics, or telemetry. Offline mode
|
|
116
|
+
sends zero network requests; online enrichment only sends the lookup fields
|
|
117
|
+
documented in [Privacy And Control](#privacy-and-control).
|
|
118
|
+
- **Every run is reproducible.** A manifest records input/output hashes, engine
|
|
119
|
+
and table versions, the sources queried, and the disposition of every change.
|
|
120
|
+
- **It's honest about its limits.** See [Known Limitations](#known-limitations)
|
|
121
|
+
rather than discovering them yourself.
|
|
122
|
+
|
|
123
|
+
## What You Can Do With It
|
|
124
|
+
|
|
125
|
+
- Import a Zotero CSL-JSON export, an EndNote XML export, the bundled sample
|
|
126
|
+
library, or a read-only Zotero local API session.
|
|
127
|
+
- Choose how you want references to look before analysis: author names,
|
|
128
|
+
journal-name style, DOI style, page ranges, title casing, publisher style,
|
|
129
|
+
volume/issue completion, journal capitalization, and protected terms.
|
|
130
|
+
- Analyze the library for existing conventions and possible improvements.
|
|
131
|
+
- Review proposed changes grouped by category: titles, journal names, authors,
|
|
132
|
+
identifiers, item types, publication details, book metadata, text cleanup, and
|
|
133
|
+
your manual edits.
|
|
134
|
+
- Accept, reject, or edit suggestions before export.
|
|
135
|
+
- Add your own manual corrections for key fields such as title, journal title,
|
|
136
|
+
DOI, pages, volume, issue, publisher, ISSN, ISBN, and year.
|
|
137
|
+
- Export a polished copy plus audit artifacts that explain the run.
|
|
138
|
+
|
|
139
|
+
## How The Workflow Works
|
|
140
|
+
|
|
141
|
+
### 1. Import
|
|
142
|
+
|
|
143
|
+
Start with a reference export from Zotero or EndNote, or use the bundled sample
|
|
144
|
+
library.
|
|
145
|
+
|
|
146
|
+
PoCo keeps the original input intact. Internally, it builds an analysis view and
|
|
147
|
+
stores the original records separately so accepted changes can be applied back
|
|
148
|
+
onto a copy at export time.
|
|
149
|
+
|
|
150
|
+
Supported inputs today:
|
|
151
|
+
|
|
152
|
+
- Zotero CSL-JSON export
|
|
153
|
+
- EndNote XML export
|
|
154
|
+
- Zotero local API read from `localhost:23119`
|
|
155
|
+
- Bundled sample library for trying the app immediately
|
|
156
|
+
|
|
157
|
+
### 2. Tune
|
|
158
|
+
|
|
159
|
+
Before running the engine, you choose the conventions you want PoCo to follow.
|
|
160
|
+
For example:
|
|
161
|
+
|
|
162
|
+
- full author names or initials
|
|
163
|
+
- full or abbreviated journal names
|
|
164
|
+
- bare DOI or DOI URL
|
|
165
|
+
- expanded or abbreviated page ranges
|
|
166
|
+
- full or abbreviated publisher names
|
|
167
|
+
- sentence case or headline case titles
|
|
168
|
+
- whether to complete volume/issue fields
|
|
169
|
+
|
|
170
|
+
These choices are not hidden defaults. They become part of the run configuration
|
|
171
|
+
and are recorded in the manifest.
|
|
172
|
+
|
|
173
|
+
### 3. Detect And Enrich
|
|
174
|
+
|
|
175
|
+
PoCo scans the library to detect existing conventions and identify records that
|
|
176
|
+
may be improved.
|
|
177
|
+
|
|
178
|
+
It can:
|
|
179
|
+
|
|
180
|
+
- normalize DOI format and safe text hygiene
|
|
181
|
+
- detect title-case, journal-name, and page-range conventions
|
|
182
|
+
- complete missing or abbreviated author names when source evidence is available
|
|
183
|
+
- complete pages, volume, issue, dates, ISSN/ISBN, and publisher fields
|
|
184
|
+
- discover missing DOIs through CrossRef bibliographic search when enabled
|
|
185
|
+
- flag exact-DOI duplicates
|
|
186
|
+
- flag retraction or correction notices when source metadata exposes them
|
|
187
|
+
- use local journal tables when offline mode is enabled
|
|
188
|
+
|
|
189
|
+
Every suggested change is represented as a patch with a source, confidence tier,
|
|
190
|
+
category, evidence, and before/after value.
|
|
191
|
+
|
|
192
|
+
### 4. Review
|
|
193
|
+
|
|
194
|
+
The review step is the main safety gate.
|
|
195
|
+
|
|
196
|
+
PoCo groups changes into readable sections and shows each proposed change in
|
|
197
|
+
context. You can:
|
|
198
|
+
|
|
199
|
+
- accept a suggestion
|
|
200
|
+
- reject a suggestion
|
|
201
|
+
- edit the suggested value
|
|
202
|
+
- inspect source evidence
|
|
203
|
+
- see conflicts when metadata sources disagree
|
|
204
|
+
- see which records were left unchanged and why
|
|
205
|
+
|
|
206
|
+
Rejecting a discovered DOI also rejects the changes that depended on that DOI.
|
|
207
|
+
Manual edits are logged like every other change and apply last, so your value
|
|
208
|
+
wins.
|
|
209
|
+
|
|
210
|
+
### 5. Confirm
|
|
211
|
+
|
|
212
|
+
Export is blocked until you confirm the review.
|
|
213
|
+
|
|
214
|
+
If you change an accept/reject decision or edit a value, the confirmation gate is
|
|
215
|
+
reset. This makes it difficult to accidentally export a library after changing
|
|
216
|
+
the review state.
|
|
217
|
+
|
|
218
|
+
### 6. Export
|
|
219
|
+
|
|
220
|
+
PoCo exports a new file. It does not edit your original library.
|
|
221
|
+
|
|
222
|
+
Export includes:
|
|
223
|
+
|
|
224
|
+
- a polished library file (`poco_library.json` for Zotero, `poco_library.ris`
|
|
225
|
+
for EndNote)
|
|
226
|
+
- an audit log as CSV
|
|
227
|
+
- an audit log as JSON
|
|
228
|
+
- an audit log as HTML
|
|
229
|
+
- a run manifest as JSON
|
|
230
|
+
|
|
231
|
+
For Zotero, import the polished CSL-JSON into a new collection.
|
|
232
|
+
|
|
233
|
+
For EndNote, the polished library is exported as **RIS** — EndNote's own XML
|
|
234
|
+
re-import is unreliable, whereas RIS imports dependably through the built-in
|
|
235
|
+
*Reference Manager (RIS)* filter (`File > Import`). Import it into a new, empty
|
|
236
|
+
library or group to avoid duplicates. RIS does not carry EndNote's `rec-number`,
|
|
237
|
+
so it does not round-trip in place; the audit log records a per-`rec-number`
|
|
238
|
+
change mapping. The original EndNote XML is still available as an advanced
|
|
239
|
+
download for anyone who prefers it.
|
|
240
|
+
|
|
241
|
+
## Privacy And Control
|
|
242
|
+
|
|
243
|
+
PoCo is designed as a local-first tool.
|
|
244
|
+
|
|
245
|
+
- Your library is processed on your machine.
|
|
246
|
+
- The local app keeps session data in memory while it is running.
|
|
247
|
+
- PoCo does not provide accounts, analytics, telemetry, or a hosted database in
|
|
248
|
+
the local version.
|
|
249
|
+
- Your original library file is never modified.
|
|
250
|
+
- Export only happens after you confirm the review.
|
|
251
|
+
- Offline mode disables public metadata lookups and uses bundled/local data.
|
|
252
|
+
|
|
253
|
+
When online enrichment is enabled, PoCo may query public metadata services such
|
|
254
|
+
as CrossRef, OpenAlex, and Open Library. Depending on the record, those requests
|
|
255
|
+
may use identifiers such as DOI, ISSN, or ISBN, or bibliographic search fields
|
|
256
|
+
such as title, author, and year for DOI discovery.
|
|
257
|
+
|
|
258
|
+
PoCo may also keep local convenience files on your own machine:
|
|
259
|
+
|
|
260
|
+
- API response cache: `~/.cache/refharmonizer/api_cache.sqlite`
|
|
261
|
+
- saved Tune preferences and protected terms:
|
|
262
|
+
`~/.cache/refharmonizer/preferences.json`
|
|
263
|
+
|
|
264
|
+
These local files are used to make repeat runs faster, support auditability, and
|
|
265
|
+
remember your preferences. They stay on your machine and are not uploaded anywhere.
|
|
266
|
+
|
|
267
|
+
## Transparency And Auditability
|
|
268
|
+
|
|
269
|
+
Every run is designed to be inspectable.
|
|
270
|
+
|
|
271
|
+
The audit logs record the proposed changes with:
|
|
272
|
+
|
|
273
|
+
- record key
|
|
274
|
+
- field path
|
|
275
|
+
- old value
|
|
276
|
+
- new value
|
|
277
|
+
- category
|
|
278
|
+
- rule family
|
|
279
|
+
- confidence tier
|
|
280
|
+
- source
|
|
281
|
+
- dependency information
|
|
282
|
+
- label explaining the change
|
|
283
|
+
|
|
284
|
+
The manifest records:
|
|
285
|
+
|
|
286
|
+
- engine version
|
|
287
|
+
- ruleset version
|
|
288
|
+
- lookup table version
|
|
289
|
+
- input and output file hashes
|
|
290
|
+
- whether offline mode was used
|
|
291
|
+
- effective configuration and preferences
|
|
292
|
+
- DOI discovery log
|
|
293
|
+
- API sources queried
|
|
294
|
+
- accepted, rejected, skipped, and unchanged items
|
|
295
|
+
|
|
296
|
+
The goal is not just to produce a cleaner library, but to make the process
|
|
297
|
+
reviewable later.
|
|
298
|
+
|
|
299
|
+
## Install And Run
|
|
300
|
+
|
|
301
|
+
See [Quick Start](#quick-start) for the from-scratch, step-by-step setup. PoCo
|
|
302
|
+
runs entirely on your own machine — there is no hosted backend and nothing is
|
|
303
|
+
uploaded.
|
|
304
|
+
|
|
305
|
+
If you already have [pipx](https://pipx.pypa.io):
|
|
306
|
+
|
|
307
|
+
```bash
|
|
308
|
+
pipx install poco-harmonizer
|
|
309
|
+
poco
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
Or run it without a permanent install using [uv](https://docs.astral.sh/uv/):
|
|
313
|
+
|
|
314
|
+
```bash
|
|
315
|
+
uvx --from poco-harmonizer poco
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
Running `poco` opens `http://127.0.0.1:<port>` in your browser. On the Import step,
|
|
319
|
+
use the bundled sample library, or upload a Zotero CSL-JSON / EndNote XML export.
|
|
320
|
+
Your library is processed locally and your original file is never modified.
|
|
321
|
+
|
|
322
|
+
## Command Line
|
|
323
|
+
|
|
324
|
+
The CLI uses the same deterministic engine and preview-before-export posture.
|
|
325
|
+
|
|
326
|
+
```bash
|
|
327
|
+
# Dry run on the bundled sample. Writes nothing.
|
|
328
|
+
python -m refharmonizer.cli
|
|
329
|
+
|
|
330
|
+
# Dry run on your library. Writes nothing.
|
|
331
|
+
python -m refharmonizer.cli my-library.json
|
|
332
|
+
|
|
333
|
+
# Export a polished copy after previewing the summary.
|
|
334
|
+
python -m refharmonizer.cli my-library.json --apply
|
|
335
|
+
|
|
336
|
+
# EndNote XML, offline mode.
|
|
337
|
+
python -m refharmonizer.cli my-library.xml --offline --apply
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
Without `--apply`, the CLI prints detected conventions, proposed change counts,
|
|
341
|
+
examples, and skipped items, then exits without writing output.
|
|
342
|
+
|
|
343
|
+
## Tests
|
|
344
|
+
|
|
345
|
+
```bash
|
|
346
|
+
pip install -e ".[dev]"
|
|
347
|
+
pytest
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
Current verified state:
|
|
351
|
+
|
|
352
|
+
- Python test suite: 113 tests passing
|
|
353
|
+
- Frontend production build: passing
|
|
354
|
+
|
|
355
|
+
## Project Map
|
|
356
|
+
|
|
357
|
+
```text
|
|
358
|
+
refharmonizer/
|
|
359
|
+
api/ FastAPI app, local sessions, review gate, downloads
|
|
360
|
+
core/
|
|
361
|
+
adapters/ CSL-JSON, EndNote XML, Zotero local API ingest/export
|
|
362
|
+
api_clients/ CrossRef, OpenAlex, Open Library, local response cache
|
|
363
|
+
detect.py convention detection
|
|
364
|
+
discover.py DOI discovery with review gating
|
|
365
|
+
enrich.py source-backed completion and flags
|
|
366
|
+
normalize.py deterministic local cleanup rules
|
|
367
|
+
invariants.py rule contracts; unsafe patches are dropped
|
|
368
|
+
patch.py patch/evidence/confidence model
|
|
369
|
+
preview.py grouped preview and skipped-item report
|
|
370
|
+
render.py neutral reference previews
|
|
371
|
+
ris.py RIS export for the EndNote path
|
|
372
|
+
audit.py CSV/JSON/HTML audit logs
|
|
373
|
+
manifest.py reproducible run manifest
|
|
374
|
+
pipeline.py engine orchestration
|
|
375
|
+
data/ bundled sample library and journal lookup table
|
|
376
|
+
tests/ engine, API, adapter, preview, and invariant tests
|
|
377
|
+
|
|
378
|
+
web/ Vite + React + TypeScript frontend
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
## Development
|
|
382
|
+
|
|
383
|
+
Contributors run the backend and frontend as two separate processes for hot-reload. Backend (Python 3.9+):
|
|
384
|
+
|
|
385
|
+
```bash
|
|
386
|
+
python3 -m venv .venv
|
|
387
|
+
source .venv/bin/activate
|
|
388
|
+
pip install -e ".[dev]"
|
|
389
|
+
uvicorn refharmonizer.api.app:app --port 8000 --reload
|
|
390
|
+
```
|
|
391
|
+
|
|
392
|
+
Frontend (Node 18+), in another terminal — proxies `/api` to the backend:
|
|
393
|
+
|
|
394
|
+
```bash
|
|
395
|
+
cd web
|
|
396
|
+
npm install
|
|
397
|
+
npm run dev # http://localhost:5173
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
To produce the single-process app that `poco` serves, build the UI into the
|
|
401
|
+
package, then install:
|
|
402
|
+
|
|
403
|
+
```bash
|
|
404
|
+
cd web && npm run build # outputs to refharmonizer/webui/
|
|
405
|
+
cd .. && pip install . # bundles the built UI into the wheel
|
|
406
|
+
poco
|
|
407
|
+
```
|
|
408
|
+
|
|
409
|
+
## Known Limitations
|
|
410
|
+
|
|
411
|
+
PoCo is honest about what it does not yet do:
|
|
412
|
+
|
|
413
|
+
- **No in-place update.** PoCo exports a clean copy for import into a *new*
|
|
414
|
+
collection or library; it does not modify your Zotero or EndNote library in
|
|
415
|
+
place. (A future "Connect Zotero" mode could update items via the Web API.)
|
|
416
|
+
- **EndNote round-trip is import-only.** The RIS export imports cleanly but cannot
|
|
417
|
+
carry `rec-number`, so changes are not merged back onto your existing records.
|
|
418
|
+
- **Editors are not yet captured for book chapters.** The EndNote ingest reads
|
|
419
|
+
authors but not secondary-author/editor fields, so chapter editors are not
|
|
420
|
+
completed or carried through.
|
|
421
|
+
- **Input formats.** Direct `.enl` / `.enlx`, BibTeX, and Zotero database-file
|
|
422
|
+
parsing are not supported. Inputs are Zotero CSL-JSON, EndNote XML export, and
|
|
423
|
+
the Zotero local API.
|
|
424
|
+
- **Distribution.** Installs via `pipx`/`uv` and needs Python 3.9+ (see
|
|
425
|
+
[Install And Run](#install-and-run)). Signed standalone installers
|
|
426
|
+
(`.exe`/`.dmg`/AppImage) for users without Python are not yet provided.
|
|
427
|
+
|
|
428
|
+
Citation styling is intentionally out of scope. PoCo improves the underlying
|
|
429
|
+
reference metadata; your reference manager and citation style still control how
|
|
430
|
+
the final bibliography is formatted.
|