citations-collector 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- citations_collector/__init__.py +18 -0
- citations_collector/_version.py +34 -0
- citations_collector/cli.py +525 -0
- citations_collector/core.py +503 -0
- citations_collector/discovery/__init__.py +17 -0
- citations_collector/discovery/base.py +26 -0
- citations_collector/discovery/crossref.py +210 -0
- citations_collector/discovery/datacite.py +260 -0
- citations_collector/discovery/openalex.py +252 -0
- citations_collector/discovery/opencitations.py +168 -0
- citations_collector/discovery/utils.py +62 -0
- citations_collector/importers/__init__.py +17 -0
- citations_collector/importers/bibtex.py +178 -0
- citations_collector/importers/dandi.py +314 -0
- citations_collector/importers/github.py +147 -0
- citations_collector/importers/zenodo.py +110 -0
- citations_collector/importers/zotero.py +262 -0
- citations_collector/merge_detection.py +216 -0
- citations_collector/models/__init__.py +44 -0
- citations_collector/models/generated.py +525 -0
- citations_collector/pdf.py +260 -0
- citations_collector/persistence/__init__.py +7 -0
- citations_collector/persistence/tsv_io.py +121 -0
- citations_collector/persistence/yaml_io.py +50 -0
- citations_collector/py.typed +0 -0
- citations_collector/unpaywall.py +60 -0
- citations_collector/zotero_sync.py +591 -0
- citations_collector-0.2.3.dist-info/METADATA +456 -0
- citations_collector-0.2.3.dist-info/RECORD +31 -0
- citations_collector-0.2.3.dist-info/WHEEL +4 -0
- citations_collector-0.2.3.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,591 @@
|
|
|
1
|
+
"""Sync citations to Zotero as hierarchical collections."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
|
|
8
|
+
from pyzotero import zotero
|
|
9
|
+
|
|
10
|
+
from citations_collector.models import CitationRecord, Collection
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
TRACKER_PREFIX = "CitationTracker:"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class SyncReport:
    """Counters and error messages accumulated during one Zotero sync run.

    Every counter starts at zero and ``errors`` starts as a fresh empty list,
    so a default-constructed report describes a run in which nothing happened.
    """

    # Collections created (or, in a dry run, that would have been created).
    collections_created: int = 0
    # Citation items created (or, in a dry run, that would have been created).
    items_created: int = 0
    # Existing items that were changed (moved and/or linked to a related item).
    items_updated: int = 0
    # Existing items that were left untouched.
    items_skipped: int = 0
    # Linked-URL attachments recorded for newly created items.
    attachments_created: int = 0
    # One human-readable message per failed operation.
    errors: list[str] = field(default_factory=list)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ZoteroSyncer:
    """Sync citation records to Zotero as hierarchical collections.

    Creates a two-level collection hierarchy under the configured top-level
    collection::

        top_collection/
            {item_id}/
                {flavor}/
                    <active citation items>
                    Merged/
                        <preprints and old versions>

    Active citations are dual-assigned to both the item-level and
    flavor-level collections so they appear when browsing either level.
    Merged citations are only placed in the ``Merged`` subcollection.

    Each citation item includes a tracker key in the ``extra`` field
    (``CitationTracker: {item_id}/{flavor}/{doi_or_url}``) so that
    subsequent syncs can detect items that already exist.
    """

    # Maps ``CitationRecord.citation_type`` (as a string) to a Zotero item
    # type; anything not listed falls back to "journalArticle".
    _ITEM_TYPE_MAP = {
        "Preprint": "preprint",
        "Thesis": "thesis",
        "Book": "book",
        "Software": "computerProgram",
        "Dataset": "dataset",
    }

    def __init__(self, api_key: str, group_id: int, collection_key: str) -> None:
        """Create a syncer bound to one Zotero group library.

        Args:
            api_key: Zotero API key passed to the pyzotero client.
            group_id: Zotero group id; also used to build item URIs.
            collection_key: Key of the top-level collection to sync under.
        """
        self.zot = zotero.Zotero(group_id, "group", api_key)
        self.group_id = group_id
        self.top_collection_key = collection_key

    def sync(
        self,
        collection: Collection,
        citations: list[CitationRecord],
        dry_run: bool = False,
    ) -> SyncReport:
        """Sync citations to Zotero hierarchy.

        Args:
            collection: The source collection definition (not read here).
            citations: Citation records to sync.
            dry_run: If ``True``, log what would happen and perform no
                writes. Existing collections and items are still read.

        Returns:
            A :class:`SyncReport` summarising the operations performed.

        Raises:
            RuntimeError: If a required collection cannot be created.
        """
        report = SyncReport()

        # Snapshot of what is already in Zotero.
        existing_collections = self._fetch_subcollections(self.top_collection_key)
        existing_items = self._fetch_existing_items()

        # item_id -> flavor -> "active"/"merged" -> [citations]
        grouped = self._group_citations(citations)

        for item_id, flavors in grouped.items():
            item_collection_name = self._strip_prefix(item_id)

            # Find or create the item-level collection.
            item_coll_key = self._find_collection(existing_collections, item_collection_name)
            if not item_coll_key:
                if dry_run:
                    # Nothing below this collection can exist yet, so every
                    # flavor collection and item would be newly created.
                    logger.info("Would create collection: %s", item_collection_name)
                    report.collections_created += 1
                    for flavor_id, buckets in flavors.items():
                        logger.info(" Would create sub-collection: %s", flavor_id)
                        report.collections_created += 1
                        self._log_planned_items(buckets, report)
                    continue
                item_coll_key = self._create_collection(
                    item_collection_name, self.top_collection_key
                )
                report.collections_created += 1
                existing_collections[item_coll_key] = item_collection_name

            item_subcollections = self._fetch_subcollections(item_coll_key)

            for flavor_id, buckets in flavors.items():
                # Find or create the flavor-level collection.
                flavor_coll_key = self._find_collection(item_subcollections, flavor_id)
                if not flavor_coll_key:
                    if dry_run:
                        logger.info(
                            " Would create sub-collection: %s under %s",
                            flavor_id,
                            item_collection_name,
                        )
                        report.collections_created += 1
                        self._log_planned_items(buckets, report)
                        continue
                    flavor_coll_key = self._create_collection(flavor_id, item_coll_key)
                    report.collections_created += 1
                    item_subcollections[flavor_coll_key] = flavor_id

                # Active citations are dual-assigned to item + flavor collections.
                for citation in buckets.get("active", []):
                    self._sync_single_citation(
                        citation,
                        [item_coll_key, flavor_coll_key],
                        existing_items,
                        dry_run,
                        report,
                        is_merged=False,
                    )

                # Merged citations live only in the "Merged" subcollection,
                # which is resolved lazily so it is not created when unused.
                merged_list = buckets.get("merged", [])
                if not merged_list:
                    continue

                flavor_subcollections = self._fetch_subcollections(flavor_coll_key)
                merged_coll_key: str | None = self._find_collection(
                    flavor_subcollections, "Merged"
                )
                if not merged_coll_key:
                    if dry_run:
                        logger.info(" Would create sub-collection: Merged")
                    else:
                        merged_coll_key = self._create_collection("Merged", flavor_coll_key)
                    report.collections_created += 1

                # In a dry run the Merged collection may not exist; an empty
                # target list is then passed through.
                target = [merged_coll_key] if merged_coll_key else []
                for citation in merged_list:
                    self._sync_single_citation(
                        citation, target, existing_items, dry_run, report, is_merged=True
                    )

        return report

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _log_planned_items(
        buckets: dict[str, list[CitationRecord]], report: SyncReport
    ) -> None:
        """Dry-run helper: log and count every citation in *buckets* as a creation."""
        for bucket_citations in buckets.values():
            for c in bucket_citations:
                logger.info(
                    " Would create item: %s",
                    c.citation_doi or c.citation_title,
                )
                report.items_created += 1

    def _sync_single_citation(
        self,
        citation: CitationRecord,
        collection_keys: list[str],
        existing_items: dict[str, dict],
        dry_run: bool,
        report: SyncReport,
        is_merged: bool = False,
    ) -> None:
        """Create or update a single citation item.

        An item that already exists (matched by tracker key) is skipped when
        it is active. When it is merged, it is moved to *collection_keys* if
        its collections differ and linked to the item it was merged into if
        that item is known and not yet linked. An item that does not exist is
        created, with a linked-URL attachment when ``citation.pdf_url`` is set.

        Failures of write calls are logged and appended to ``report.errors``
        rather than raised.

        Args:
            citation: Citation record to sync
            collection_keys: Target collection keys for this citation
            existing_items: Dict of existing items by tracker key
            dry_run: If True, log actions without making write calls
            report: Sync report to update
            is_merged: If True, this citation is marked as merged
        """
        tracker_key = self._make_tracker_key(citation)

        if tracker_key in existing_items:
            existing_item = existing_items[tracker_key]

            if not is_merged:
                # Active items that already exist are left untouched.
                report.items_skipped += 1
                return

            current_collections = existing_item["data"].get("collections", [])
            needs_move = set(current_collections) != set(collection_keys)
            needs_relation = False
            published_key = None
            published_item = None

            # Look up the item this one was merged into, for the related link.
            if citation.citation_merged_into:
                published_tracker = self._make_tracker_key_for_doi(
                    citation.item_id, citation.item_flavor, citation.citation_merged_into
                )
                published_item = existing_items.get(published_tracker)
                if published_item is not None:
                    published_key = published_item["data"]["key"]

                    relations = existing_item["data"].get("relations", {})
                    dc_relation = relations.get("dc:relation", [])
                    if isinstance(dc_relation, str):
                        # A single relation may be stored as a bare string.
                        dc_relation = [dc_relation]

                    published_uri = (
                        f"http://zotero.org/groups/{self.group_id}/items/{published_key}"
                    )
                    needs_relation = published_uri not in dc_relation

            if not (needs_move or needs_relation):
                report.items_skipped += 1
                return

            if dry_run:
                if needs_move:
                    logger.info(
                        " Would move existing item to Merged: %s",
                        citation.citation_title,
                    )
                if needs_relation:
                    logger.info(
                        " Would add related item link: %s",
                        citation.citation_title,
                    )
                report.items_updated += 1
                return

            try:
                if needs_move:
                    self._move_item_to_collections(existing_item, collection_keys)
                    logger.info("Moved item to Merged: %s", citation.citation_title)

                if needs_relation and published_key:
                    merged_key = existing_item["data"]["key"]
                    items_by_key = {
                        merged_key: existing_item,
                        published_key: published_item,
                    }
                    self._add_related_item(merged_key, published_key, items_by_key)

                report.items_updated += 1
            except Exception as e:
                logger.error("Error updating item %s: %s", citation.citation_doi, e)
                report.errors.append(f"{citation.citation_doi}: {e}")
            return

        # Item does not exist yet: create it.
        if dry_run:
            logger.info(" Would create: %s (%s)", citation.citation_title, citation.citation_doi)
            report.items_created += 1
            return

        try:
            zot_item = self._citation_to_zotero_item(citation, collection_keys)
            resp = self.zot.create_items([zot_item])

            if resp.get("successful"):
                report.items_created += 1
                if citation.pdf_url:
                    created_key = resp["successful"]["0"]["key"]
                    # Count the attachment only when it was actually stored.
                    if self._attach_linked_url(
                        created_key, citation.pdf_url, citation.citation_title
                    ):
                        report.attachments_created += 1
            elif resp.get("failed"):
                err = str(resp["failed"])
                logger.error("Failed to create item %s: %s", citation.citation_doi, err)
                report.errors.append(f"{citation.citation_doi}: {err}")
        except Exception as e:
            logger.error("Error creating item %s: %s", citation.citation_doi, e)
            report.errors.append(f"{citation.citation_doi}: {e}")

    def _fetch_subcollections(self, parent_key: str) -> dict[str, str]:
        """Fetch subcollections under *parent_key*. Returns ``{key: name}``.

        All result pages are followed via ``everything()``; reading only the
        first page would make later collections look missing and cause
        duplicates to be created. On any API error a warning is logged and an
        empty dict is returned (best effort).
        """
        try:
            collections = self.zot.everything(self.zot.collections_sub(parent_key))
        except Exception as e:
            logger.warning("Error fetching subcollections of %s: %s", parent_key, e)
            return {}
        return {c["key"]: c["data"]["name"] for c in collections}

    def _fetch_existing_items(self) -> dict[str, dict]:
        """Fetch all items under the top collection tree, indexed by tracker key.

        Walks subcollections recursively since ``collection_items`` only
        returns items directly in the given collection. Attachments, notes
        and items without a tracker line in ``extra`` are ignored. If an API
        call fails, a warning is logged and the items gathered so far are
        returned.
        """
        items: dict[str, dict] = {}
        try:
            collection_keys = self._collect_all_subcollection_keys(self.top_collection_key)
            collection_keys.append(self.top_collection_key)
            for coll_key in collection_keys:
                coll_items = self.zot.everything(self.zot.collection_items(coll_key))
                for item in coll_items:
                    if item["data"].get("itemType") in ("attachment", "note"):
                        continue
                    extra = item["data"].get("extra") or ""
                    for line in extra.split("\n"):
                        if line.startswith(TRACKER_PREFIX):
                            # Only the first tracker line of an item counts.
                            items[line[len(TRACKER_PREFIX) :].strip()] = item
                            break
        except Exception as e:
            logger.warning("Error fetching existing items: %s", e)
        return items

    def _collect_all_subcollection_keys(self, parent_key: str) -> list[str]:
        """Recursively collect all subcollection keys under a parent.

        Each key is followed immediately by the keys of its own descendants.
        """
        keys: list[str] = []
        for key in self._fetch_subcollections(parent_key):
            keys.append(key)
            keys.extend(self._collect_all_subcollection_keys(key))
        return keys

    def _group_citations(
        self, citations: list[CitationRecord]
    ) -> dict[str, dict[str, dict[str, list[CitationRecord]]]]:
        """Group citations by ``item_id -> flavor -> status_bucket -> [citations]``.

        ``status_bucket`` is either ``"active"`` or ``"merged"``. A citation
        without a status counts as active. Other statuses (e.g. ``ignored``)
        are skipped entirely.
        """
        grouped: dict[str, dict[str, dict[str, list[CitationRecord]]]] = {}
        for c in citations:
            status = str(c.citation_status) if c.citation_status else "active"
            if status not in ("active", "merged"):
                continue
            (
                grouped.setdefault(c.item_id, {})
                .setdefault(c.item_flavor, {})
                .setdefault(status, [])
                .append(c)
            )
        return grouped

    def _get_item_name(self, citations: list[CitationRecord], item_id: str) -> str | None:
        """Return the item name from the first citation matching *item_id*.

        Citations with an empty ``item_name`` are passed over; ``None`` is
        returned when no citation matches.
        """
        return next(
            (c.item_name for c in citations if c.item_id == item_id and c.item_name),
            None,
        )

    @staticmethod
    def _strip_prefix(item_id: str) -> str:
        """Strip namespace prefix: ``'dandi:000020'`` -> ``'000020'``.

        Only the part before the first ``:`` is removed; an id without a
        colon is returned unchanged.
        """
        return item_id.split(":", 1)[-1]

    @staticmethod
    def _find_collection(collections: dict[str, str], name: str) -> str | None:
        """Find collection key by name.

        Args:
            collections: ``{key: name}`` mapping to search.
            name: Exact collection name to look for.

        Returns:
            The key of the first collection with that name, else ``None``.
        """
        for key, coll_name in collections.items():
            if coll_name == name:
                return key
        return None

    def _create_collection(self, name: str, parent_key: str) -> str:
        """Create a new collection under *parent_key*. Returns the new key.

        Raises:
            RuntimeError: If the API response reports no successful creation.
        """
        payload = {"name": name, "parentCollection": parent_key}
        resp = self.zot.create_collections([payload])
        if resp.get("successful"):
            return str(resp["successful"]["0"]["key"])
        raise RuntimeError(f"Failed to create collection '{name}': {resp}")

    def _citation_to_zotero_item(
        self, citation: CitationRecord, collection_keys: list[str]
    ) -> dict:
        """Convert a :class:`CitationRecord` to a Zotero item dict.

        Args:
            citation: The record to convert.
            collection_keys: Collections the new item is placed in.

        Returns:
            A dict with item type, title, creators, DOI, URL, date, the
            tracker line in ``extra``, the target collections and, when the
            citation names a journal, ``repository`` (preprints) or
            ``publicationTitle`` (everything else).
        """
        if citation.citation_type:
            item_type = self._ITEM_TYPE_MAP.get(str(citation.citation_type), "journalArticle")
        else:
            item_type = "journalArticle"

        # Authors arrive as one "; "-separated string; the last word of each
        # name is taken as the family name.
        creators = []
        if citation.citation_authors:
            for author in citation.citation_authors.split("; "):
                if not author.strip():
                    # Skip blank entries (e.g. from a trailing separator).
                    continue
                parts = author.rsplit(" ", 1)
                if len(parts) == 2:
                    creators.append(
                        {"creatorType": "author", "firstName": parts[0], "lastName": parts[1]}
                    )
                else:
                    creators.append({"creatorType": "author", "name": author})

        # The tracker line is what later syncs use to recognise this item.
        tracker_key = self._make_tracker_key(citation)
        extra_lines = [f"{TRACKER_PREFIX} {tracker_key}"]
        if citation.citation_source:
            extra_lines.append(f"Discovery Source: {citation.citation_source}")

        item = {
            "itemType": item_type,
            "title": citation.citation_title or "",
            "creators": creators,
            "DOI": citation.citation_doi or "",
            "url": citation.citation_url or "",
            "date": str(citation.citation_year) if citation.citation_year else "",
            "extra": "\n".join(extra_lines),
            # Copy so the caller's list is not shared with the payload.
            "collections": list(collection_keys),
        }

        if citation.citation_journal:
            if item_type == "preprint":
                # Preprints use 'repository' field (e.g., bioRxiv, arXiv)
                item["repository"] = citation.citation_journal
            else:
                # NOTE(review): confirm 'publicationTitle' is accepted for the
                # book/thesis/dataset/computerProgram item types.
                item["publicationTitle"] = citation.citation_journal

        return item

    @staticmethod
    def _make_tracker_key(citation: CitationRecord) -> str:
        """Create tracker key for the ``extra`` field.

        The key is ``{item_id}/{item_flavor}/{doi}``; the citation URL is
        used when there is no DOI, and an empty string when there is neither.
        """
        return (
            f"{citation.item_id}/{citation.item_flavor}"
            f"/{citation.citation_doi or citation.citation_url or ''}"
        )

    @staticmethod
    def _make_tracker_key_for_doi(item_id: str, flavor: str, doi: str) -> str:
        """Create tracker key for a specific DOI (``{item_id}/{flavor}/{doi}``)."""
        return f"{item_id}/{flavor}/{doi}"

    def _attach_linked_url(self, parent_key: str, url: str, title: str | None = None) -> bool:
        """Attach a linked URL to a Zotero item.

        Args:
            parent_key: Key of the item the attachment belongs to.
            url: URL to link.
            title: Attachment title; ``"PDF"`` when not given.

        Returns:
            ``True`` if the attachment was submitted without a reported
            failure, ``False`` otherwise. Failures are logged, never raised.
        """
        attachment = {
            "itemType": "attachment",
            "linkMode": "linked_url",
            "url": url,
            "title": title or "PDF",
            "parentItem": parent_key,
            "tags": [],
            "relations": {},
            "contentType": "application/pdf",
        }
        try:
            resp = self.zot.create_items([attachment])
        except Exception as e:
            logger.warning("Failed to attach URL to %s: %s", parent_key, e)
            return False
        if isinstance(resp, dict) and resp.get("failed"):
            logger.warning("Failed to attach URL to %s: %s", parent_key, resp["failed"])
            return False
        return True

    def _move_item_to_collections(
        self, existing_item: dict, new_collection_keys: list[str]
    ) -> None:
        """Move an existing Zotero item to different collections.

        The item's collection list is replaced, not extended.

        Args:
            existing_item: The existing item dict from Zotero API
            new_collection_keys: List of collection keys to move the item to

        Raises:
            Exception: Whatever the update call raises, after logging it.
        """
        updated_data = {
            "key": existing_item["data"]["key"],
            "version": existing_item["data"]["version"],
            "collections": new_collection_keys,
        }

        try:
            self.zot.update_item(updated_data)
        except Exception as e:
            logger.error("Failed to update item collections: %s", e)
            raise
        logger.info(
            "Updated collections for item %s: %s",
            existing_item["data"].get("title", ""),
            new_collection_keys,
        )

    def _link_related(self, source_key: str, target_key: str) -> None:
        """Add *target_key* to the ``dc:relation`` list of *source_key*.

        The source item is refetched first so the write carries its current
        version; a cached copy may be stale if the item was modified earlier
        in the same run (for example by a collection move). Nothing is
        written when the link is already present.
        """
        fresh = self.zot.item(source_key)
        relations = fresh["data"].get("relations", {})
        if not isinstance(relations, dict):
            relations = {}

        related = relations.get("dc:relation", [])
        if isinstance(related, str):
            # A single relation may be stored as a bare string.
            related = [related]
        elif not isinstance(related, list):
            related = []

        target_uri = f"http://zotero.org/groups/{self.group_id}/items/{target_key}"
        if target_uri in related:
            return

        related.append(target_uri)
        relations["dc:relation"] = related
        self.zot.update_item(
            {
                "key": source_key,
                "version": fresh["data"]["version"],
                "relations": relations,
            }
        )
        logger.info("Added related item link: %s -> %s", source_key, target_key)

    def _add_related_item(
        self, item1_key: str, item2_key: str, existing_items_by_key: dict[str, dict]
    ) -> None:
        """Add bidirectional 'Related Items' link between two Zotero items.

        Errors are logged as warnings and not raised.

        Args:
            item1_key: Zotero key of first item (e.g., merged preprint)
            item2_key: Zotero key of second item (e.g., published version)
            existing_items_by_key: Dict of existing items indexed by Zotero
                key; both keys must be present or nothing is done.
        """
        try:
            if not existing_items_by_key.get(item1_key) or not existing_items_by_key.get(
                item2_key
            ):
                logger.warning(
                    "Cannot add relation: item keys not found (%s, %s)",
                    item1_key,
                    item2_key,
                )
                return

            self._link_related(item1_key, item2_key)
            self._link_related(item2_key, item1_key)
        except Exception as e:
            logger.warning("Failed to add related items: %s", e)
|