docent-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,183 @@
1
+ """File-backed read-through cache for Mendeley collection metadata.
2
+
3
+ Step 11.7. Wraps `mendeley_list_documents(folder_id)` only — `get_document`
4
+ isn't wrapped yet because no reader needs the fields it adds (abstract,
5
+ attachments) and the bulk list call already covers title/authors/year/doi.
6
+
7
+ Cache file: `<cache_dir>/paper/mendeley_collection.json`.
8
+
9
+ {
10
+ "<folder_id>": {
11
+ "fetched_at": <unix_ts>,
12
+ "docs": {"<mendeley_id>": <doc>, ...}
13
+ },
14
+ ...
15
+ }
16
+
17
+ Across-CLI persistence is the whole point: each `docent paper next` is a
18
+ fresh Python process, and a 5-minute TTL only delivers the promised
19
+ "feels instant" UX if it survives process exits. `sync-from-mendeley`
20
+ calls `invalidate()` after writing the queue so the next reader pulls
21
+ fresh data.
22
+
23
+ On MCP transport / auth error the cache returns `None` — callers fall
24
+ back to the snapshot fields persisted in queue.json by sync-from-mendeley.
25
+ A failed fetch is never written to disk.
26
+ """
27
+ from __future__ import annotations
28
+
29
+ import json
30
+ import os
31
+ import time
32
+ from pathlib import Path
33
+ from typing import Any, Callable
34
+
35
+ from .mendeley_client import list_documents as default_list_documents
36
+ from .mendeley_client import list_folders as default_list_folders
37
+
38
# Document listings are considered fresh for five minutes.
DEFAULT_TTL_SECONDS = 5 * 60
# Folder IDs are effectively immutable in Mendeley — the only realistic
# invalidator is the user renaming/recreating the collection, which already
# surfaces a verbose actionable hint via sync-from-mendeley. Hence one day.
FOLDER_TTL_SECONDS = 24 * 60 * 60
# Reserved top-level key holding the collection_name -> folder_id map.
# Mendeley folder IDs are UUID-shaped, so this can't collide with a real
# folder entry.
_FOLDERS_KEY = "__folders__"
46
+
47
+
48
class MendeleyCache:
    """Read-through cache for Mendeley listings, persisted as one JSON file.

    The file maps each folder ID to `{"fetched_at": <unix_ts>, "docs": {...}}`
    and keeps the collection-name -> folder-ID map under the reserved
    `_FOLDERS_KEY` entry as `{"fetched_at": <unix_ts>, "by_name": {...}}`.
    Entries older than their TTL, or entries whose shape is not what this
    class writes, are treated as missing and refetched. A failed fetch is
    never written to disk.
    """

    def __init__(
        self,
        cache_path: Path,
        ttl_seconds: int = DEFAULT_TTL_SECONDS,
        list_documents: Callable[..., dict[str, Any]] | None = None,
        list_folders: Callable[..., dict[str, Any]] | None = None,
        folder_ttl_seconds: int = FOLDER_TTL_SECONDS,
    ) -> None:
        """Configure the cache.

        Args:
            cache_path: JSON file backing the cache; created on first write.
            ttl_seconds: Freshness window for per-folder document listings.
            list_documents: Fetcher called as
                `list_documents(folder_id=..., launch_command=...)`; defaults
                to the Mendeley client's `list_documents`.
            list_folders: Fetcher called as `list_folders(launch_command=...)`;
                defaults to the Mendeley client's `list_folders`.
            folder_ttl_seconds: Freshness window for the name -> ID map.
        """
        self._path = cache_path
        self._ttl = ttl_seconds
        self._list_documents = list_documents or default_list_documents
        self._list_folders = list_folders or default_list_folders
        self._folder_ttl = folder_ttl_seconds

    @property
    def path(self) -> Path:
        """Location of the JSON cache file."""
        return self._path

    def get_collection(
        self,
        folder_id: str,
        launch_command: list[str] | None = None,
    ) -> dict[str, dict[str, Any]] | None:
        """Return `{mendeley_id: doc}` for `folder_id`.

        Serves the on-disk entry while it is fresh; otherwise calls
        `list_documents`, rewrites the cache file and returns the new map.
        Returns None when the fetcher reports an error (nothing is written
        in that case). Items without a usable id are dropped.
        """
        store = self._load()
        now = time.time()
        entry = store.get(folder_id)
        if self._is_fresh(entry, now, self._ttl):
            docs = entry.get("docs")
            if isinstance(docs, dict):
                return docs

        resp = self._list_documents(folder_id=folder_id, launch_command=launch_command)
        if resp.get("error"):
            return None
        items = resp.get("items") or []
        docs = {mid: doc for doc in items if (mid := _doc_id(doc))}
        store[folder_id] = {"fetched_at": now, "docs": docs}
        self._save(store)
        return docs

    def get_folder_id(
        self,
        collection_name: str,
        launch_command: list[str] | None = None,
    ) -> str | None:
        """Return the Mendeley folder ID for `collection_name`.

        Returns None when the fetcher reports an error, or when the name is
        missing or ambiguous (shared by several folders). The name -> ID map
        is cached in the same file under the reserved `__folders__` key with
        its own, longer TTL. A name absent from a fresh cached map still
        triggers a refetch, so only positive hits are served from disk.
        """
        store = self._load()
        now = time.time()
        entry = store.get(_FOLDERS_KEY)
        if self._is_fresh(entry, now, self._folder_ttl):
            cached = entry.get("by_name")
            if isinstance(cached, dict) and collection_name in cached:
                fid = cached[collection_name]
                return fid if isinstance(fid, str) and fid else None

        resp = self._list_folders(launch_command=launch_command)
        if resp.get("error"):
            return None
        folders = [f for f in (resp.get("items") or []) if isinstance(f, dict)]

        # Count names first so duplicates get dropped entirely (not toggled).
        # Every folder with a usable name counts, even if its id is unusable.
        counts: dict[str, int] = {}
        for folder in folders:
            name = folder.get("name")
            if isinstance(name, str) and name:
                counts[name] = counts.get(name, 0) + 1

        by_name: dict[str, str] = {}
        for folder in folders:
            name = folder.get("name")
            fid = folder.get("id")
            if (
                isinstance(name, str) and name
                and isinstance(fid, str) and fid
                and counts.get(name, 0) == 1
            ):
                by_name[name] = fid

        store[_FOLDERS_KEY] = {"fetched_at": now, "by_name": by_name}
        self._save(store)
        return by_name.get(collection_name)

    def invalidate(self, folder_id: str | None = None) -> None:
        """Drop one folder's entry, or the whole file if `folder_id` is None.

        Called by `sync-from-mendeley` after a successful write so the next
        reader pulls fresh data. Removing the whole file also discards the
        cached name -> ID map. A missing file or unknown folder is a no-op.
        """
        if folder_id is None:
            try:
                self._path.unlink()
            except FileNotFoundError:
                pass
            return
        store = self._load()
        if folder_id in store:
            del store[folder_id]
            self._save(store)

    @staticmethod
    def _is_fresh(entry: Any, now: float, ttl: float) -> bool:
        """Tell whether a cache entry is well-formed and younger than `ttl`.

        Anything that is not a dict with a numeric `fetched_at` counts as
        stale, so a hand-edited or damaged file triggers a refetch instead
        of an AttributeError/TypeError.
        """
        if not isinstance(entry, dict):
            return False
        fetched_at = entry.get("fetched_at", 0.0)
        if isinstance(fetched_at, bool) or not isinstance(fetched_at, (int, float)):
            return False
        return (now - fetched_at) < ttl

    def _load(self) -> dict[str, dict[str, Any]]:
        """Read the cache file, returning `{}` if it is missing or unusable."""
        try:
            data = json.loads(self._path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Missing, unreadable or corrupt file: behave as if empty; the
            # next write rewrites it. ValueError covers both JSONDecodeError
            # and the UnicodeDecodeError raised for invalid UTF-8 bytes.
            return {}
        return data if isinstance(data, dict) else {}

    def _save(self, store: dict[str, dict[str, Any]]) -> None:
        """Write `store` via a sibling `.tmp` file and an atomic rename."""
        self._path.parent.mkdir(parents=True, exist_ok=True)
        tmp = self._path.with_suffix(self._path.suffix + ".tmp")
        tmp.write_text(json.dumps(store, indent=2, ensure_ascii=False), encoding="utf-8")
        os.replace(tmp, self._path)
170
+
171
+
172
def _doc_id(doc: Any) -> str | None:
    """Pull the Mendeley identifier out of a document payload.

    Library docs carry it under `id`; catalog hits (via
    mendeley_get_by_doi) carry it under `catalog_id`. `id` is preferred
    when both hold a non-empty string. Returns None for non-dict payloads
    or when neither key yields a non-empty string.
    """
    if isinstance(doc, dict):
        candidates = (doc.get("id"), doc.get("catalog_id"))
        return next((c for c in candidates if isinstance(c, str) and c), None)
    return None
@@ -0,0 +1,132 @@
1
+ """Mendeley MCP client wrapper - sync facade over the async `mcp` SDK.
2
+
3
+ Step 11.4. Spawn-per-call: each call launches the Mendeley MCP server as a
4
+ subprocess via stdio, runs one `call_tool`, and tears down. Step 11.9 retired
5
+ `lookup_doi` / `search_library` (sync-mendeley subsumed by sync-from-mendeley);
6
+ only `list_folders` and `list_documents` remain — both feeding the Mendeley
7
+ read-through cache used by paper readers.
8
+
9
+ Return shape is `{"items": list, "error": str | None}`:
10
+
11
+ - success -> {"items": [...], "error": None} (items may be empty = not found)
12
+ - auth failure -> {"items": [], "error": "auth: ..."}
13
+ - transport -> {"items": [], "error": "transport: ..."}
14
+ - tool error -> {"items": [], "error": "tool: ..."}
15
+
16
+ Callers bucket on `error` prefix. Lazy-imports `mcp` so importing this
17
+ module is cheap; the SDK is only loaded when a function actually runs.
18
+ """
19
+ from __future__ import annotations
20
+
21
+ import asyncio
22
+ import json
23
+ from typing import Any
24
+
25
# argv used to spawn the Mendeley MCP server when a caller passes no
# `launch_command` of its own. Treat as read-only: it is shared by every call.
DEFAULT_LAUNCH_COMMAND: list[str] = ["uvx", "mendeley-mcp"]
26
+
27
+
28
def _parse_text_payload(result: Any) -> Any:
    """Decode the JSON carried by the first content block of a tool result.

    MCP CallToolResult.content is a list of content blocks; the Mendeley
    server returns a single text block carrying JSON. Returns the parsed
    JSON value (may be dict, list, or scalar) or None if the shape is off:
    no `content`, `content` that is empty or not indexable, a first block
    without textual `text`, or text that is not valid JSON.
    """
    try:
        block = result.content[0]
    except (AttributeError, IndexError, KeyError, TypeError):
        # TypeError/KeyError: `content` is None or some non-sequence object.
        return None
    text = getattr(block, "text", None)
    # json.loads only accepts str/bytes/bytearray; anything else would raise
    # TypeError, which callers do not expect from this helper.
    if not isinstance(text, (str, bytes, bytearray)):
        return None
    try:
        return json.loads(text)
    except ValueError:
        # Covers JSONDecodeError and, for bytes input, UnicodeDecodeError.
        return None
44
+
45
+
46
def _classify_error(message: str) -> str:
    """Bucket a tool error message as "auth" or "tool".

    The match is a case-insensitive substring test against a fixed list of
    markers; any hit yields "auth", everything else is "tool".
    """
    lowered = message.lower()
    for marker in ("auth", "token", "credential", "401", "403", "unauthor"):
        if marker in lowered:
            return "auth"
    return "tool"
51
+
52
+
53
def _items_from_payload(parsed: Any) -> list[Any]:
    """Normalize a parsed, non-error tool payload into the `items` list.

    A JSON list is returned as-is. A dict wrapping a list under `documents`,
    `results` or `items` yields the first non-empty wrapped list, or `[]`
    when every wrapped list is empty (an empty listing must not turn the
    wrapper itself into a bogus item). Any other dict is a single-document
    response and becomes a one-element list, as does a scalar.
    """
    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, dict):
        wrapped = [
            parsed[key]
            for key in ("documents", "results", "items")
            if isinstance(parsed.get(key), list)
        ]
        if wrapped:
            return next((candidate for candidate in wrapped if candidate), [])
    return [parsed]


async def _call_tool(launch_command: list[str], tool_name: str, arguments: dict[str, Any]) -> dict[str, Any]:
    """Spawn the MCP server, run one tool call, and normalize the outcome.

    Args:
        launch_command: argv for the server process; element 0 is the
            executable, the rest are its arguments.
        tool_name: Name of the MCP tool to invoke.
        arguments: Arguments passed to the tool.

    Returns:
        `{"items": list, "error": str | None}`. On failure `items` is empty
        and `error` is prefixed with `transport:`, `auth:` or `tool:`.
    """
    # Checked before the lazy import so an empty command is reported as a
    # transport error without needing the SDK at all.
    if not launch_command:
        return {"items": [], "error": "transport: empty launch command"}

    # Lazy import: keeps `paper.py` import path free of the mcp SDK.
    from mcp import ClientSession, StdioServerParameters
    from mcp.client.stdio import stdio_client

    params = StdioServerParameters(
        command=launch_command[0],
        args=list(launch_command[1:]),
        env=None,
    )

    try:
        async with stdio_client(params) as (read, write):
            async with ClientSession(read, write) as session:
                await session.initialize()
                result = await session.call_tool(tool_name, arguments)
    except FileNotFoundError as e:
        return {"items": [], "error": f"transport: launch command not found ({e})"}
    except Exception as e:  # noqa: BLE001 — surfaces stdio_client / session errors uniformly.
        return {"items": [], "error": f"transport: {type(e).__name__}: {e}"}

    parsed = _parse_text_payload(result)

    if getattr(result, "isError", False):
        if isinstance(parsed, dict) and parsed.get("error"):
            msg = str(parsed["error"])
        else:
            msg = "tool returned error"
        return {"items": [], "error": f"{_classify_error(msg)}: {msg}"}

    if parsed is None:
        return {"items": [], "error": "tool: unparseable response"}

    # Some tool error paths return a JSON dict with an `error` key but no isError flag.
    if isinstance(parsed, dict) and "error" in parsed and len(parsed) == 1:
        msg = str(parsed["error"])
        return {"items": [], "error": f"{_classify_error(msg)}: {msg}"}

    return {"items": _items_from_payload(parsed), "error": None}
103
+
104
+
105
def _run(coro: Any) -> dict[str, Any]:
    """Run `coro` to completion via `asyncio.run` and hand back its result.

    This is the sync bridge used by the public wrappers in this module.
    """
    outcome = asyncio.run(coro)
    return outcome
107
+
108
+
109
def list_folders(launch_command: list[str] | None = None) -> dict[str, Any]:
    """Invoke the `mendeley_list_folders` tool with no arguments.

    The result uses this module's `{"items": ..., "error": ...}` shape; on
    success the items are a flat list of `{id, name, parent_id}` with
    nesting encoded via `parent_id`. Used by `sync-from-mendeley` to resolve
    a configured collection name to its id. A missing or empty
    `launch_command` falls back to `DEFAULT_LAUNCH_COMMAND`.
    """
    if launch_command:
        command = launch_command
    else:
        command = DEFAULT_LAUNCH_COMMAND
    return _run(_call_tool(command, "mendeley_list_folders", {}))
115
+
116
+
117
def list_documents(
    folder_id: str | None = None,
    launch_command: list[str] | None = None,
    limit: int = 200,
    sort_by: str = "last_modified",
) -> dict[str, Any]:
    """Invoke the `mendeley_list_documents` tool.

    With `folder_id` the call is scoped to that collection; without it the
    whole library is listed. `limit` defaults to 200 rather than the MCP
    default of 50 because a reading queue can plausibly hold that many;
    documents above the limit are silently truncated, so revisit if
    real-data queues grow past it. A missing or empty `launch_command`
    falls back to `DEFAULT_LAUNCH_COMMAND`.
    """
    tool_args: dict[str, Any] = {"limit": limit, "sort_by": sort_by}
    if folder_id is not None:
        # Only sent when given, so the tool applies no folder filter otherwise.
        tool_args = {**tool_args, "folder_id": folder_id}
    command = launch_command if launch_command else DEFAULT_LAUNCH_COMMAND
    return _run(_call_tool(command, "mendeley_list_documents", tool_args))
@@ -0,0 +1,78 @@
1
+ """Startup deadline notifications for the reading queue.
2
+
3
+ Called once per day on first `docent` invocation. Prints a warning for any
4
+ entry whose deadline is within 3 days or already past. Deduplicates within a
5
+ calendar day so the same alert doesn't repeat across multiple commands.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import os
11
+ from datetime import date, timedelta
12
+ from pathlib import Path
13
+ from typing import Any
14
+
15
+
16
def check_deadlines(store_root: Path) -> list[str]:
    """Return alert lines for open entries due within 3 days or overdue.

    Reads `<store_root>/queue.json` and skips entries whose status is
    "done" or "removed", entries without a deadline, and entries whose
    deadline is not an ISO date string. Each alerted entry is recorded in
    `<store_root>/deadline-seen.json` (entry id -> ISO date of the alert)
    so that the same entry is not reported twice on one calendar day.

    Missing, unreadable or malformed files never raise: an unusable queue
    yields an empty list and an unusable seen-file is treated as empty.

    Args:
        store_root: Directory holding `queue.json`.

    Returns:
        One formatted line per newly alerted entry, in queue order; empty
        when there is nothing (new) to report.
    """
    queue_path = store_root / "queue.json"
    seen_path = store_root / "deadline-seen.json"

    try:
        queue = json.loads(queue_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # Missing/unreadable file, invalid JSON or invalid UTF-8.
        return []
    if not isinstance(queue, list):
        return []

    seen: dict[str, str] = {}  # entry_id -> last-seen date
    try:
        loaded = json.loads(seen_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        loaded = None
    if isinstance(loaded, dict):
        seen = loaded

    # Sample the clock once so the string and date views cannot disagree
    # when the call straddles midnight.
    today = date.today()
    today_str = today.isoformat()
    warn_horizon = today + timedelta(days=3)
    alerts: list[str] = []
    updated_seen = dict(seen)

    for entry in queue:
        if not isinstance(entry, dict):
            continue
        if entry.get("status") in ("done", "removed"):
            continue
        deadline_str = entry.get("deadline")
        if not deadline_str:
            continue
        eid = entry.get("id", "")
        # JSON object keys are always strings, so key the seen-map by the
        # string form; otherwise a non-string id never matches after reload.
        seen_key = eid if isinstance(eid, str) else str(eid)
        if seen.get(seen_key) == today_str:
            continue  # already alerted today
        try:
            deadline = date.fromisoformat(deadline_str)
        except (TypeError, ValueError):
            # TypeError: deadline stored as a non-string (e.g. a number).
            continue

        if deadline <= warn_horizon:
            days_left = (deadline - today).days
            title = entry.get("title") or eid
            if days_left < 0:
                alerts.append(f"[OVERDUE {abs(days_left)}d] {title!r} — deadline was {deadline_str}")
            elif days_left == 0:
                alerts.append(f"[DUE TODAY] {title!r} — deadline {deadline_str}")
            else:
                alerts.append(f"[DUE IN {days_left}d] {title!r} — deadline {deadline_str}")
            updated_seen[seen_key] = today_str

    if updated_seen != seen:
        store_root.mkdir(parents=True, exist_ok=True)
        # Write-then-rename so a crash cannot leave a half-written seen-file.
        tmp = seen_path.with_suffix(".tmp")
        tmp.write_text(json.dumps(updated_seen, indent=2, ensure_ascii=False), encoding="utf-8")
        os.replace(tmp, seen_path)

    return alerts
@@ -0,0 +1,105 @@
1
+ """Persistence + state recompute for the reading queue.
2
+
3
+ Owns three files inside `<root>/`:
4
+ - queue.json — source-of-truth list of QueueEntry dicts
5
+ - queue-index.json — id -> {title, status, order} for fast lookups
6
+ - state.json — banner counts + last_updated timestamp
7
+
8
+ Reads return safe defaults if a file is missing. Writes self-initialize the
9
+ directory and use atomic rename so a crash mid-write can't leave a partial
10
+ JSON file in place.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ import os
16
+ from datetime import datetime
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+ from pydantic import BaseModel
21
+
22
+
23
class BannerCounts(BaseModel):
    # Number of queue entries in each status, mirroring the `queued`,
    # `reading` and `done` keys persisted in state.json. Every field
    # defaults to zero, which is what a missing state file yields.
    queued: int = 0
    reading: int = 0
    done: int = 0
27
+
28
+
29
class ReadingQueueStore:
    """JSON persistence for the reading queue, rooted at one directory.

    Owns `queue.json` (the list of entry dicts), `queue-index.json`
    (id -> {title, status, order}) and `state.json` (per-status counts plus
    a `last_updated` timestamp). Reads return safe defaults when a file is
    missing; invalid JSON still raises. Writes create the directory on
    demand and go through a temp file plus atomic rename.
    """

    def __init__(self, root: Path) -> None:
        """Bind the store to `root`; nothing is created until the first save."""
        self.root = root

    @property
    def queue_path(self) -> Path:
        """Path of the source-of-truth queue file."""
        return self.root / "queue.json"

    @property
    def index_path(self) -> Path:
        """Path of the derived id -> summary index."""
        return self.root / "queue-index.json"

    @property
    def state_path(self) -> Path:
        """Path of the derived counts/timestamp file."""
        return self.root / "state.json"

    def load_queue(self) -> list[dict[str, Any]]:
        """Return the stored queue, or `[]` if `queue.json` does not exist."""
        return self._read_json(self.queue_path, [])

    def load_index(self) -> dict[str, dict[str, Any]]:
        """Return the stored index, or `{}` if `queue-index.json` does not exist."""
        return self._read_json(self.index_path, {})

    def save_queue(self, queue: list[dict[str, Any]]) -> None:
        """Persist `queue` together with its derived index and state.

        All three payloads are derived and serialized before anything is
        written, so an invalid entry (missing `id`/`status`, or a value JSON
        cannot encode) raises while the files on disk are still untouched
        and mutually consistent.

        Raises:
            KeyError: An entry lacks `id` or `status`.
            TypeError: A value is not JSON-serializable.
        """
        index = self._recompute_index(queue)
        state = self._compute_state(queue)
        payloads = [
            (self.queue_path, self._dump(queue)),
            (self.index_path, self._dump(index)),
            (self.state_path, self._dump(state)),
        ]
        self.root.mkdir(parents=True, exist_ok=True)
        for path, text in payloads:
            self._atomic_write_text(path, text)

    def banner_counts(self) -> BannerCounts:
        """Return the counts stored in `state.json`, all zero if it is missing."""
        if not self.state_path.exists():
            return BannerCounts()
        data = json.loads(self.state_path.read_text(encoding="utf-8"))
        return BannerCounts(
            queued=data.get("queued", 0),
            reading=data.get("reading", 0),
            done=data.get("done", 0),
        )

    @staticmethod
    def list_database_pdfs(database_dir: Path) -> list[Path]:
        """Return all PDFs found recursively in `database_dir`, sorted.

        Missing directory yields an empty list, not an error.
        """
        if not database_dir.is_dir():
            return []
        return sorted(database_dir.rglob("*.pdf"))

    @staticmethod
    def _read_json(path: Path, default: Any) -> Any:
        """Parse `path` as JSON, or return `default` when it does not exist."""
        if not path.exists():
            return default
        return json.loads(path.read_text(encoding="utf-8"))

    @staticmethod
    def _recompute_index(queue: list[dict[str, Any]]) -> dict[str, dict[str, Any]]:
        """Build the id -> {title, status, order} lookup for `queue`.

        `id` and `status` are required (KeyError otherwise); `title` and
        `order` default to "" and 0.
        """
        return {
            e["id"]: {
                "title": e.get("title", ""),
                "status": e["status"],
                "order": e.get("order", 0),
            }
            for e in queue
        }

    @staticmethod
    def _compute_state(queue: list[dict[str, Any]]) -> dict[str, Any]:
        """Count entries per tracked status in one pass and stamp the time.

        Statuses other than "queued", "reading" and "done" are ignored; a
        missing `status` raises KeyError.
        """
        counts = {"queued": 0, "reading": 0, "done": 0}
        for e in queue:
            status = e["status"]
            # The isinstance guard keeps unhashable values out of the lookup.
            if isinstance(status, str) and status in counts:
                counts[status] += 1
        return {**counts, "last_updated": datetime.now().isoformat()}

    def _write_state(self, queue: list[dict[str, Any]]) -> None:
        """Recompute the state for `queue` and write it to `state.json`."""
        self._atomic_write_json(self.state_path, self._compute_state(queue))

    @staticmethod
    def _dump(data: Any) -> str:
        """Serialize `data` the way every file of this store is formatted."""
        return json.dumps(data, indent=2, ensure_ascii=False)

    @staticmethod
    def _atomic_write_text(path: Path, text: str) -> None:
        """Write `text` to a sibling `.tmp` file, then rename it over `path`."""
        path.parent.mkdir(parents=True, exist_ok=True)
        tmp = path.with_suffix(path.suffix + ".tmp")
        tmp.write_text(text, encoding="utf-8")
        os.replace(tmp, path)

    @staticmethod
    def _atomic_write_json(path: Path, data: Any) -> None:
        """Serialize `data` as JSON and write it to `path` atomically."""
        path.parent.mkdir(parents=True, exist_ok=True)
        tmp = path.with_suffix(path.suffix + ".tmp")
        tmp.write_text(json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8")
        os.replace(tmp, path)