paper-manager 0.11.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,6 +5,20 @@
5
5
 
6
6
  A CLI tool for managing academic papers with knowledge base and vector search support.
7
7
 
8
+ ## Features
9
+
10
+ - **Semantic search** — FAISS vector indexing with configurable embedding models; query your papers by meaning rather than by keywords
11
+ - **PDF metadata extraction** — automatically extracts title, author, keywords, DOI, and more from PDF files
12
+ - **DOI deduplication** — detects duplicate papers by DOI before adding, with `--force` override
13
+ - **Multi-format support** — import from PDF, TXT, MD, TEX, and other text-based formats
14
+ - **PDF-to-Markdown conversion** — optional high-quality conversion via [opendataloader-pdf](https://github.com/nicobailon/opendataloader-pdf) with image extraction
15
+ - **Dual-scope data model** — user-level (`~/.paper-manager/`) for global collections and project-level (`./.paper-manager/`) for project-specific papers, with automatic scope resolution
16
+ - **DOI-to-BibTeX** — convert a DOI to a BibTeX citation in one command
17
+ - **Machine-readable output** — `--json` and `--jq` flags on all read commands for scripting and automation
18
+ - **Literature notes** — attach key-value annotations to any paper
19
+ - **Local-first** — SQLite + FAISS + filesystem, no cloud dependencies
20
+ - **Agent skill** — installable as a [coding agent skill](https://github.com/vercel-labs/skills) for agent-driven paper management
21
+
8
22
  ## Installation
9
23
 
10
24
  ```bash
@@ -47,6 +47,11 @@
47
47
  "defaultEmbeddingModelId": {
48
48
  "type": "string",
49
49
  "minLength": 1
50
+ },
51
+ "email": {
52
+ "type": "string",
53
+ "format": "email",
54
+ "pattern": "^(?!\\.)(?!.*\\.\\.)([A-Za-z0-9_'+\\-\\.]*)[A-Za-z0-9_+-]@([A-Za-z0-9][A-Za-z0-9\\-]*\\.)+[A-Za-z]{2,}$"
50
55
  }
51
56
  },
52
57
  "required": ["embeddingModels"],
@@ -1,9 +1,10 @@
1
1
  import * as fs from "node:fs";
2
+ import * as os from "node:os";
2
3
  import * as path from "node:path";
3
4
  import chalk from "chalk";
4
5
  import cliProgress from "cli-progress";
5
6
  import { Command } from "commander";
6
- import { getFilesDir, getModelConfig, getProjectDataDir, getUserDataDir, getVectorStoreDir, } from "../config/index.js";
7
+ import { getConfig, getFilesDir, getModelConfig, getProjectDataDir, getUserDataDir, getVectorStoreDir, } from "../config/index.js";
7
8
  import * as projectKb from "../db/project/knowledge-bases.js";
8
9
  import * as projectLit from "../db/project/literatures.js";
9
10
  import * as userKb from "../db/user/knowledge-bases.js";
@@ -12,6 +13,7 @@ import { extractContent, extractPdfMetadata } from "../extractor/index.js";
12
13
  import { convertPdfToMarkdown, isOpendataLoaderAvailable, removeImageDir, saveConvertResult, } from "../extractor/markdown.js";
13
14
  import { log } from "../logger.js";
14
15
  import { splitDocuments } from "../text-splitter.js";
16
+ import { downloadPdf, lookupDoi, normalizeDoi, UnpaywallError } from "../unpaywall/index.js";
15
17
  import { addDocuments, createVectorStore } from "../vector-store/index.js";
16
18
  import { outputJson } from "./output.js";
17
19
  function resolveKnowledgeBase(id) {
@@ -33,11 +35,21 @@ export function createLiteratureCommand() {
33
35
  const lit = new Command("lit").description("Manage literatures");
34
36
  // ─── lit add ───────────────────────────────────────────────
35
37
  lit
36
- .command("add <knowledge-base-id> <lit-path>")
37
- .description("Add a literature from a file (PDF, TXT, MD, TEX, etc.)")
38
+ .command("add <knowledge-base-id> [lit-path]")
39
+ .description("Add a literature from a file (PDF, TXT, MD, TEX, etc.) or by DOI via Unpaywall")
38
40
  .option("-t, --title <title>", "Literature title")
39
41
  .option("-f, --force", "Force add even if a literature with the same DOI already exists")
42
+ .option("-d, --doi <doi>", "Add paper by DOI (downloads Open Access PDF via Unpaywall)")
40
43
  .action(async (kbId, litPath, options) => {
44
+ // Mutual exclusivity check
45
+ if (litPath && options.doi) {
46
+ log.error("Cannot specify both <lit-path> and --doi. Use one or the other.");
47
+ process.exit(1);
48
+ }
49
+ if (!litPath && !options.doi) {
50
+ log.error("Either <lit-path> or --doi is required.");
51
+ process.exit(1);
52
+ }
41
53
  const resolved = resolveKnowledgeBase(kbId);
42
54
  if (!resolved) {
43
55
  log.error(`Knowledge base not found: ${kbId}`);
@@ -46,108 +58,211 @@ export function createLiteratureCommand() {
46
58
  const { kb, scope } = resolved;
47
59
  const baseDir = getBaseDir(scope);
48
60
  const litOps = getLitOps(scope);
49
- // Resolve file path
50
- const absolutePath = path.resolve(litPath);
51
- if (!fs.existsSync(absolutePath)) {
52
- log.error(`File not found: ${absolutePath}`);
53
- process.exit(1);
54
- }
55
- log.info("Extracting content...");
56
- const docs = await extractContent(absolutePath);
57
- log.step(`Extracted ${String(docs.length)} pages.`);
58
- // Extract PDF metadata if available
59
- const isPdf = absolutePath.toLowerCase().endsWith(".pdf");
60
- const pdfMeta = isPdf ? await extractPdfMetadata(absolutePath) : null;
61
- if (pdfMeta) {
62
- const hasAny = pdfMeta.title ?? pdfMeta.author ?? pdfMeta.doi ?? pdfMeta.subject;
63
- if (hasAny || pdfMeta.keywords.length > 0) {
64
- log.info("Extracted PDF metadata:");
65
- if (pdfMeta.title)
66
- log.step(`Title: ${pdfMeta.title}`);
67
- if (pdfMeta.author)
68
- log.step(`Author: ${pdfMeta.author}`);
69
- if (pdfMeta.subject)
70
- log.step(`Subject: ${pdfMeta.subject}`);
71
- if (pdfMeta.doi)
72
- log.step(`DOI: ${pdfMeta.doi}`);
73
- if (pdfMeta.keywords.length > 0)
74
- log.step(`Keywords: ${pdfMeta.keywords.join(", ")}`);
75
- if (pdfMeta.creationDate)
76
- log.step(`Created: ${pdfMeta.creationDate.toISOString()}`);
77
- if (pdfMeta.creator)
78
- log.step(`Creator: ${pdfMeta.creator}`);
61
+ let absolutePath;
62
+ let tempDir = null;
63
+ let doiFromFlag = null;
64
+ let unpaywallMeta = null;
65
+ if (options.doi) {
66
+ // ─── DOI mode: lookup Unpaywall and download OA PDF ───
67
+ const normalizedDoi = normalizeDoi(options.doi);
68
+ doiFromFlag = normalizedDoi;
69
+ const email = getConfig("email");
70
+ if (!email) {
71
+ log.error('Email is required for Unpaywall API. Set it with: paper config set email "you@example.com"');
72
+ process.exit(1);
79
73
  }
80
- }
81
- // Check for duplicate DOI in the knowledge base
82
- if (pdfMeta?.doi && !options.force) {
83
- const existing = litOps.findLiteratureByDoi(kbId, pdfMeta.doi);
84
- if (existing) {
85
- log.error(`A literature with DOI "${pdfMeta.doi}" already exists in this knowledge base: ${existing.id} (${existing.title})`);
86
- log.info("Use --force to add anyway.");
74
+ // Check for duplicate DOI before downloading
75
+ if (!options.force) {
76
+ const existing = litOps.findLiteratureByDoi(kbId, normalizedDoi);
77
+ if (existing) {
78
+ log.error(`A literature with DOI "${normalizedDoi}" already exists in this knowledge base: ${existing.id} (${existing.title})`);
79
+ log.info("Use --force to add anyway.");
80
+ process.exit(1);
81
+ }
82
+ }
83
+ log.info(`Looking up DOI: ${normalizedDoi}`);
84
+ try {
85
+ unpaywallMeta = await lookupDoi(normalizedDoi, email);
86
+ }
87
+ catch (err) {
88
+ if (err instanceof UnpaywallError) {
89
+ log.error(err.message);
90
+ }
91
+ else {
92
+ log.error(`Unpaywall lookup failed: ${err instanceof Error ? err.message : String(err)}`);
93
+ }
94
+ process.exit(1);
95
+ }
96
+ if (!unpaywallMeta.is_oa) {
97
+ log.error(`Paper is not Open Access (status: ${unpaywallMeta.oa_status}).`);
98
+ log.info(`Add it manually: paper lit add ${kbId} <file>`);
99
+ process.exit(1);
100
+ }
101
+ const pdfUrl = unpaywallMeta.best_oa_location?.url_for_pdf;
102
+ if (!pdfUrl) {
103
+ const landingPage = unpaywallMeta.best_oa_location?.url_for_landing_page;
104
+ log.error("Paper is Open Access but no direct PDF URL is available.");
105
+ if (landingPage) {
106
+ log.info(`Landing page: ${landingPage}`);
107
+ }
108
+ log.info(`Download the PDF manually and use: paper lit add ${kbId} <file>`);
109
+ process.exit(1);
110
+ }
111
+ // Show Unpaywall metadata
112
+ log.info("Unpaywall metadata:");
113
+ if (unpaywallMeta.title)
114
+ log.step(`Title: ${unpaywallMeta.title}`);
115
+ if (unpaywallMeta.z_authors && unpaywallMeta.z_authors.length > 0) {
116
+ log.step(`Authors: ${unpaywallMeta.z_authors.map((a) => a.raw_author_name).join(", ")}`);
117
+ }
118
+ if (unpaywallMeta.journal_name)
119
+ log.step(`Journal: ${unpaywallMeta.journal_name}`);
120
+ if (unpaywallMeta.year)
121
+ log.step(`Year: ${String(unpaywallMeta.year)}`);
122
+ log.step(`OA Status: ${unpaywallMeta.oa_status}`);
123
+ // Download PDF to temp location
124
+ tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "paper-unpaywall-"));
125
+ absolutePath = path.join(tempDir, `${normalizedDoi.replace(/\//g, "_")}.pdf`);
126
+ log.info("Downloading PDF...");
127
+ try {
128
+ await downloadPdf(pdfUrl, absolutePath);
129
+ }
130
+ catch (err) {
131
+ fs.rmSync(tempDir, { recursive: true, force: true });
132
+ if (err instanceof UnpaywallError) {
133
+ log.error(err.message);
134
+ }
135
+ else {
136
+ log.error(`PDF download failed: ${err instanceof Error ? err.message : String(err)}`);
137
+ }
87
138
  process.exit(1);
88
139
  }
140
+ log.step("PDF downloaded.");
89
141
  }
90
- const title = options.title ?? pdfMeta?.title ?? path.basename(litPath, path.extname(litPath));
91
- // Create literature record
92
- const literature = litOps.createLiterature({
93
- title,
94
- titleTranslation: null,
95
- author: pdfMeta?.author ?? null,
96
- abstract: pdfMeta?.subject ?? null,
97
- summary: null,
98
- keywords: pdfMeta?.keywords ?? [],
99
- url: null,
100
- doi: pdfMeta?.doi ?? null,
101
- notes: {},
102
- knowledgeBaseId: kbId,
103
- });
104
- // Copy file to storage
105
- const filesDir = getFilesDir(baseDir);
106
- const ext = path.extname(litPath);
107
- fs.mkdirSync(filesDir, { recursive: true });
108
- fs.copyFileSync(absolutePath, path.join(filesDir, `${literature.id}${ext}`));
109
- // Convert PDF to Markdown if opendataloader is available
110
- if (isPdf && (await isOpendataLoaderAvailable())) {
111
- const result = await convertPdfToMarkdown(absolutePath);
112
- if (result) {
113
- saveConvertResult(filesDir, literature.id, result);
114
- log.step("Converted to Markdown via opendataloader-pdf.");
142
+ else {
143
+ // ─── File mode (existing behavior) ────────────────────
144
+ // litPath is guaranteed to be defined here by the mutual exclusivity check above
145
+ const filePath = litPath ?? "";
146
+ absolutePath = path.resolve(filePath);
147
+ if (!fs.existsSync(absolutePath)) {
148
+ log.error(`File not found: ${absolutePath}`);
149
+ process.exit(1);
115
150
  }
116
151
  }
117
- // Split text and add to vector store
118
- log.info("Splitting text...");
119
- const splitDocs = splitDocuments(docs, { chunkSize: 1000, chunkOverlap: 200 });
120
- log.step(`Created ${String(splitDocs.length)} chunks.`);
121
- // Add literature ID metadata to each chunk
122
- for (const doc of splitDocs) {
123
- doc.metadata = { ...doc.metadata, literatureId: literature.id };
124
- }
125
- const vectorDir = path.join(getVectorStoreDir(baseDir), kbId);
126
- const modelConfig = getModelConfig(kb.embeddingModelId);
127
- log.info("Embedding and storing vectors...");
128
- const bar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
129
- bar.start(splitDocs.length, 0);
130
- // Check if both FAISS index files exist (not just the directory)
131
- const hasIndex = fs.existsSync(path.join(vectorDir, "faiss.index")) &&
132
- fs.existsSync(path.join(vectorDir, "docstore.json"));
133
- if (hasIndex) {
134
- await addDocuments(splitDocs, modelConfig, vectorDir);
152
+ // ─── Shared flow ──────────────────────────────────────
153
+ try {
154
+ log.info("Extracting content...");
155
+ const docs = await extractContent(absolutePath);
156
+ log.step(`Extracted ${String(docs.length)} pages.`);
157
+ // Extract PDF metadata if available
158
+ const isPdf = absolutePath.toLowerCase().endsWith(".pdf");
159
+ const pdfMeta = isPdf ? await extractPdfMetadata(absolutePath) : null;
160
+ if (pdfMeta) {
161
+ const hasAny = pdfMeta.title ?? pdfMeta.author ?? pdfMeta.doi ?? pdfMeta.subject;
162
+ if (hasAny || pdfMeta.keywords.length > 0) {
163
+ log.info("Extracted PDF metadata:");
164
+ if (pdfMeta.title)
165
+ log.step(`Title: ${pdfMeta.title}`);
166
+ if (pdfMeta.author)
167
+ log.step(`Author: ${pdfMeta.author}`);
168
+ if (pdfMeta.subject)
169
+ log.step(`Subject: ${pdfMeta.subject}`);
170
+ if (pdfMeta.doi)
171
+ log.step(`DOI: ${pdfMeta.doi}`);
172
+ if (pdfMeta.keywords.length > 0)
173
+ log.step(`Keywords: ${pdfMeta.keywords.join(", ")}`);
174
+ if (pdfMeta.creationDate)
175
+ log.step(`Created: ${pdfMeta.creationDate.toISOString()}`);
176
+ if (pdfMeta.creator)
177
+ log.step(`Creator: ${pdfMeta.creator}`);
178
+ }
179
+ }
180
+ // Check for duplicate DOI (file mode only — DOI mode already checked above)
181
+ const effectiveDoi = doiFromFlag ?? pdfMeta?.doi ?? null;
182
+ if (effectiveDoi && !doiFromFlag && !options.force) {
183
+ const existing = litOps.findLiteratureByDoi(kbId, effectiveDoi);
184
+ if (existing) {
185
+ log.error(`A literature with DOI "${effectiveDoi}" already exists in this knowledge base: ${existing.id} (${existing.title})`);
186
+ log.info("Use --force to add anyway.");
187
+ process.exit(1);
188
+ }
189
+ }
190
+ // Resolve metadata: CLI option > Unpaywall > PDF metadata > fallback
191
+ const unpaywallAuthors = unpaywallMeta?.z_authors && unpaywallMeta.z_authors.length > 0
192
+ ? unpaywallMeta.z_authors.map((a) => a.raw_author_name).join(", ")
193
+ : null;
194
+ const title = options.title ??
195
+ unpaywallMeta?.title ??
196
+ pdfMeta?.title ??
197
+ (litPath
198
+ ? path.basename(litPath, path.extname(litPath))
199
+ : (effectiveDoi ?? "Untitled"));
200
+ // Create literature record
201
+ const literature = litOps.createLiterature({
202
+ title,
203
+ titleTranslation: null,
204
+ author: pdfMeta?.author ?? unpaywallAuthors,
205
+ abstract: pdfMeta?.subject ?? null,
206
+ summary: null,
207
+ keywords: pdfMeta?.keywords ?? [],
208
+ url: null,
209
+ doi: effectiveDoi,
210
+ notes: {},
211
+ knowledgeBaseId: kbId,
212
+ });
213
+ // Copy file to storage
214
+ const filesDir = getFilesDir(baseDir);
215
+ const ext = path.extname(absolutePath);
216
+ fs.mkdirSync(filesDir, { recursive: true });
217
+ fs.copyFileSync(absolutePath, path.join(filesDir, `${literature.id}${ext}`));
218
+ // Convert PDF to Markdown if opendataloader is available
219
+ if (isPdf && (await isOpendataLoaderAvailable())) {
220
+ const result = await convertPdfToMarkdown(absolutePath);
221
+ if (result) {
222
+ saveConvertResult(filesDir, literature.id, result);
223
+ log.step("Converted to Markdown via opendataloader-pdf.");
224
+ }
225
+ }
226
+ // Split text and add to vector store
227
+ log.info("Splitting text...");
228
+ const splitDocs = splitDocuments(docs, { chunkSize: 1000, chunkOverlap: 200 });
229
+ log.step(`Created ${String(splitDocs.length)} chunks.`);
230
+ // Add literature ID metadata to each chunk
231
+ for (const doc of splitDocs) {
232
+ doc.metadata = { ...doc.metadata, literatureId: literature.id };
233
+ }
234
+ const vectorDir = path.join(getVectorStoreDir(baseDir), kbId);
235
+ const modelConfig = getModelConfig(kb.embeddingModelId);
236
+ log.info("Embedding and storing vectors...");
237
+ const bar = new cliProgress.SingleBar({}, cliProgress.Presets.shades_classic);
238
+ bar.start(splitDocs.length, 0);
239
+ // Check if both FAISS index files exist (not just the directory)
240
+ const hasIndex = fs.existsSync(path.join(vectorDir, "faiss.index")) &&
241
+ fs.existsSync(path.join(vectorDir, "docstore.json"));
242
+ if (hasIndex) {
243
+ await addDocuments(splitDocs, modelConfig, vectorDir);
244
+ }
245
+ else {
246
+ await createVectorStore(splitDocs, modelConfig, vectorDir);
247
+ }
248
+ bar.update(splitDocs.length);
249
+ bar.stop();
250
+ log.success(`Literature added: ${literature.id}`);
251
+ log.label("Title:", literature.title);
252
+ if (literature.author)
253
+ log.label("Author:", literature.author);
254
+ if (literature.abstract)
255
+ log.label("Abstract:", literature.abstract);
256
+ if (literature.doi)
257
+ log.label("DOI:", literature.doi);
258
+ if (literature.keywords.length > 0)
259
+ log.label("Keywords:", literature.keywords.join(", "));
260
+ }
261
+ finally {
262
+ if (tempDir) {
263
+ fs.rmSync(tempDir, { recursive: true, force: true });
264
+ }
135
265
  }
136
- else {
137
- await createVectorStore(splitDocs, modelConfig, vectorDir);
138
- }
139
- bar.update(splitDocs.length);
140
- bar.stop();
141
- log.success(`Literature added: ${literature.id}`);
142
- log.label("Title:", literature.title);
143
- if (literature.author)
144
- log.label("Author:", literature.author);
145
- if (literature.abstract)
146
- log.label("Abstract:", literature.abstract);
147
- if (literature.doi)
148
- log.label("DOI:", literature.doi);
149
- if (literature.keywords.length > 0)
150
- log.label("Keywords:", literature.keywords.join(", "));
151
266
  });
152
267
  // ─── lit convert ────────────────────────────────────────────
153
268
  lit
@@ -5,12 +5,45 @@ import * as z from "zod";
5
5
  import { EmbeddingModelConfigSchema } from "../types/index.js";
6
6
  // ─── Path Utilities ─────────────────────────────────────────
7
7
  const USER_DATA_DIR = path.join(os.homedir(), ".paper-manager");
8
- const PROJECT_DATA_DIR = path.resolve(".paper-manager");
const DIR_NAME = ".paper-manager";
/**
 * Locates the project-level `.paper-manager/` directory by walking from the
 * current working directory up to the filesystem root.
 *
 * The user-level data directory (`USER_DATA_DIR`) has the same name and sits
 * in the home directory, so an upward walk from anywhere below the home
 * directory would otherwise find it and treat it as the project directory.
 * It is skipped explicitly so that project scope and user scope stay distinct.
 *
 * @returns {string} The first matching ancestor path, or the CWD-based
 *   `.paper-manager` path when no ancestor contains one.
 */
function findProjectDataDir() {
    const userDataDir = path.resolve(USER_DATA_DIR);
    let dir = process.cwd();
    while (true) {
        const candidate = path.join(dir, DIR_NAME);
        // Never resolve project scope to the user-level directory.
        if (candidate !== userDataDir && fs.existsSync(candidate)) {
            return candidate;
        }
        const parent = path.dirname(dir);
        // path.dirname() of the root returns the root itself: stop there.
        if (parent === dir) {
            break;
        }
        dir = parent;
    }
    // Fallback: CWD (no project-level .paper-manager/ found up the tree)
    return path.resolve(DIR_NAME);
}
23
+ let cachedProjectDataDir;
9
24
  export function getUserDataDir() {
10
25
  return USER_DATA_DIR;
11
26
  }
27
+ /**
28
+ * Returns the project-level `.paper-manager/` directory path, traversing
29
+ * up from CWD. Falls back to CWD if not found. Result is cached per process.
30
+ */
12
31
  export function getProjectDataDir() {
13
- return PROJECT_DATA_DIR;
32
+ if (cachedProjectDataDir === undefined) {
33
+ cachedProjectDataDir = findProjectDataDir();
34
+ }
35
+ return cachedProjectDataDir;
36
+ }
37
+ /** @internal Reset cached project data dir. For testing only. */
38
+ export function resetProjectDataDirCache() {
39
+ cachedProjectDataDir = undefined;
40
+ }
41
+ /**
42
+ * Returns CWD-based `.paper-manager/` path without traversal.
43
+ * Used by `config init` to always create in the current directory.
44
+ */
45
+ export function getProjectInitDir() {
46
+ return path.resolve(DIR_NAME);
14
47
  }
15
48
  export function getFilesDir(base) {
16
49
  return path.join(base, "files");
@@ -22,12 +55,13 @@ function getUserConfigPath() {
22
55
  return path.join(USER_DATA_DIR, "config.json");
23
56
  }
24
57
  function getProjectConfigPath() {
25
- return path.join(PROJECT_DATA_DIR, "config.json");
58
+ return path.join(getProjectDataDir(), "config.json");
26
59
  }
27
60
  // ─── Config Schema Map ─────────────────────────────────────
28
61
  const configSchemas = {
29
62
  embeddingModels: z.record(z.string().min(1), EmbeddingModelConfigSchema),
30
63
  defaultEmbeddingModelId: z.string().min(1),
64
+ email: z.email(),
31
65
  };
32
66
  // ─── Config File I/O ────────────────────────────────────────
33
67
  export function readConfigFile(filePath) {
@@ -1,9 +1,9 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
3
  import { initializeDatabase, openDatabase } from "../db/index.js";
4
- import { getFilesDir, getProjectDataDir, getUserDataDir, getVectorStoreDir, writeConfigFile, } from "./index.js";
4
+ import { getFilesDir, getProjectInitDir, getUserDataDir, getVectorStoreDir, writeConfigFile, } from "./index.js";
5
5
  export function initScope(options) {
6
- const baseDir = options?.user ? getUserDataDir() : getProjectDataDir();
6
+ const baseDir = options?.user ? getUserDataDir() : getProjectInitDir();
7
7
  const items = [];
8
8
  // 1. Base directory
9
9
  if (fs.existsSync(baseDir)) {
@@ -32,4 +32,5 @@ export const ConfigSchema = z.object({
32
32
  $schema: z.string().optional(),
33
33
  embeddingModels: z.record(z.string().min(1), EmbeddingModelConfigSchema).default({}),
34
34
  defaultEmbeddingModelId: z.string().min(1).optional(),
35
+ email: z.email().optional(),
35
36
  });
@@ -0,0 +1,71 @@
1
+ import { writeFile } from "node:fs/promises";
2
+ import * as z from "zod";
3
+ // ─── Unpaywall Response Schema ─────────────────────────────
4
+ const UnpaywallOaLocationSchema = z.object({
5
+ url_for_pdf: z.string().nullable(),
6
+ url_for_landing_page: z.string().nullable(),
7
+ license: z.string().nullable(),
8
+ version: z.string().nullable(),
9
+ host_type: z.string().nullable(),
10
+ });
11
+ const UnpaywallAuthorSchema = z.object({
12
+ raw_author_name: z.string(),
13
+ });
14
+ const UnpaywallResponseSchema = z.object({
15
+ is_oa: z.boolean(),
16
+ oa_status: z.string(),
17
+ title: z.string().nullable().optional(),
18
+ z_authors: z.array(UnpaywallAuthorSchema).nullable().optional(),
19
+ published_date: z.string().nullable().optional(),
20
+ journal_name: z.string().nullable().optional(),
21
+ year: z.number().nullable().optional(),
22
+ publisher: z.string().nullable().optional(),
23
+ best_oa_location: UnpaywallOaLocationSchema.nullable(),
24
+ doi: z.string(),
25
+ });
26
+ export class UnpaywallError extends Error {
27
+ code;
28
+ constructor(message, code) {
29
+ super(message);
30
+ this.name = "UnpaywallError";
31
+ this.code = code;
32
+ }
33
+ }
34
+ // ─── DOI Normalization ─────────────────────────────────────
/**
 * Reduces a user-supplied DOI reference to the bare DOI name.
 *
 * Surrounding whitespace is dropped, then a leading `http(s)://doi.org/` or
 * `http(s)://dx.doi.org/` resolver prefix, then a leading `doi:` label
 * (optionally followed by spaces). Prefix matching is case-insensitive.
 * The DOI name itself is returned unchanged (its letter case is preserved).
 *
 * @param {string} input - DOI as typed by the user (bare, `doi:`-prefixed, or a resolver URL).
 * @returns {string} The DOI with any recognised prefix removed.
 */
export function normalizeDoi(input) {
    return input
        .trim()
        .replace(/^https?:\/\/(?:dx\.)?doi\.org\//i, "")
        .replace(/^doi:\s*/i, "");
}
38
+ // ─── API Client ────────────────────────────────────────────
/**
 * Fetches the Unpaywall v2 record for a DOI and validates it against
 * `UnpaywallResponseSchema`.
 *
 * @param {string} doi - Bare DOI; it is percent-encoded into the request path.
 * @param {string} email - Address sent as the `email` query parameter.
 * @returns {Promise<object>} The validated response data.
 * @throws {UnpaywallError} With code `not_found` on HTTP 404, `api_error` on
 *   any other non-2xx status, and `parse_error` when the body is not valid
 *   JSON or does not match the schema. Errors raised by `fetch` itself
 *   (e.g. network failures) propagate unchanged.
 */
export async function lookupDoi(doi, email) {
    const url = `https://api.unpaywall.org/v2/${encodeURIComponent(doi)}?email=${encodeURIComponent(email)}`;
    const response = await fetch(url, {
        headers: { Accept: "application/json" },
        redirect: "follow",
    });
    if (response.status === 404) {
        throw new UnpaywallError(`DOI not found in Unpaywall: ${doi}`, "not_found");
    }
    if (!response.ok) {
        throw new UnpaywallError(`Unpaywall API error: HTTP ${String(response.status)}`, "api_error");
    }
    let json;
    try {
        json = await response.json();
    }
    catch (err) {
        // A 2xx reply with a non-JSON body (e.g. an HTML page from a proxy) is a
        // malformed response; report it with the same code as a schema mismatch.
        const parseError = new UnpaywallError(`Invalid Unpaywall API response: ${err instanceof Error ? err.message : String(err)}`, "parse_error");
        parseError.cause = err;
        throw parseError;
    }
    const result = UnpaywallResponseSchema.safeParse(json);
    if (!result.success) {
        throw new UnpaywallError(`Invalid Unpaywall API response: ${result.error.message}`, "parse_error");
    }
    return result.data;
}
58
+ // ─── PDF Download ──────────────────────────────────────────
/**
 * Downloads a PDF from `url` and writes it to `destPath`.
 *
 * The response must be successful, must declare a PDF or generic binary
 * media type, and must carry the `%PDF-` signature near the start of the
 * body. Nothing is written to disk unless all three checks pass, so an HTML
 * login or error page served as a download never reaches the extractor.
 *
 * @param {string} url - Direct PDF URL; redirects are followed.
 * @param {string} destPath - File path to write the downloaded bytes to.
 * @returns {Promise<void>}
 * @throws {UnpaywallError} With code `download_error` on a non-2xx status, an
 *   unexpected media type, or a body that is not a PDF. Errors raised by
 *   `fetch` or `writeFile` propagate unchanged.
 */
export async function downloadPdf(url, destPath) {
    const response = await fetch(url, { redirect: "follow" });
    if (!response.ok) {
        throw new UnpaywallError(`Failed to download PDF: HTTP ${String(response.status)}`, "download_error");
    }
    const contentType = response.headers.get("content-type") ?? "";
    // Media types are case-insensitive, so compare a lowercased copy.
    const mediaType = contentType.toLowerCase();
    if (!mediaType.includes("application/pdf") &&
        !mediaType.includes("application/octet-stream")) {
        throw new UnpaywallError(`Expected PDF but received: ${contentType}`, "download_error");
    }
    const buffer = Buffer.from(await response.arrayBuffer());
    // The declared type alone is not proof (octet-stream can be anything):
    // require the PDF signature within the leading bytes of the body.
    if (!buffer.subarray(0, 1024).includes("%PDF-")) {
        throw new UnpaywallError("Downloaded file is not a valid PDF (missing %PDF- header).", "download_error");
    }
    await writeFile(destPath, buffer);
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "paper-manager",
3
- "version": "0.11.1",
3
+ "version": "0.12.0",
4
4
  "description": "A paper management system.",
5
5
  "keywords": [],
6
6
  "homepage": "https://github.com/EurFelux/paper-manager",