paper-manager 0.8.1 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. package/dist/ai/embed.js +0 -1
  2. package/dist/ai/index.js +0 -1
  3. package/dist/ai/provider.js +0 -1
  4. package/dist/commands/config.js +0 -1
  5. package/dist/commands/dep.js +48 -0
  6. package/dist/commands/knowledge-base.js +25 -3
  7. package/dist/commands/literature.js +98 -19
  8. package/dist/commands/util.js +0 -1
  9. package/dist/config/index.js +0 -1
  10. package/dist/config/init.js +0 -1
  11. package/dist/db/index.js +0 -1
  12. package/dist/db/operations/knowledge-bases.js +0 -1
  13. package/dist/db/operations/literatures.js +0 -1
  14. package/dist/db/project/knowledge-bases.js +0 -1
  15. package/dist/db/project/literatures.js +0 -1
  16. package/dist/db/schema.js +0 -1
  17. package/dist/db/test-utils.js +0 -1
  18. package/dist/db/user/knowledge-bases.js +0 -1
  19. package/dist/db/user/literatures.js +0 -1
  20. package/dist/extractor/index.js +0 -1
  21. package/dist/extractor/markdown.js +88 -0
  22. package/dist/extractor/pdf.js +23 -29
  23. package/dist/extractor/text.js +2 -4
  24. package/dist/index.js +2 -1
  25. package/dist/lifecycle.js +0 -1
  26. package/dist/logger.js +0 -1
  27. package/dist/migrations.js +0 -1
  28. package/dist/text-splitter.js +56 -0
  29. package/dist/types/index.js +0 -1
  30. package/dist/vector-store/embeddings.js +0 -1
  31. package/dist/vector-store/index.js +76 -12
  32. package/package.json +5 -6
  33. package/dist/ai/embed.d.ts +0 -3
  34. package/dist/ai/embed.js.map +0 -1
  35. package/dist/ai/embed.test.d.ts +0 -1
  36. package/dist/ai/embed.test.js +0 -93
  37. package/dist/ai/embed.test.js.map +0 -1
  38. package/dist/ai/index.d.ts +0 -2
  39. package/dist/ai/index.js.map +0 -1
  40. package/dist/ai/provider.d.ts +0 -3
  41. package/dist/ai/provider.js.map +0 -1
  42. package/dist/commands/config.d.ts +0 -2
  43. package/dist/commands/config.js.map +0 -1
  44. package/dist/commands/knowledge-base.d.ts +0 -2
  45. package/dist/commands/knowledge-base.js.map +0 -1
  46. package/dist/commands/literature.d.ts +0 -2
  47. package/dist/commands/literature.js.map +0 -1
  48. package/dist/commands/util.d.ts +0 -2
  49. package/dist/commands/util.js.map +0 -1
  50. package/dist/config/index.d.ts +0 -39
  51. package/dist/config/index.js.map +0 -1
  52. package/dist/config/index.test.d.ts +0 -1
  53. package/dist/config/index.test.js +0 -143
  54. package/dist/config/index.test.js.map +0 -1
  55. package/dist/config/init.d.ts +0 -10
  56. package/dist/config/init.js.map +0 -1
  57. package/dist/config/init.test.d.ts +0 -1
  58. package/dist/config/init.test.js +0 -61
  59. package/dist/config/init.test.js.map +0 -1
  60. package/dist/db/index.d.ts +0 -7
  61. package/dist/db/index.js.map +0 -1
  62. package/dist/db/index.test.d.ts +0 -1
  63. package/dist/db/index.test.js +0 -32
  64. package/dist/db/index.test.js.map +0 -1
  65. package/dist/db/operations/knowledge-bases.d.ts +0 -11
  66. package/dist/db/operations/knowledge-bases.js.map +0 -1
  67. package/dist/db/operations/knowledge-bases.test.d.ts +0 -1
  68. package/dist/db/operations/knowledge-bases.test.js +0 -71
  69. package/dist/db/operations/knowledge-bases.test.js.map +0 -1
  70. package/dist/db/operations/literatures.d.ts +0 -16
  71. package/dist/db/operations/literatures.js.map +0 -1
  72. package/dist/db/operations/literatures.test.d.ts +0 -1
  73. package/dist/db/operations/literatures.test.js +0 -156
  74. package/dist/db/operations/literatures.test.js.map +0 -1
  75. package/dist/db/project/knowledge-bases.d.ts +0 -10
  76. package/dist/db/project/knowledge-bases.js.map +0 -1
  77. package/dist/db/project/literatures.d.ts +0 -10
  78. package/dist/db/project/literatures.js.map +0 -1
  79. package/dist/db/schema.d.ts +0 -372
  80. package/dist/db/schema.js.map +0 -1
  81. package/dist/db/test-utils.d.ts +0 -6
  82. package/dist/db/test-utils.js.map +0 -1
  83. package/dist/db/user/knowledge-bases.d.ts +0 -10
  84. package/dist/db/user/knowledge-bases.js.map +0 -1
  85. package/dist/db/user/literatures.d.ts +0 -10
  86. package/dist/db/user/literatures.js.map +0 -1
  87. package/dist/extractor/index.d.ts +0 -6
  88. package/dist/extractor/index.js.map +0 -1
  89. package/dist/extractor/pdf.d.ts +0 -13
  90. package/dist/extractor/pdf.js.map +0 -1
  91. package/dist/extractor/pdf.test.d.ts +0 -1
  92. package/dist/extractor/pdf.test.js +0 -106
  93. package/dist/extractor/pdf.test.js.map +0 -1
  94. package/dist/extractor/text.d.ts +0 -2
  95. package/dist/extractor/text.js.map +0 -1
  96. package/dist/index.d.ts +0 -2
  97. package/dist/index.js.map +0 -1
  98. package/dist/lifecycle.d.ts +0 -1
  99. package/dist/lifecycle.js.map +0 -1
  100. package/dist/logger.d.ts +0 -24
  101. package/dist/logger.js.map +0 -1
  102. package/dist/migrations.d.ts +0 -5
  103. package/dist/migrations.js.map +0 -1
  104. package/dist/pdf/extractor.d.ts +0 -2
  105. package/dist/pdf/extractor.js +0 -18
  106. package/dist/pdf/extractor.js.map +0 -1
  107. package/dist/types/index.d.ts +0 -61
  108. package/dist/types/index.js.map +0 -1
  109. package/dist/types/index.test.d.ts +0 -1
  110. package/dist/types/index.test.js +0 -100
  111. package/dist/types/index.test.js.map +0 -1
  112. package/dist/vector-store/embeddings.d.ts +0 -8
  113. package/dist/vector-store/embeddings.js.map +0 -1
  114. package/dist/vector-store/index.d.ts +0 -6
  115. package/dist/vector-store/index.js.map +0 -1
package/dist/ai/embed.js CHANGED
@@ -29,4 +29,3 @@ export async function embedMany(config, texts) {
29
29
  }
30
30
  return embeddings;
31
31
  }
32
- //# sourceMappingURL=embed.js.map
package/dist/ai/index.js CHANGED
@@ -1,3 +1,2 @@
1
1
  export { embed, embedMany } from "./embed.js";
2
2
  export { createEmbeddingModel } from "./provider.js";
3
- //# sourceMappingURL=index.js.map
@@ -6,4 +6,3 @@ export function createEmbeddingModel(config) {
6
6
  });
7
7
  return openai.embedding(config.model);
8
8
  }
9
- //# sourceMappingURL=provider.js.map
@@ -80,4 +80,3 @@ export function createConfigCommand() {
80
80
  });
81
81
  return config;
82
82
  }
83
- //# sourceMappingURL=config.js.map
@@ -0,0 +1,48 @@
1
+ import chalk from "chalk";
2
+ import { Command } from "commander";
3
+ import { checkOpendataLoaderStatus } from "../extractor/markdown.js";
4
+ import { log } from "../logger.js";
5
+ const KNOWN_DEPS = new Set(["opendataloader"]);
6
/**
 * Build the `dep` command group ("Manage external dependencies").
 *
 * It registers one subcommand, `check <dep>`. The action rejects names that
 * are not in KNOWN_DEPS (logging the valid names and exiting with status 1)
 * and otherwise runs the checker for the requested dependency.
 *
 * @returns {Command} the configured `dep` command.
 */
export function createDepCommand() {
    const dep = new Command("dep");
    dep.description("Manage external dependencies");

    const check = dep.command("check <dep>");
    check.description("Check if an external dependency is available");
    check.action(async (depName) => {
        const isKnown = KNOWN_DEPS.has(depName);
        if (!isKnown) {
            log.error(`Unknown dependency: ${depName}`);
            log.step(`Available: ${Array.from(KNOWN_DEPS).join(", ")}`);
            process.exit(1);
        }
        // Only one dependency is known today; each new one gets its own branch.
        if (depName === "opendataloader") {
            await checkOpendataLoader();
        }
    });

    return dep;
}
23
/**
 * Print a human-readable readiness report for opendataloader-pdf.
 *
 * Shows one line for the npm package and one for the Java runtime, then either
 * a success message or an error followed by an install hint for each missing
 * piece.
 */
async function checkOpendataLoader() {
    log.info("Checking opendataloader-pdf...");
    const { packageInstalled, javaAvailable, javaVersion } = await checkOpendataLoaderStatus();

    // Green check when the requirement is met, red cross otherwise.
    const mark = (ok) => (ok ? chalk.green("✔") : chalk.red("✖"));
    const pkgIcon = mark(packageInstalled);
    const javaIcon = mark(javaAvailable);

    log.plain(` ${pkgIcon} @opendataloader/pdf package`);
    const javaDetail = javaAvailable ? javaVersion : "not found";
    log.plain(` ${javaIcon} Java runtime (${javaDetail})`);
    log.newline();

    if (packageInstalled && javaAvailable) {
        log.success("opendataloader-pdf is ready.");
        return;
    }

    log.error("opendataloader-pdf is not available.");
    if (!packageInstalled) {
        log.step("Install: pnpm add @opendataloader/pdf");
    }
    if (!javaAvailable) {
        log.step("Install Java 11+: https://adoptium.net/");
    }
}
@@ -57,6 +57,7 @@ export function createKnowledgeBaseCommand() {
57
57
  .description("List knowledge bases")
58
58
  .option("--user", "List user knowledge bases only")
59
59
  .option("--all", "List all knowledge bases (default)")
60
+ .option("--json", "Output as JSON")
60
61
  .action((options) => {
61
62
  let results = [];
62
63
  if (options.user) {
@@ -68,7 +69,16 @@ export function createKnowledgeBaseCommand() {
68
69
  results = [...projectKbs, ...userKbs];
69
70
  }
70
71
  if (results.length === 0) {
71
- log.info("No knowledge bases found.");
72
+ if (options.json) {
73
+ log.plain("[]");
74
+ }
75
+ else {
76
+ log.info("No knowledge bases found.");
77
+ }
78
+ return;
79
+ }
80
+ if (options.json) {
81
+ log.plain(JSON.stringify(results, null, 2));
72
82
  return;
73
83
  }
74
84
  for (const kb of results) {
@@ -151,6 +161,7 @@ export function createKnowledgeBaseCommand() {
151
161
  kb.command("query <id> <query-text>")
152
162
  .description("Query a knowledge base")
153
163
  .option("-k, --top-k <number>", "Number of results", "5")
164
+ .option("--json", "Output as JSON")
154
165
  .action(async (id, queryText, options) => {
155
166
  const resolved = resolveKnowledgeBase(id);
156
167
  if (!resolved) {
@@ -168,7 +179,19 @@ export function createKnowledgeBaseCommand() {
168
179
  const k = parseInt(options.topK, 10);
169
180
  const results = await queryVectorStore(modelConfig, vectorDir, queryText, k);
170
181
  if (results.length === 0) {
171
- log.info("No results found.");
182
+ if (options.json) {
183
+ log.plain("[]");
184
+ }
185
+ else {
186
+ log.info("No results found.");
187
+ }
188
+ return;
189
+ }
190
+ if (options.json) {
191
+ const output = results
192
+ .filter((doc) => doc != null)
193
+ .map((doc) => ({ pageContent: doc.pageContent, metadata: doc.metadata }));
194
+ log.plain(JSON.stringify(output, null, 2));
172
195
  return;
173
196
  }
174
197
  for (let i = 0; i < results.length; i++) {
@@ -185,4 +208,3 @@ export function createKnowledgeBaseCommand() {
185
208
  });
186
209
  return kb;
187
210
  }
188
- //# sourceMappingURL=knowledge-base.js.map
@@ -1,6 +1,5 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
- import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
4
3
  import chalk from "chalk";
5
4
  import cliProgress from "cli-progress";
6
5
  import { Command } from "commander";
@@ -10,8 +9,10 @@ import * as projectLit from "../db/project/literatures.js";
10
9
  import * as userKb from "../db/user/knowledge-bases.js";
11
10
  import * as userLit from "../db/user/literatures.js";
12
11
  import { extractContent, extractPdfMetadata } from "../extractor/index.js";
12
+ import { convertPdfToMarkdown, isOpendataLoaderAvailable } from "../extractor/markdown.js";
13
13
  import { log } from "../logger.js";
14
- import { createVectorStore, loadVectorStore } from "../vector-store/index.js";
14
+ import { splitDocuments } from "../text-splitter.js";
15
+ import { addDocuments, createVectorStore } from "../vector-store/index.js";
15
16
  function resolveKnowledgeBase(id) {
16
17
  const pkb = projectKb.getKnowledgeBase(id);
17
18
  if (pkb)
@@ -94,13 +95,17 @@ export function createLiteratureCommand() {
94
95
  const ext = path.extname(litPath);
95
96
  fs.mkdirSync(filesDir, { recursive: true });
96
97
  fs.copyFileSync(absolutePath, path.join(filesDir, `${literature.id}${ext}`));
98
+ // Convert PDF to Markdown if opendataloader is available
99
+ if (isPdf && (await isOpendataLoaderAvailable())) {
100
+ const markdown = await convertPdfToMarkdown(absolutePath);
101
+ if (markdown) {
102
+ fs.writeFileSync(path.join(filesDir, `${literature.id}.md`), markdown, "utf-8");
103
+ log.step("Converted to Markdown via opendataloader-pdf.");
104
+ }
105
+ }
97
106
  // Split text and add to vector store
98
107
  log.info("Splitting text...");
99
- const splitter = new RecursiveCharacterTextSplitter({
100
- chunkSize: 1000,
101
- chunkOverlap: 200,
102
- });
103
- const splitDocs = await splitter.splitDocuments(docs);
108
+ const splitDocs = splitDocuments(docs, { chunkSize: 1000, chunkOverlap: 200 });
104
109
  log.step(`Created ${String(splitDocs.length)} chunks.`);
105
110
  // Add literature ID metadata to each chunk
106
111
  for (const doc of splitDocs) {
@@ -115,12 +120,9 @@ export function createLiteratureCommand() {
115
120
  const hasIndex = fs.existsSync(path.join(vectorDir, "faiss.index")) &&
116
121
  fs.existsSync(path.join(vectorDir, "docstore.json"));
117
122
  if (hasIndex) {
118
- const store = await loadVectorStore(modelConfig, vectorDir);
119
- await store.addDocuments(splitDocs);
120
- await store.save(vectorDir);
123
+ await addDocuments(splitDocs, modelConfig, vectorDir);
121
124
  }
122
125
  else {
123
- fs.mkdirSync(vectorDir, { recursive: true });
124
126
  await createVectorStore(splitDocs, modelConfig, vectorDir);
125
127
  }
126
128
  bar.update(splitDocs.length);
@@ -136,6 +138,40 @@ export function createLiteratureCommand() {
136
138
  if (literature.keywords.length > 0)
137
139
  log.label("Keywords:", literature.keywords.join(", "));
138
140
  });
141
+ // ─── lit convert ────────────────────────────────────────────
142
+ lit
143
+ .command("convert <id>")
144
+ .description("Convert an existing literature PDF to Markdown via opendataloader-pdf")
145
+ .action(async (id) => {
146
+ const found = findLiteratureWithScope(id);
147
+ if (!found) {
148
+ log.error(`Literature not found: ${id}`);
149
+ process.exit(1);
150
+ }
151
+ const filesDir = getFilesDir(getBaseDir(found.scope));
152
+ const pdfFile = findLiteratureFiles(filesDir, id).find((f) => f.endsWith(".pdf"));
153
+ if (!pdfFile) {
154
+ log.error(`No PDF file found for literature: ${id}`);
155
+ process.exit(1);
156
+ }
157
+ const mdPath = path.join(filesDir, `${id}.md`);
158
+ if (fs.existsSync(mdPath)) {
159
+ log.error("Markdown file already exists. Delete it first to reconvert.");
160
+ process.exit(1);
161
+ }
162
+ if (!(await isOpendataLoaderAvailable())) {
163
+ log.error("opendataloader-pdf is not available. Run `paper dep check opendataloader` for details.");
164
+ process.exit(1);
165
+ }
166
+ log.info("Converting PDF to Markdown...");
167
+ const markdown = await convertPdfToMarkdown(path.join(filesDir, pdfFile));
168
+ if (!markdown) {
169
+ log.error("Conversion failed.");
170
+ process.exit(1);
171
+ }
172
+ fs.writeFileSync(mdPath, markdown, "utf-8");
173
+ log.success(`Markdown saved: ${id}.md`);
174
+ });
139
175
  // ─── lit remove ────────────────────────────────────────────
140
176
  lit
141
177
  .command("remove <knowledge-base-id> <id>")
@@ -211,7 +247,8 @@ export function createLiteratureCommand() {
211
247
  lit
212
248
  .command("list <knowledge-base-id>")
213
249
  .description("List literatures in a knowledge base")
214
- .action((kbId) => {
250
+ .option("--json", "Output as JSON")
251
+ .action((kbId, options) => {
215
252
  const resolved = resolveKnowledgeBase(kbId);
216
253
  if (!resolved) {
217
254
  log.error(`Knowledge base not found: ${kbId}`);
@@ -220,14 +257,26 @@ export function createLiteratureCommand() {
220
257
  const litOps = getLitOps(resolved.scope);
221
258
  const literatures = litOps.listLiteratures(kbId);
222
259
  if (literatures.length === 0) {
223
- log.info("No literatures found.");
260
+ if (options.json) {
261
+ log.plain("[]");
262
+ }
263
+ else {
264
+ log.info("No literatures found.");
265
+ }
266
+ return;
267
+ }
268
+ if (options.json) {
269
+ log.plain(JSON.stringify(literatures, null, 2));
224
270
  return;
225
271
  }
272
+ const filesDir = getFilesDir(getBaseDir(resolved.scope));
226
273
  for (const l of literatures) {
227
274
  log.header(l.id);
228
275
  log.label("Title:", l.title);
229
276
  if (l.author)
230
277
  log.label("Author:", l.author);
278
+ const files = findLiteratureFiles(filesDir, l.id);
279
+ log.label("Files:", files.length > 0 ? files.join(", ") : "(none)");
231
280
  log.label("Created:", l.createdAt.toISOString());
232
281
  log.newline();
233
282
  }
@@ -241,6 +290,7 @@ export function createLiteratureCommand() {
241
290
  .option("-a, --author <author>", "Author substring")
242
291
  .option("-k, --keyword <keyword>", "Keyword substring")
243
292
  .option("--doi <doi>", "DOI substring")
293
+ .option("--json", "Output as JSON")
244
294
  .action((kbId, options) => {
245
295
  const resolved = resolveKnowledgeBase(kbId);
246
296
  if (!resolved) {
@@ -262,7 +312,16 @@ export function createLiteratureCommand() {
262
312
  doi: options.doi,
263
313
  });
264
314
  if (results.length === 0) {
265
- log.info("No literatures found.");
315
+ if (options.json) {
316
+ log.plain("[]");
317
+ }
318
+ else {
319
+ log.info("No literatures found.");
320
+ }
321
+ return;
322
+ }
323
+ if (options.json) {
324
+ log.plain(JSON.stringify(results, null, 2));
266
325
  return;
267
326
  }
268
327
  for (const l of results) {
@@ -283,7 +342,8 @@ export function createLiteratureCommand() {
283
342
  lit
284
343
  .command("show <knowledge-base-id> <id>")
285
344
  .description("Show literature details")
286
- .action((kbId, id) => {
345
+ .option("--json", "Output as JSON")
346
+ .action((kbId, id, options) => {
287
347
  const resolved = resolveKnowledgeBase(kbId);
288
348
  if (!resolved) {
289
349
  log.error(`Knowledge base not found: ${kbId}`);
@@ -295,19 +355,29 @@ export function createLiteratureCommand() {
295
355
  log.error(`Literature not found: ${id}`);
296
356
  process.exit(1);
297
357
  }
298
- printLiterature(literature);
358
+ if (options.json) {
359
+ log.plain(JSON.stringify(literature, null, 2));
360
+ return;
361
+ }
362
+ const filesDir = getFilesDir(getBaseDir(resolved.scope));
363
+ printLiterature(literature, filesDir);
299
364
  });
300
365
  // ─── lit note ──────────────────────────────────────────────
301
366
  const note = lit.command("note").description("Manage literature notes");
302
367
  note
303
368
  .command("list <literature-id>")
304
369
  .description("List all notes for a literature")
305
- .action((litId) => {
370
+ .option("--json", "Output as JSON")
371
+ .action((litId, options) => {
306
372
  const literature = findLiterature(litId);
307
373
  if (!literature) {
308
374
  log.error(`Literature not found: ${litId}`);
309
375
  process.exit(1);
310
376
  }
377
+ if (options.json) {
378
+ log.plain(JSON.stringify(literature.notes, null, 2));
379
+ return;
380
+ }
311
381
  const entries = Object.entries(literature.notes);
312
382
  if (entries.length === 0) {
313
383
  log.info("No notes found.");
@@ -363,7 +433,15 @@ function findLiteratureWithScope(id) {
363
433
  return { literature: uLit, scope: "user" };
364
434
  return null;
365
435
  }
366
- function printLiterature(lit) {
436
/**
 * List the stored files that belong to one literature entry.
 *
 * A file belongs to the entry when its name begins with `<id>.`, so the
 * result covers every extension saved under that id.
 *
 * @param filesDir directory holding the stored literature files.
 * @param id literature identifier used as the file-name stem.
 * @returns matching file names (not full paths) in sorted order, or an empty
 *          array when the directory does not exist.
 */
function findLiteratureFiles(filesDir, id) {
    const matches = [];
    if (fs.existsSync(filesDir)) {
        const prefix = `${id}.`;
        for (const entry of fs.readdirSync(filesDir)) {
            if (entry.startsWith(prefix)) {
                matches.push(entry);
            }
        }
        matches.sort();
    }
    return matches;
}
444
+ function printLiterature(lit, filesDir) {
367
445
  log.header(lit.id);
368
446
  log.label("Title:", lit.title);
369
447
  if (lit.titleTranslation)
@@ -382,6 +460,8 @@ function printLiterature(lit) {
382
460
  log.label("DOI:", lit.doi);
383
461
  if (lit.knowledgeBaseId)
384
462
  log.label("Knowledge Base:", lit.knowledgeBaseId);
463
+ const files = findLiteratureFiles(filesDir, lit.id);
464
+ log.label("Files:", files.length > 0 ? files.join(", ") : "(none)");
385
465
  log.label("Created:", lit.createdAt.toISOString());
386
466
  log.label("Updated:", lit.updatedAt.toISOString());
387
467
  const noteEntries = Object.entries(lit.notes);
@@ -392,4 +472,3 @@ function printLiterature(lit) {
392
472
  }
393
473
  }
394
474
  }
395
- //# sourceMappingURL=literature.js.map
@@ -60,4 +60,3 @@ export function createUtilCommand() {
60
60
  });
61
61
  return util;
62
62
  }
63
- //# sourceMappingURL=util.js.map
@@ -104,4 +104,3 @@ export function getDefaultModelConfig() {
104
104
  }
105
105
  return getModelConfig(defaultId);
106
106
  }
107
- //# sourceMappingURL=index.js.map
@@ -52,4 +52,3 @@ export function initScope(options) {
52
52
  }
53
53
  return { baseDir, items };
54
54
  }
55
- //# sourceMappingURL=init.js.map
package/dist/db/index.js CHANGED
@@ -65,4 +65,3 @@ export function getProjectDb() {
65
65
  }
66
66
  return projectDb;
67
67
  }
68
- //# sourceMappingURL=index.js.map
@@ -48,4 +48,3 @@ export function deleteKnowledgeBase(db, id) {
48
48
  const result = db.delete(knowledgeBases).where(eq(knowledgeBases.id, id)).run();
49
49
  return result.changes > 0;
50
50
  }
51
- //# sourceMappingURL=knowledge-bases.js.map
@@ -99,4 +99,3 @@ export function deleteLiteraturesByKnowledgeBaseId(db, knowledgeBaseId) {
99
99
  export function getLiteraturesByKnowledgeBaseId(db, knowledgeBaseId) {
100
100
  return listLiteratures(db, knowledgeBaseId);
101
101
  }
102
- //# sourceMappingURL=literatures.js.map
@@ -15,4 +15,3 @@ export function updateKnowledgeBase(id, input) {
15
15
  export function deleteKnowledgeBase(id) {
16
16
  return ops.deleteKnowledgeBase(getProjectDb(), id);
17
17
  }
18
- //# sourceMappingURL=knowledge-bases.js.map
@@ -24,4 +24,3 @@ export function searchLiteratures(knowledgeBaseId, filters) {
24
24
  export function getLiteraturesByKnowledgeBaseId(knowledgeBaseId) {
25
25
  return ops.getLiteraturesByKnowledgeBaseId(getProjectDb(), knowledgeBaseId);
26
26
  }
27
- //# sourceMappingURL=literatures.js.map
package/dist/db/schema.js CHANGED
@@ -55,4 +55,3 @@ CREATE TABLE IF NOT EXISTS literatures (
55
55
  export const CREATE_LITERATURES_KB_INDEX = `
56
56
  CREATE INDEX IF NOT EXISTS idx_literatures_knowledge_base_id
57
57
  ON literatures(knowledge_base_id)`;
58
- //# sourceMappingURL=schema.js.map
@@ -9,4 +9,3 @@ export function createTestDb() {
9
9
  initializeDatabase(client);
10
10
  return drizzle(client);
11
11
  }
12
- //# sourceMappingURL=test-utils.js.map
@@ -15,4 +15,3 @@ export function updateKnowledgeBase(id, input) {
15
15
  export function deleteKnowledgeBase(id) {
16
16
  return ops.deleteKnowledgeBase(getUserDb(), id);
17
17
  }
18
- //# sourceMappingURL=knowledge-bases.js.map
@@ -24,4 +24,3 @@ export function searchLiteratures(knowledgeBaseId, filters) {
24
24
  export function getLiteraturesByKnowledgeBaseId(knowledgeBaseId) {
25
25
  return ops.getLiteraturesByKnowledgeBaseId(getUserDb(), knowledgeBaseId);
26
26
  }
27
- //# sourceMappingURL=literatures.js.map
@@ -20,4 +20,3 @@ export async function extractContent(filePath) {
20
20
  const ext = filePath.split(".").pop() ?? "unknown";
21
21
  throw new Error(`Unsupported file type: .${ext} (${String(mimeType)})`);
22
22
  }
23
- //# sourceMappingURL=index.js.map
@@ -0,0 +1,88 @@
1
import { execFile } from "node:child_process";
import { mkdirSync, mkdtempSync, readdirSync, readFileSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import * as path from "node:path";
5
/**
 * Report whether PDF-to-Markdown conversion can run, i.e. both the
 * opendataloader package and a Java runtime were detected.
 *
 * Detection runs on the first call only; the boolean outcome is stored in the
 * module-level `cachedAvailability` and returned directly afterwards.
 *
 * @returns {Promise<boolean>} true when both requirements were detected.
 */
export async function isOpendataLoaderAvailable() {
    if (cachedAvailability === undefined) {
        cachedAvailability = await detectAvailability();
    }
    return cachedAvailability;
}
15
/**
 * Convert a PDF file to Markdown using opendataloader-pdf.
 *
 * The converter is pointed at a freshly created private scratch directory
 * under the OS temp dir. The first `.md` file found there afterwards is read
 * back, and the directory is removed whether or not conversion succeeded.
 *
 * @param {string} pdfPath path of the PDF to convert.
 * @returns {Promise<string | null>} the Markdown text, or null on any failure
 *          (scratch directory cannot be created, package cannot be imported,
 *          conversion throws, or no `.md` file was produced).
 */
export async function convertPdfToMarkdown(pdfPath) {
    let outDir;
    try {
        // mkdtempSync appends random characters and creates the directory
        // itself, so calls in the same millisecond never share a directory.
        // It is inside the try so a creation failure yields null as well.
        outDir = mkdtempSync(path.join(tmpdir(), "odl-"));
        const { convert } = await import("@opendataloader/pdf");
        await convert([pdfPath], {
            outputDir: outDir,
            format: "markdown",
            quiet: true,
        });
        const mdFile = readdirSync(outDir).find((f) => f.endsWith(".md"));
        if (!mdFile) {
            return null;
        }
        return readFileSync(path.join(outDir, mdFile), "utf-8");
    }
    catch {
        return null;
    }
    finally {
        if (outDir !== undefined) {
            try {
                rmSync(outDir, { recursive: true, force: true });
            }
            catch {
                // Best-effort cleanup: a leftover scratch directory must not
                // replace an already-computed result with an exception.
            }
        }
    }
}
41
+ // ─── Internal ────────────────────────────────────────────
42
+ let cachedAvailability;
43
/**
 * Run the package check and the Java check concurrently.
 *
 * @returns {Promise<boolean>} true only when both checks succeed.
 */
async function detectAvailability() {
    const outcomes = await Promise.all([checkPackage(), checkJava()]);
    return outcomes.every(Boolean);
}
47
/**
 * Probe whether `@opendataloader/pdf` can be imported.
 *
 * @returns {Promise<boolean>} true when the dynamic import resolves, false
 *          when it rejects for any reason.
 */
async function checkPackage() {
    return import("@opendataloader/pdf").then(() => true, () => false);
}
56
// execFile is safe — arguments are passed as an array, no shell interpolation.
/**
 * Probe for a Java runtime by running `java -version`.
 *
 * @returns {Promise<boolean>} true when the command completed without an
 *          error being passed to the callback, false otherwise.
 */
async function checkJava() {
    const ranCleanly = await new Promise((done) => {
        const onExit = (failure) => {
            done(!failure);
        };
        execFile("java", ["-version"], onExit);
    });
    return ranCleanly;
}
64
/**
 * Detailed availability check for the `dep check` command.
 *
 * Runs the package probe and the Java version probe concurrently.
 *
 * @returns {Promise<{packageInstalled: boolean, javaAvailable: boolean, javaVersion: (string|null)}>}
 *          `javaAvailable` is true exactly when a Java version was obtained.
 */
export async function checkOpendataLoaderStatus() {
    const probes = await Promise.all([checkPackage(), getJavaVersion()]);
    const packageInstalled = probes[0];
    const javaVersion = probes[1];
    const javaAvailable = javaVersion !== null;
    return { packageInstalled, javaAvailable, javaVersion };
}
75
// execFile is safe — arguments are passed as an array, no shell interpolation.
/**
 * Run `java -version` and extract the runtime's version string.
 *
 * null is returned only when the command itself failed. If Java ran but its
 * banner could not be parsed, the placeholder "unknown version" is returned,
 * so callers that treat a non-null value as "Java is available" reach the
 * same verdict as the plain exit-status probe in `checkJava`.
 *
 * @returns {Promise<string | null>} the quoted version from the banner,
 *          "unknown version" when Java ran but printed no recognisable
 *          banner, or null when Java could not be executed.
 */
function getJavaVersion() {
    return new Promise((resolve) => {
        execFile("java", ["-version"], (error, stdout, stderr) => {
            if (error) {
                resolve(null);
                return;
            }
            // Java prints version to stderr; stdout is scanned too in case a
            // runtime writes the banner there instead.
            const match = /version\s+"([^"]+)"/.exec(`${stderr}\n${stdout}`);
            resolve(match?.[1] ?? "unknown version");
        });
    });
}
@@ -1,44 +1,43 @@
1
1
  import { readFile } from "node:fs/promises";
2
- import { Document } from "@langchain/core/documents";
3
- import { PDFParse } from "pdf-parse";
2
+ import { extractText, getMeta } from "unpdf";
4
3
  export async function extractPdfContent(pdfPath) {
5
- const data = await readFile(pdfPath);
6
- const parser = new PDFParse({ data });
7
- const result = await parser.getText();
8
- await parser.destroy();
9
- return result.pages.map((page) => new Document({
10
- pageContent: page.text,
4
+ const data = new Uint8Array(await readFile(pdfPath));
5
+ const result = await extractText(data, { mergePages: false });
6
+ return result.text.map((pageText, i) => ({
7
+ pageContent: pageText,
11
8
  metadata: {
12
9
  source: pdfPath,
13
- pdf: { totalPages: result.total },
14
- loc: { pageNumber: page.num },
10
+ pdf: { totalPages: result.totalPages },
11
+ loc: { pageNumber: i + 1 },
15
12
  },
16
13
  }));
17
14
  }
18
15
  export async function extractPdfMetadata(pdfPath) {
19
- const data = await readFile(pdfPath);
20
- const parser = new PDFParse({ data });
21
- const result = await parser.getInfo();
22
- await parser.destroy();
23
- const info = result.info;
24
- const custom = info?.["Custom"];
25
- const title = nonEmptyStringOrNull(info?.["Title"]);
26
- const author = nonEmptyStringOrNull(info?.["Author"]);
27
- const subject = nonEmptyStringOrNull(info?.["Subject"]);
28
- const creator = nonEmptyStringOrNull(info?.["Creator"]);
29
- const creationDate = parsePdfDate(nonEmptyStringOrNull(info?.["CreationDate"]));
30
- const modDate = parsePdfDate(nonEmptyStringOrNull(info?.["ModDate"]));
31
- const rawKeywords = nonEmptyStringOrNull(info?.["Keywords"]);
16
+ const data = new Uint8Array(await readFile(pdfPath));
17
+ const { info } = await getMeta(data);
18
+ const custom = getRecord(info["Custom"]);
19
+ const title = nonEmptyStringOrNull(info["Title"]);
20
+ const author = nonEmptyStringOrNull(info["Author"]);
21
+ const subject = nonEmptyStringOrNull(info["Subject"]);
22
+ const creator = nonEmptyStringOrNull(info["Creator"]);
23
+ const creationDate = parsePdfDate(nonEmptyStringOrNull(info["CreationDate"]));
24
+ const modDate = parsePdfDate(nonEmptyStringOrNull(info["ModDate"]));
25
+ const rawKeywords = nonEmptyStringOrNull(info["Keywords"]);
32
26
  const keywords = rawKeywords
33
27
  ? rawKeywords
34
28
  .split(/[,;]/)
35
29
  .map((k) => k.trim())
36
30
  .filter(Boolean)
37
31
  : [];
38
- // DOI can appear in Custom fields (case-insensitive lookup)
39
32
  const doi = findCustomField(custom, "doi");
40
33
  return { title, author, subject, keywords, doi, creator, creationDate, modDate };
41
34
  }
35
/**
 * Narrow an arbitrary value to a plain key/value container.
 *
 * @param value anything, e.g. the PDF info dictionary's "Custom" entry.
 * @returns the same value when it is a non-null, non-array object;
 *          otherwise undefined.
 */
function getRecord(value) {
    if (value === null || typeof value !== "object") {
        return undefined;
    }
    return Array.isArray(value) ? undefined : value;
}
42
41
  function nonEmptyStringOrNull(value) {
43
42
  if (typeof value === "string" && value.trim().length > 0) {
44
43
  return value.trim();
@@ -53,7 +52,6 @@ function parsePdfDate(value) {
53
52
  if (!value)
54
53
  return null;
55
54
  const cleaned = value.replace(/^D:/, "");
56
- // Extract components: YYYY[MM[DD[HH[mm[SS]]]]]
57
55
  const match = /^(\d{4})(\d{2})?(\d{2})?(\d{2})?(\d{2})?(\d{2})?/.exec(cleaned);
58
56
  if (!match)
59
57
  return null;
@@ -66,9 +64,6 @@ function parsePdfDate(value) {
66
64
  const date = new Date(`${year}-${month}-${day}T${hour}:${min}:${sec}Z`);
67
65
  return Number.isNaN(date.getTime()) ? null : date;
68
66
  }
69
- /**
70
- * Case-insensitive lookup in the Custom fields dictionary.
71
- */
72
67
  function findCustomField(custom, key) {
73
68
  if (!custom)
74
69
  return null;
@@ -80,4 +75,3 @@ function findCustomField(custom, key) {
80
75
  }
81
76
  return null;
82
77
  }
83
- //# sourceMappingURL=pdf.js.map
@@ -1,5 +1,4 @@
1
1
  import { readFile, stat } from "node:fs/promises";
2
- import { Document } from "@langchain/core/documents";
3
2
  const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10 MB
4
3
  export async function extractTextContent(filePath) {
5
4
  const fileStats = await stat(filePath);
@@ -8,10 +7,9 @@ export async function extractTextContent(filePath) {
8
7
  }
9
8
  const content = await readFile(filePath, "utf-8");
10
9
  return [
11
- new Document({
10
+ {
12
11
  pageContent: content,
13
12
  metadata: { source: filePath },
14
- }),
13
+ },
15
14
  ];
16
15
  }
17
- //# sourceMappingURL=text.js.map
package/dist/index.js CHANGED
@@ -2,6 +2,7 @@
2
2
  import { createRequire } from "node:module";
3
3
  import { Command } from "commander";
4
4
  import { createConfigCommand } from "./commands/config.js";
5
+ import { createDepCommand } from "./commands/dep.js";
5
6
  import { createKnowledgeBaseCommand } from "./commands/knowledge-base.js";
6
7
  import { createLiteratureCommand } from "./commands/literature.js";
7
8
  import { createUtilCommand } from "./commands/util.js";
@@ -14,8 +15,8 @@ program.hook("preAction", () => {
14
15
  startup();
15
16
  });
16
17
  program.addCommand(createConfigCommand());
18
+ program.addCommand(createDepCommand());
17
19
  program.addCommand(createKnowledgeBaseCommand());
18
20
  program.addCommand(createLiteratureCommand());
19
21
  program.addCommand(createUtilCommand());
20
22
  program.parse();
21
- //# sourceMappingURL=index.js.map
package/dist/lifecycle.js CHANGED
@@ -4,4 +4,3 @@ export function startup() {
4
4
  migratePdfsToFiles(getUserDataDir());
5
5
  migratePdfsToFiles(getProjectDataDir());
6
6
  }
7
- //# sourceMappingURL=lifecycle.js.map