paper-manager 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/dist/ai/embed.js +0 -1
  2. package/dist/ai/index.js +0 -1
  3. package/dist/ai/provider.js +0 -1
  4. package/dist/commands/config.js +0 -1
  5. package/dist/commands/knowledge-base.js +25 -3
  6. package/dist/commands/literature.js +39 -17
  7. package/dist/commands/util.js +0 -1
  8. package/dist/config/index.js +0 -1
  9. package/dist/config/init.js +0 -1
  10. package/dist/db/index.js +0 -1
  11. package/dist/db/operations/knowledge-bases.js +0 -1
  12. package/dist/db/operations/literatures.js +0 -1
  13. package/dist/db/project/knowledge-bases.js +0 -1
  14. package/dist/db/project/literatures.js +0 -1
  15. package/dist/db/schema.js +0 -1
  16. package/dist/db/test-utils.js +0 -1
  17. package/dist/db/user/knowledge-bases.js +0 -1
  18. package/dist/db/user/literatures.js +0 -1
  19. package/dist/extractor/index.js +0 -1
  20. package/dist/extractor/pdf.js +23 -29
  21. package/dist/extractor/text.js +2 -4
  22. package/dist/index.js +0 -1
  23. package/dist/lifecycle.js +0 -1
  24. package/dist/logger.js +0 -1
  25. package/dist/migrations.js +0 -1
  26. package/dist/text-splitter.js +56 -0
  27. package/dist/types/index.js +0 -1
  28. package/dist/vector-store/embeddings.js +0 -1
  29. package/dist/vector-store/index.js +76 -12
  30. package/package.json +2 -6
  31. package/dist/ai/embed.d.ts +0 -3
  32. package/dist/ai/embed.js.map +0 -1
  33. package/dist/ai/embed.test.d.ts +0 -1
  34. package/dist/ai/embed.test.js +0 -93
  35. package/dist/ai/embed.test.js.map +0 -1
  36. package/dist/ai/index.d.ts +0 -2
  37. package/dist/ai/index.js.map +0 -1
  38. package/dist/ai/provider.d.ts +0 -3
  39. package/dist/ai/provider.js.map +0 -1
  40. package/dist/commands/config.d.ts +0 -2
  41. package/dist/commands/config.js.map +0 -1
  42. package/dist/commands/knowledge-base.d.ts +0 -2
  43. package/dist/commands/knowledge-base.js.map +0 -1
  44. package/dist/commands/literature.d.ts +0 -2
  45. package/dist/commands/literature.js.map +0 -1
  46. package/dist/commands/util.d.ts +0 -2
  47. package/dist/commands/util.js.map +0 -1
  48. package/dist/config/index.d.ts +0 -39
  49. package/dist/config/index.js.map +0 -1
  50. package/dist/config/index.test.d.ts +0 -1
  51. package/dist/config/index.test.js +0 -143
  52. package/dist/config/index.test.js.map +0 -1
  53. package/dist/config/init.d.ts +0 -10
  54. package/dist/config/init.js.map +0 -1
  55. package/dist/config/init.test.d.ts +0 -1
  56. package/dist/config/init.test.js +0 -61
  57. package/dist/config/init.test.js.map +0 -1
  58. package/dist/db/index.d.ts +0 -7
  59. package/dist/db/index.js.map +0 -1
  60. package/dist/db/index.test.d.ts +0 -1
  61. package/dist/db/index.test.js +0 -32
  62. package/dist/db/index.test.js.map +0 -1
  63. package/dist/db/operations/knowledge-bases.d.ts +0 -11
  64. package/dist/db/operations/knowledge-bases.js.map +0 -1
  65. package/dist/db/operations/knowledge-bases.test.d.ts +0 -1
  66. package/dist/db/operations/knowledge-bases.test.js +0 -71
  67. package/dist/db/operations/knowledge-bases.test.js.map +0 -1
  68. package/dist/db/operations/literatures.d.ts +0 -16
  69. package/dist/db/operations/literatures.js.map +0 -1
  70. package/dist/db/operations/literatures.test.d.ts +0 -1
  71. package/dist/db/operations/literatures.test.js +0 -156
  72. package/dist/db/operations/literatures.test.js.map +0 -1
  73. package/dist/db/project/knowledge-bases.d.ts +0 -10
  74. package/dist/db/project/knowledge-bases.js.map +0 -1
  75. package/dist/db/project/literatures.d.ts +0 -10
  76. package/dist/db/project/literatures.js.map +0 -1
  77. package/dist/db/schema.d.ts +0 -372
  78. package/dist/db/schema.js.map +0 -1
  79. package/dist/db/test-utils.d.ts +0 -6
  80. package/dist/db/test-utils.js.map +0 -1
  81. package/dist/db/user/knowledge-bases.d.ts +0 -10
  82. package/dist/db/user/knowledge-bases.js.map +0 -1
  83. package/dist/db/user/literatures.d.ts +0 -10
  84. package/dist/db/user/literatures.js.map +0 -1
  85. package/dist/extractor/index.d.ts +0 -6
  86. package/dist/extractor/index.js.map +0 -1
  87. package/dist/extractor/pdf.d.ts +0 -13
  88. package/dist/extractor/pdf.js.map +0 -1
  89. package/dist/extractor/pdf.test.d.ts +0 -1
  90. package/dist/extractor/pdf.test.js +0 -106
  91. package/dist/extractor/pdf.test.js.map +0 -1
  92. package/dist/extractor/text.d.ts +0 -2
  93. package/dist/extractor/text.js.map +0 -1
  94. package/dist/index.d.ts +0 -2
  95. package/dist/index.js.map +0 -1
  96. package/dist/lifecycle.d.ts +0 -1
  97. package/dist/lifecycle.js.map +0 -1
  98. package/dist/logger.d.ts +0 -24
  99. package/dist/logger.js.map +0 -1
  100. package/dist/migrations.d.ts +0 -5
  101. package/dist/migrations.js.map +0 -1
  102. package/dist/pdf/extractor.d.ts +0 -2
  103. package/dist/pdf/extractor.js +0 -18
  104. package/dist/pdf/extractor.js.map +0 -1
  105. package/dist/types/index.d.ts +0 -61
  106. package/dist/types/index.js.map +0 -1
  107. package/dist/types/index.test.d.ts +0 -1
  108. package/dist/types/index.test.js +0 -100
  109. package/dist/types/index.test.js.map +0 -1
  110. package/dist/vector-store/embeddings.d.ts +0 -8
  111. package/dist/vector-store/embeddings.js.map +0 -1
  112. package/dist/vector-store/index.d.ts +0 -6
  113. package/dist/vector-store/index.js.map +0 -1
package/dist/ai/embed.js CHANGED
@@ -29,4 +29,3 @@ export async function embedMany(config, texts) {
29
29
  }
30
30
  return embeddings;
31
31
  }
32
- //# sourceMappingURL=embed.js.map
package/dist/ai/index.js CHANGED
@@ -1,3 +1,2 @@
1
1
  export { embed, embedMany } from "./embed.js";
2
2
  export { createEmbeddingModel } from "./provider.js";
3
- //# sourceMappingURL=index.js.map
@@ -6,4 +6,3 @@ export function createEmbeddingModel(config) {
6
6
  });
7
7
  return openai.embedding(config.model);
8
8
  }
9
- //# sourceMappingURL=provider.js.map
@@ -80,4 +80,3 @@ export function createConfigCommand() {
80
80
  });
81
81
  return config;
82
82
  }
83
- //# sourceMappingURL=config.js.map
@@ -57,6 +57,7 @@ export function createKnowledgeBaseCommand() {
57
57
  .description("List knowledge bases")
58
58
  .option("--user", "List user knowledge bases only")
59
59
  .option("--all", "List all knowledge bases (default)")
60
+ .option("--json", "Output as JSON")
60
61
  .action((options) => {
61
62
  let results = [];
62
63
  if (options.user) {
@@ -68,7 +69,16 @@ export function createKnowledgeBaseCommand() {
68
69
  results = [...projectKbs, ...userKbs];
69
70
  }
70
71
  if (results.length === 0) {
71
- log.info("No knowledge bases found.");
72
+ if (options.json) {
73
+ log.plain("[]");
74
+ }
75
+ else {
76
+ log.info("No knowledge bases found.");
77
+ }
78
+ return;
79
+ }
80
+ if (options.json) {
81
+ log.plain(JSON.stringify(results, null, 2));
72
82
  return;
73
83
  }
74
84
  for (const kb of results) {
@@ -151,6 +161,7 @@ export function createKnowledgeBaseCommand() {
151
161
  kb.command("query <id> <query-text>")
152
162
  .description("Query a knowledge base")
153
163
  .option("-k, --top-k <number>", "Number of results", "5")
164
+ .option("--json", "Output as JSON")
154
165
  .action(async (id, queryText, options) => {
155
166
  const resolved = resolveKnowledgeBase(id);
156
167
  if (!resolved) {
@@ -168,7 +179,19 @@ export function createKnowledgeBaseCommand() {
168
179
  const k = parseInt(options.topK, 10);
169
180
  const results = await queryVectorStore(modelConfig, vectorDir, queryText, k);
170
181
  if (results.length === 0) {
171
- log.info("No results found.");
182
+ if (options.json) {
183
+ log.plain("[]");
184
+ }
185
+ else {
186
+ log.info("No results found.");
187
+ }
188
+ return;
189
+ }
190
+ if (options.json) {
191
+ const output = results
192
+ .filter((doc) => doc != null)
193
+ .map((doc) => ({ pageContent: doc.pageContent, metadata: doc.metadata }));
194
+ log.plain(JSON.stringify(output, null, 2));
172
195
  return;
173
196
  }
174
197
  for (let i = 0; i < results.length; i++) {
@@ -185,4 +208,3 @@ export function createKnowledgeBaseCommand() {
185
208
  });
186
209
  return kb;
187
210
  }
188
- //# sourceMappingURL=knowledge-base.js.map
@@ -1,6 +1,5 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
- import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
4
3
  import chalk from "chalk";
5
4
  import cliProgress from "cli-progress";
6
5
  import { Command } from "commander";
@@ -11,7 +10,8 @@ import * as userKb from "../db/user/knowledge-bases.js";
11
10
  import * as userLit from "../db/user/literatures.js";
12
11
  import { extractContent, extractPdfMetadata } from "../extractor/index.js";
13
12
  import { log } from "../logger.js";
14
- import { createVectorStore, loadVectorStore } from "../vector-store/index.js";
13
+ import { splitDocuments } from "../text-splitter.js";
14
+ import { addDocuments, createVectorStore } from "../vector-store/index.js";
15
15
  function resolveKnowledgeBase(id) {
16
16
  const pkb = projectKb.getKnowledgeBase(id);
17
17
  if (pkb)
@@ -96,11 +96,7 @@ export function createLiteratureCommand() {
96
96
  fs.copyFileSync(absolutePath, path.join(filesDir, `${literature.id}${ext}`));
97
97
  // Split text and add to vector store
98
98
  log.info("Splitting text...");
99
- const splitter = new RecursiveCharacterTextSplitter({
100
- chunkSize: 1000,
101
- chunkOverlap: 200,
102
- });
103
- const splitDocs = await splitter.splitDocuments(docs);
99
+ const splitDocs = splitDocuments(docs, { chunkSize: 1000, chunkOverlap: 200 });
104
100
  log.step(`Created ${String(splitDocs.length)} chunks.`);
105
101
  // Add literature ID metadata to each chunk
106
102
  for (const doc of splitDocs) {
@@ -115,12 +111,9 @@ export function createLiteratureCommand() {
115
111
  const hasIndex = fs.existsSync(path.join(vectorDir, "faiss.index")) &&
116
112
  fs.existsSync(path.join(vectorDir, "docstore.json"));
117
113
  if (hasIndex) {
118
- const store = await loadVectorStore(modelConfig, vectorDir);
119
- await store.addDocuments(splitDocs);
120
- await store.save(vectorDir);
114
+ await addDocuments(splitDocs, modelConfig, vectorDir);
121
115
  }
122
116
  else {
123
- fs.mkdirSync(vectorDir, { recursive: true });
124
117
  await createVectorStore(splitDocs, modelConfig, vectorDir);
125
118
  }
126
119
  bar.update(splitDocs.length);
@@ -211,7 +204,8 @@ export function createLiteratureCommand() {
211
204
  lit
212
205
  .command("list <knowledge-base-id>")
213
206
  .description("List literatures in a knowledge base")
214
- .action((kbId) => {
207
+ .option("--json", "Output as JSON")
208
+ .action((kbId, options) => {
215
209
  const resolved = resolveKnowledgeBase(kbId);
216
210
  if (!resolved) {
217
211
  log.error(`Knowledge base not found: ${kbId}`);
@@ -220,7 +214,16 @@ export function createLiteratureCommand() {
220
214
  const litOps = getLitOps(resolved.scope);
221
215
  const literatures = litOps.listLiteratures(kbId);
222
216
  if (literatures.length === 0) {
223
- log.info("No literatures found.");
217
+ if (options.json) {
218
+ log.plain("[]");
219
+ }
220
+ else {
221
+ log.info("No literatures found.");
222
+ }
223
+ return;
224
+ }
225
+ if (options.json) {
226
+ log.plain(JSON.stringify(literatures, null, 2));
224
227
  return;
225
228
  }
226
229
  for (const l of literatures) {
@@ -241,6 +244,7 @@ export function createLiteratureCommand() {
241
244
  .option("-a, --author <author>", "Author substring")
242
245
  .option("-k, --keyword <keyword>", "Keyword substring")
243
246
  .option("--doi <doi>", "DOI substring")
247
+ .option("--json", "Output as JSON")
244
248
  .action((kbId, options) => {
245
249
  const resolved = resolveKnowledgeBase(kbId);
246
250
  if (!resolved) {
@@ -262,7 +266,16 @@ export function createLiteratureCommand() {
262
266
  doi: options.doi,
263
267
  });
264
268
  if (results.length === 0) {
265
- log.info("No literatures found.");
269
+ if (options.json) {
270
+ log.plain("[]");
271
+ }
272
+ else {
273
+ log.info("No literatures found.");
274
+ }
275
+ return;
276
+ }
277
+ if (options.json) {
278
+ log.plain(JSON.stringify(results, null, 2));
266
279
  return;
267
280
  }
268
281
  for (const l of results) {
@@ -283,7 +296,8 @@ export function createLiteratureCommand() {
283
296
  lit
284
297
  .command("show <knowledge-base-id> <id>")
285
298
  .description("Show literature details")
286
- .action((kbId, id) => {
299
+ .option("--json", "Output as JSON")
300
+ .action((kbId, id, options) => {
287
301
  const resolved = resolveKnowledgeBase(kbId);
288
302
  if (!resolved) {
289
303
  log.error(`Knowledge base not found: ${kbId}`);
@@ -295,6 +309,10 @@ export function createLiteratureCommand() {
295
309
  log.error(`Literature not found: ${id}`);
296
310
  process.exit(1);
297
311
  }
312
+ if (options.json) {
313
+ log.plain(JSON.stringify(literature, null, 2));
314
+ return;
315
+ }
298
316
  printLiterature(literature);
299
317
  });
300
318
  // ─── lit note ──────────────────────────────────────────────
@@ -302,12 +320,17 @@ export function createLiteratureCommand() {
302
320
  note
303
321
  .command("list <literature-id>")
304
322
  .description("List all notes for a literature")
305
- .action((litId) => {
323
+ .option("--json", "Output as JSON")
324
+ .action((litId, options) => {
306
325
  const literature = findLiterature(litId);
307
326
  if (!literature) {
308
327
  log.error(`Literature not found: ${litId}`);
309
328
  process.exit(1);
310
329
  }
330
+ if (options.json) {
331
+ log.plain(JSON.stringify(literature.notes, null, 2));
332
+ return;
333
+ }
311
334
  const entries = Object.entries(literature.notes);
312
335
  if (entries.length === 0) {
313
336
  log.info("No notes found.");
@@ -392,4 +415,3 @@ function printLiterature(lit) {
392
415
  }
393
416
  }
394
417
  }
395
- //# sourceMappingURL=literature.js.map
@@ -60,4 +60,3 @@ export function createUtilCommand() {
60
60
  });
61
61
  return util;
62
62
  }
63
- //# sourceMappingURL=util.js.map
@@ -104,4 +104,3 @@ export function getDefaultModelConfig() {
104
104
  }
105
105
  return getModelConfig(defaultId);
106
106
  }
107
- //# sourceMappingURL=index.js.map
@@ -52,4 +52,3 @@ export function initScope(options) {
52
52
  }
53
53
  return { baseDir, items };
54
54
  }
55
- //# sourceMappingURL=init.js.map
package/dist/db/index.js CHANGED
@@ -65,4 +65,3 @@ export function getProjectDb() {
65
65
  }
66
66
  return projectDb;
67
67
  }
68
- //# sourceMappingURL=index.js.map
@@ -48,4 +48,3 @@ export function deleteKnowledgeBase(db, id) {
48
48
  const result = db.delete(knowledgeBases).where(eq(knowledgeBases.id, id)).run();
49
49
  return result.changes > 0;
50
50
  }
51
- //# sourceMappingURL=knowledge-bases.js.map
@@ -99,4 +99,3 @@ export function deleteLiteraturesByKnowledgeBaseId(db, knowledgeBaseId) {
99
99
  export function getLiteraturesByKnowledgeBaseId(db, knowledgeBaseId) {
100
100
  return listLiteratures(db, knowledgeBaseId);
101
101
  }
102
- //# sourceMappingURL=literatures.js.map
@@ -15,4 +15,3 @@ export function updateKnowledgeBase(id, input) {
15
15
  export function deleteKnowledgeBase(id) {
16
16
  return ops.deleteKnowledgeBase(getProjectDb(), id);
17
17
  }
18
- //# sourceMappingURL=knowledge-bases.js.map
@@ -24,4 +24,3 @@ export function searchLiteratures(knowledgeBaseId, filters) {
24
24
  export function getLiteraturesByKnowledgeBaseId(knowledgeBaseId) {
25
25
  return ops.getLiteraturesByKnowledgeBaseId(getProjectDb(), knowledgeBaseId);
26
26
  }
27
- //# sourceMappingURL=literatures.js.map
package/dist/db/schema.js CHANGED
@@ -55,4 +55,3 @@ CREATE TABLE IF NOT EXISTS literatures (
55
55
  export const CREATE_LITERATURES_KB_INDEX = `
56
56
  CREATE INDEX IF NOT EXISTS idx_literatures_knowledge_base_id
57
57
  ON literatures(knowledge_base_id)`;
58
- //# sourceMappingURL=schema.js.map
@@ -9,4 +9,3 @@ export function createTestDb() {
9
9
  initializeDatabase(client);
10
10
  return drizzle(client);
11
11
  }
12
- //# sourceMappingURL=test-utils.js.map
@@ -15,4 +15,3 @@ export function updateKnowledgeBase(id, input) {
15
15
  export function deleteKnowledgeBase(id) {
16
16
  return ops.deleteKnowledgeBase(getUserDb(), id);
17
17
  }
18
- //# sourceMappingURL=knowledge-bases.js.map
@@ -24,4 +24,3 @@ export function searchLiteratures(knowledgeBaseId, filters) {
24
24
  export function getLiteraturesByKnowledgeBaseId(knowledgeBaseId) {
25
25
  return ops.getLiteraturesByKnowledgeBaseId(getUserDb(), knowledgeBaseId);
26
26
  }
27
- //# sourceMappingURL=literatures.js.map
@@ -20,4 +20,3 @@ export async function extractContent(filePath) {
20
20
  const ext = filePath.split(".").pop() ?? "unknown";
21
21
  throw new Error(`Unsupported file type: .${ext} (${String(mimeType)})`);
22
22
  }
23
- //# sourceMappingURL=index.js.map
@@ -1,44 +1,43 @@
1
1
  import { readFile } from "node:fs/promises";
2
- import { Document } from "@langchain/core/documents";
3
- import { PDFParse } from "pdf-parse";
2
+ import { extractText, getMeta } from "unpdf";
4
3
  export async function extractPdfContent(pdfPath) {
5
- const data = await readFile(pdfPath);
6
- const parser = new PDFParse({ data });
7
- const result = await parser.getText();
8
- await parser.destroy();
9
- return result.pages.map((page) => new Document({
10
- pageContent: page.text,
4
+ const data = new Uint8Array(await readFile(pdfPath));
5
+ const result = await extractText(data, { mergePages: false });
6
+ return result.text.map((pageText, i) => ({
7
+ pageContent: pageText,
11
8
  metadata: {
12
9
  source: pdfPath,
13
- pdf: { totalPages: result.total },
14
- loc: { pageNumber: page.num },
10
+ pdf: { totalPages: result.totalPages },
11
+ loc: { pageNumber: i + 1 },
15
12
  },
16
13
  }));
17
14
  }
18
15
  export async function extractPdfMetadata(pdfPath) {
19
- const data = await readFile(pdfPath);
20
- const parser = new PDFParse({ data });
21
- const result = await parser.getInfo();
22
- await parser.destroy();
23
- const info = result.info;
24
- const custom = info?.["Custom"];
25
- const title = nonEmptyStringOrNull(info?.["Title"]);
26
- const author = nonEmptyStringOrNull(info?.["Author"]);
27
- const subject = nonEmptyStringOrNull(info?.["Subject"]);
28
- const creator = nonEmptyStringOrNull(info?.["Creator"]);
29
- const creationDate = parsePdfDate(nonEmptyStringOrNull(info?.["CreationDate"]));
30
- const modDate = parsePdfDate(nonEmptyStringOrNull(info?.["ModDate"]));
31
- const rawKeywords = nonEmptyStringOrNull(info?.["Keywords"]);
16
+ const data = new Uint8Array(await readFile(pdfPath));
17
+ const { info } = await getMeta(data);
18
+ const custom = getRecord(info["Custom"]);
19
+ const title = nonEmptyStringOrNull(info["Title"]);
20
+ const author = nonEmptyStringOrNull(info["Author"]);
21
+ const subject = nonEmptyStringOrNull(info["Subject"]);
22
+ const creator = nonEmptyStringOrNull(info["Creator"]);
23
+ const creationDate = parsePdfDate(nonEmptyStringOrNull(info["CreationDate"]));
24
+ const modDate = parsePdfDate(nonEmptyStringOrNull(info["ModDate"]));
25
+ const rawKeywords = nonEmptyStringOrNull(info["Keywords"]);
32
26
  const keywords = rawKeywords
33
27
  ? rawKeywords
34
28
  .split(/[,;]/)
35
29
  .map((k) => k.trim())
36
30
  .filter(Boolean)
37
31
  : [];
38
- // DOI can appear in Custom fields (case-insensitive lookup)
39
32
  const doi = findCustomField(custom, "doi");
40
33
  return { title, author, subject, keywords, doi, creator, creationDate, modDate };
41
34
  }
35
+ function getRecord(value) {
36
+ if (typeof value === "object" && value !== null && !Array.isArray(value)) {
37
+ return value;
38
+ }
39
+ return undefined;
40
+ }
42
41
  function nonEmptyStringOrNull(value) {
43
42
  if (typeof value === "string" && value.trim().length > 0) {
44
43
  return value.trim();
@@ -53,7 +52,6 @@ function parsePdfDate(value) {
53
52
  if (!value)
54
53
  return null;
55
54
  const cleaned = value.replace(/^D:/, "");
56
- // Extract components: YYYY[MM[DD[HH[mm[SS]]]]]
57
55
  const match = /^(\d{4})(\d{2})?(\d{2})?(\d{2})?(\d{2})?(\d{2})?/.exec(cleaned);
58
56
  if (!match)
59
57
  return null;
@@ -66,9 +64,6 @@ function parsePdfDate(value) {
66
64
  const date = new Date(`${year}-${month}-${day}T${hour}:${min}:${sec}Z`);
67
65
  return Number.isNaN(date.getTime()) ? null : date;
68
66
  }
69
- /**
70
- * Case-insensitive lookup in the Custom fields dictionary.
71
- */
72
67
  function findCustomField(custom, key) {
73
68
  if (!custom)
74
69
  return null;
@@ -80,4 +75,3 @@ function findCustomField(custom, key) {
80
75
  }
81
76
  return null;
82
77
  }
83
- //# sourceMappingURL=pdf.js.map
@@ -1,5 +1,4 @@
1
1
  import { readFile, stat } from "node:fs/promises";
2
- import { Document } from "@langchain/core/documents";
3
2
  const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10 MB
4
3
  export async function extractTextContent(filePath) {
5
4
  const fileStats = await stat(filePath);
@@ -8,10 +7,9 @@ export async function extractTextContent(filePath) {
8
7
  }
9
8
  const content = await readFile(filePath, "utf-8");
10
9
  return [
11
- new Document({
10
+ {
12
11
  pageContent: content,
13
12
  metadata: { source: filePath },
14
- }),
13
+ },
15
14
  ];
16
15
  }
17
- //# sourceMappingURL=text.js.map
package/dist/index.js CHANGED
@@ -18,4 +18,3 @@ program.addCommand(createKnowledgeBaseCommand());
18
18
  program.addCommand(createLiteratureCommand());
19
19
  program.addCommand(createUtilCommand());
20
20
  program.parse();
21
- //# sourceMappingURL=index.js.map
package/dist/lifecycle.js CHANGED
@@ -4,4 +4,3 @@ export function startup() {
4
4
  migratePdfsToFiles(getUserDataDir());
5
5
  migratePdfsToFiles(getProjectDataDir());
6
6
  }
7
- //# sourceMappingURL=lifecycle.js.map
package/dist/logger.js CHANGED
@@ -41,4 +41,3 @@ export const log = {
41
41
  console.log(chalk.dim(`Total: ${n} ${label}`));
42
42
  },
43
43
  };
44
- //# sourceMappingURL=logger.js.map
@@ -25,4 +25,3 @@ export function migratePdfsToFiles(baseDir) {
25
25
  fs.rmSync(legacyPdfsDir, { recursive: true, force: true });
26
26
  }
27
27
  }
28
- //# sourceMappingURL=migrations.js.map
@@ -0,0 +1,56 @@
1
+ const DEFAULT_SEPARATORS = ["\n\n", "\n", " ", ""];
2
+ export function splitDocuments(docs, options) {
3
+ if (options.chunkOverlap >= options.chunkSize) {
4
+ throw new Error("chunkOverlap must be less than chunkSize");
5
+ }
6
+ const result = [];
7
+ for (const doc of docs) {
8
+ const chunks = splitText(doc.pageContent, options.chunkSize, options.chunkOverlap);
9
+ for (const chunk of chunks) {
10
+ result.push({ pageContent: chunk, metadata: { ...doc.metadata } });
11
+ }
12
+ }
13
+ return result;
14
+ }
15
+ function splitText(text, chunkSize, chunkOverlap) {
16
+ return recursiveSplit(text, DEFAULT_SEPARATORS, chunkSize, chunkOverlap);
17
+ }
18
+ function recursiveSplit(text, separators, chunkSize, chunkOverlap) {
19
+ if (text.length <= chunkSize)
20
+ return [text];
21
+ const separator = separators[0] ?? "";
22
+ const remaining = separators.slice(1);
23
+ const parts = separator === "" ? [...text] : text.split(separator);
24
+ const chunks = [];
25
+ let current = "";
26
+ for (const part of parts) {
27
+ const piece = current.length === 0 ? part : current + separator + part;
28
+ if (piece.length > chunkSize && current.length > 0) {
29
+ chunks.push(current);
30
+ // Overlap: keep the tail of the current chunk
31
+ if (chunkOverlap > 0 && current.length > chunkOverlap) {
32
+ current = current.slice(-chunkOverlap) + separator + part;
33
+ }
34
+ else {
35
+ current = part;
36
+ }
37
+ }
38
+ else {
39
+ current = piece;
40
+ }
41
+ }
42
+ if (current.length > 0) {
43
+ chunks.push(current);
44
+ }
45
+ // Recursively split any chunks that are still too large
46
+ const finalChunks = [];
47
+ for (const chunk of chunks) {
48
+ if (chunk.length > chunkSize && remaining.length > 0) {
49
+ finalChunks.push(...recursiveSplit(chunk, remaining, chunkSize, chunkOverlap));
50
+ }
51
+ else {
52
+ finalChunks.push(chunk);
53
+ }
54
+ }
55
+ return finalChunks;
56
+ }
@@ -33,4 +33,3 @@ export const ConfigSchema = z.object({
33
33
  embeddingModels: z.record(z.string().min(1), EmbeddingModelConfigSchema).default({}),
34
34
  defaultEmbeddingModelId: z.string().min(1).optional(),
35
35
  });
36
- //# sourceMappingURL=index.js.map
@@ -13,4 +13,3 @@ export class AiSdkEmbeddings extends Embeddings {
13
13
  return embedMany(this.config, documents);
14
14
  }
15
15
  }
16
- //# sourceMappingURL=embeddings.js.map
@@ -1,17 +1,81 @@
1
- import { FaissStore } from "@langchain/community/vectorstores/faiss";
2
- import { AiSdkEmbeddings } from "./embeddings.js";
1
+ import { randomUUID } from "node:crypto";
2
+ import * as fs from "node:fs/promises";
3
+ import * as path from "node:path";
4
+ import { embed, embedMany } from "../ai/embed.js";
5
+ async function importFaiss() {
6
+ const { IndexFlatL2 } = (await import("faiss-node")).default;
7
+ return { IndexFlatL2 };
8
+ }
9
+ function parseDocstore(raw) {
10
+ const parsed = JSON.parse(raw);
11
+ if (!Array.isArray(parsed) ||
12
+ parsed.length !== 2 ||
13
+ !Array.isArray(parsed[0]) ||
14
+ typeof parsed[1] !== "object" ||
15
+ parsed[1] === null) {
16
+ throw new Error("Corrupt docstore.json: expected [entries, mapping] tuple");
17
+ }
18
+ // Safe after validation: parsed[0] is Array, parsed[1] is non-null object
19
+ const entries = parsed[0];
20
+ const mapping = parsed[1];
21
+ return [entries, mapping];
22
+ }
3
23
  export async function createVectorStore(docs, config, directory) {
4
- const embeddings = new AiSdkEmbeddings(config);
5
- const vectorStore = await FaissStore.fromDocuments(docs, embeddings);
6
- await vectorStore.save(directory);
7
- return vectorStore;
24
+ if (docs.length === 0)
25
+ return;
26
+ const texts = docs.map((d) => d.pageContent);
27
+ const vectors = await embedMany(config, texts);
28
+ const dimension = vectors[0].length;
29
+ const { IndexFlatL2 } = await importFaiss();
30
+ const index = new IndexFlatL2(dimension);
31
+ const mapping = {};
32
+ const docEntries = [];
33
+ for (let i = 0; i < vectors.length; i++) {
34
+ const id = randomUUID();
35
+ index.add(vectors[i]);
36
+ mapping[i] = id;
37
+ docEntries.push([id, docs[i]]);
38
+ }
39
+ await fs.mkdir(directory, { recursive: true });
40
+ index.write(path.join(directory, "faiss.index"));
41
+ await fs.writeFile(path.join(directory, "docstore.json"), JSON.stringify([docEntries, mapping]));
8
42
  }
9
- export async function loadVectorStore(config, directory) {
10
- const embeddings = new AiSdkEmbeddings(config);
11
- return FaissStore.load(directory, embeddings);
43
+ export async function addDocuments(docs, config, directory) {
44
+ if (docs.length === 0)
45
+ return;
46
+ const texts = docs.map((d) => d.pageContent);
47
+ const vectors = await embedMany(config, texts);
48
+ const { IndexFlatL2 } = await importFaiss();
49
+ const index = IndexFlatL2.read(path.join(directory, "faiss.index"));
50
+ const raw = await fs.readFile(path.join(directory, "docstore.json"), "utf-8");
51
+ const [existingDocs, mapping] = parseDocstore(raw);
52
+ const baseId = index.ntotal();
53
+ for (let i = 0; i < vectors.length; i++) {
54
+ const id = randomUUID();
55
+ index.add(vectors[i]);
56
+ mapping[baseId + i] = id;
57
+ existingDocs.push([id, docs[i]]);
58
+ }
59
+ index.write(path.join(directory, "faiss.index"));
60
+ await fs.writeFile(path.join(directory, "docstore.json"), JSON.stringify([existingDocs, mapping]));
12
61
  }
13
62
  export async function queryVectorStore(config, directory, query, k = 5) {
14
- const store = await loadVectorStore(config, directory);
15
- return store.similaritySearch(query, k);
63
+ const { IndexFlatL2 } = await importFaiss();
64
+ const index = IndexFlatL2.read(path.join(directory, "faiss.index"));
65
+ const raw = await fs.readFile(path.join(directory, "docstore.json"), "utf-8");
66
+ const [docEntries, mapping] = parseDocstore(raw);
67
+ const docMap = new Map(docEntries);
68
+ const queryVector = await embed(config, query);
69
+ const total = index.ntotal();
70
+ if (total === 0)
71
+ return [];
72
+ const effectiveK = Math.min(k, total);
73
+ const result = index.search(queryVector, effectiveK);
74
+ return result.labels
75
+ .filter((label) => label >= 0)
76
+ .map((label) => {
77
+ const docId = mapping[label];
78
+ return docId ? docMap.get(docId) : undefined;
79
+ })
80
+ .filter((doc) => doc != null);
16
81
  }
17
- //# sourceMappingURL=index.js.map
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "paper-manager",
3
- "version": "0.8.1",
3
+ "version": "0.9.0",
4
4
  "description": "A paper management system.",
5
5
  "keywords": [],
6
6
  "homepage": "https://github.com/EurFelux/paper-manager",
@@ -21,9 +21,6 @@
21
21
  "dependencies": {
22
22
  "@ai-sdk/openai": "^3.0.37",
23
23
  "@ai-sdk/provider": "^3.0.8",
24
- "@langchain/community": "^1.1.20",
25
- "@langchain/core": "^1.1.29",
26
- "@langchain/textsplitters": "^1.0.1",
27
24
  "ai": "^6.0.105",
28
25
  "better-sqlite3": "^12.6.2",
29
26
  "chalk": "^5.6.2",
@@ -31,9 +28,8 @@
31
28
  "commander": "^14.0.3",
32
29
  "drizzle-orm": "^0.45.1",
33
30
  "faiss-node": "^0.5.1",
34
- "langchain": "^1.2.28",
35
31
  "mime-types": "^3.0.2",
36
- "pdf-parse": "^2.4.5",
32
+ "unpdf": "^1.4.0",
37
33
  "zod": "^4.3.6"
38
34
  },
39
35
  "devDependencies": {
@@ -1,3 +0,0 @@
1
- import type { EmbeddingModelConfig } from "../types/index.js";
2
- export declare function embed(config: EmbeddingModelConfig, text: string): Promise<number[]>;
3
- export declare function embedMany(config: EmbeddingModelConfig, texts: string[]): Promise<number[][]>;
@@ -1 +0,0 @@
1
- {"version":3,"file":"embed.js","sourceRoot":"","sources":["../../src/ai/embed.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,IAAI,OAAO,EAAE,SAAS,IAAI,WAAW,EAAE,MAAM,IAAI,CAAC;AAGhE,OAAO,EAAE,oBAAoB,EAAE,MAAM,eAAe,CAAC;AAErD,SAAS,oBAAoB,CAC3B,MAA4B;IAE5B,IAAI,MAAM,CAAC,UAAU,IAAI,IAAI;QAAE,OAAO,SAAS,CAAC;IAChD,OAAO,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,EAAE,CAAC;AAClE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,MAA4B,EAAE,IAAY;IACpE,MAAM,KAAK,GAAG,oBAAoB,CAAC,MAAM,CAAC,CAAC;IAC3C,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC;QAC3B,KAAK;QACL,KAAK,EAAE,IAAI;QACX,eAAe,EAAE,oBAAoB,CAAC,MAAM,CAAC;KAC9C,CAAC,CAAC;IACH,OAAO,MAAM,CAAC,SAAS,CAAC;AAC1B,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAC7B,MAA4B,EAC5B,KAAe;IAEf,MAAM,KAAK,GAAG,oBAAoB,CAAC,MAAM,CAAC,CAAC;IAC3C,MAAM,eAAe,GAAG,oBAAoB,CAAC,MAAM,CAAC,CAAC;IAErD,IAAI,MAAM,CAAC,SAAS,IAAI,IAAI,IAAI,KAAK,CAAC,MAAM,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACjE,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,eAAe,EAAE,CAAC,CAAC;QAC5E,OAAO,MAAM,CAAC,UAAU,CAAC;IAC3B,CAAC;IAED,MAAM,UAAU,GAAe,EAAE,CAAC;IAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACxD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,SAAS,CAAC,CAAC;QACnD,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,eAAe,EAAE,CAAC,CAAC;QAC5E,UAAU,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,UAAU,CAAC,CAAC;IACxC,CAAC;IACD,OAAO,UAAU,CAAC;AACpB,CAAC"}
@@ -1 +0,0 @@
1
- export {};