paper-manager 0.8.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/dist/ai/embed.js +0 -1
  2. package/dist/ai/index.js +0 -1
  3. package/dist/ai/provider.js +0 -1
  4. package/dist/commands/config.js +0 -1
  5. package/dist/commands/knowledge-base.js +25 -3
  6. package/dist/commands/literature.js +84 -16
  7. package/dist/commands/util.js +0 -1
  8. package/dist/config/index.js +0 -1
  9. package/dist/config/init.js +0 -1
  10. package/dist/db/index.js +6 -2
  11. package/dist/db/operations/knowledge-bases.js +0 -1
  12. package/dist/db/operations/literatures.js +19 -2
  13. package/dist/db/project/knowledge-bases.js +0 -1
  14. package/dist/db/project/literatures.js +3 -1
  15. package/dist/db/schema.js +5 -3
  16. package/dist/db/test-utils.js +0 -1
  17. package/dist/db/user/knowledge-bases.js +0 -1
  18. package/dist/db/user/literatures.js +3 -1
  19. package/dist/extractor/index.js +0 -1
  20. package/dist/extractor/pdf.js +23 -29
  21. package/dist/extractor/text.js +2 -4
  22. package/dist/index.js +0 -1
  23. package/dist/lifecycle.js +0 -1
  24. package/dist/logger.js +0 -1
  25. package/dist/migrations.js +0 -1
  26. package/dist/text-splitter.js +56 -0
  27. package/dist/types/index.js +0 -1
  28. package/dist/vector-store/embeddings.js +0 -1
  29. package/dist/vector-store/index.js +76 -12
  30. package/package.json +2 -6
  31. package/dist/ai/embed.d.ts +0 -3
  32. package/dist/ai/embed.js.map +0 -1
  33. package/dist/ai/embed.test.d.ts +0 -1
  34. package/dist/ai/embed.test.js +0 -93
  35. package/dist/ai/embed.test.js.map +0 -1
  36. package/dist/ai/index.d.ts +0 -2
  37. package/dist/ai/index.js.map +0 -1
  38. package/dist/ai/provider.d.ts +0 -3
  39. package/dist/ai/provider.js.map +0 -1
  40. package/dist/commands/config.d.ts +0 -2
  41. package/dist/commands/config.js.map +0 -1
  42. package/dist/commands/knowledge-base.d.ts +0 -2
  43. package/dist/commands/knowledge-base.js.map +0 -1
  44. package/dist/commands/literature.d.ts +0 -2
  45. package/dist/commands/literature.js.map +0 -1
  46. package/dist/commands/util.d.ts +0 -2
  47. package/dist/commands/util.js.map +0 -1
  48. package/dist/config/index.d.ts +0 -39
  49. package/dist/config/index.js.map +0 -1
  50. package/dist/config/index.test.d.ts +0 -1
  51. package/dist/config/index.test.js +0 -143
  52. package/dist/config/index.test.js.map +0 -1
  53. package/dist/config/init.d.ts +0 -10
  54. package/dist/config/init.js.map +0 -1
  55. package/dist/config/init.test.d.ts +0 -1
  56. package/dist/config/init.test.js +0 -61
  57. package/dist/config/init.test.js.map +0 -1
  58. package/dist/db/index.d.ts +0 -7
  59. package/dist/db/index.js.map +0 -1
  60. package/dist/db/index.test.d.ts +0 -1
  61. package/dist/db/index.test.js +0 -32
  62. package/dist/db/index.test.js.map +0 -1
  63. package/dist/db/operations/knowledge-bases.d.ts +0 -11
  64. package/dist/db/operations/knowledge-bases.js.map +0 -1
  65. package/dist/db/operations/knowledge-bases.test.d.ts +0 -1
  66. package/dist/db/operations/knowledge-bases.test.js +0 -71
  67. package/dist/db/operations/knowledge-bases.test.js.map +0 -1
  68. package/dist/db/operations/literatures.d.ts +0 -9
  69. package/dist/db/operations/literatures.js.map +0 -1
  70. package/dist/db/operations/literatures.test.d.ts +0 -1
  71. package/dist/db/operations/literatures.test.js +0 -156
  72. package/dist/db/operations/literatures.test.js.map +0 -1
  73. package/dist/db/project/knowledge-bases.d.ts +0 -10
  74. package/dist/db/project/knowledge-bases.js.map +0 -1
  75. package/dist/db/project/literatures.d.ts +0 -8
  76. package/dist/db/project/literatures.js.map +0 -1
  77. package/dist/db/schema.d.ts +0 -371
  78. package/dist/db/schema.js.map +0 -1
  79. package/dist/db/test-utils.d.ts +0 -6
  80. package/dist/db/test-utils.js.map +0 -1
  81. package/dist/db/user/knowledge-bases.d.ts +0 -10
  82. package/dist/db/user/knowledge-bases.js.map +0 -1
  83. package/dist/db/user/literatures.d.ts +0 -8
  84. package/dist/db/user/literatures.js.map +0 -1
  85. package/dist/extractor/index.d.ts +0 -6
  86. package/dist/extractor/index.js.map +0 -1
  87. package/dist/extractor/pdf.d.ts +0 -13
  88. package/dist/extractor/pdf.js.map +0 -1
  89. package/dist/extractor/pdf.test.d.ts +0 -1
  90. package/dist/extractor/pdf.test.js +0 -106
  91. package/dist/extractor/pdf.test.js.map +0 -1
  92. package/dist/extractor/text.d.ts +0 -2
  93. package/dist/extractor/text.js.map +0 -1
  94. package/dist/index.d.ts +0 -2
  95. package/dist/index.js.map +0 -1
  96. package/dist/lifecycle.d.ts +0 -1
  97. package/dist/lifecycle.js.map +0 -1
  98. package/dist/logger.d.ts +0 -24
  99. package/dist/logger.js.map +0 -1
  100. package/dist/migrations.d.ts +0 -5
  101. package/dist/migrations.js.map +0 -1
  102. package/dist/pdf/extractor.d.ts +0 -2
  103. package/dist/pdf/extractor.js +0 -18
  104. package/dist/pdf/extractor.js.map +0 -1
  105. package/dist/types/index.d.ts +0 -61
  106. package/dist/types/index.js.map +0 -1
  107. package/dist/types/index.test.d.ts +0 -1
  108. package/dist/types/index.test.js +0 -100
  109. package/dist/types/index.test.js.map +0 -1
  110. package/dist/vector-store/embeddings.d.ts +0 -8
  111. package/dist/vector-store/embeddings.js.map +0 -1
  112. package/dist/vector-store/index.d.ts +0 -6
  113. package/dist/vector-store/index.js.map +0 -1
package/dist/ai/embed.js CHANGED
@@ -29,4 +29,3 @@ export async function embedMany(config, texts) {
29
29
  }
30
30
  return embeddings;
31
31
  }
32
- //# sourceMappingURL=embed.js.map
package/dist/ai/index.js CHANGED
@@ -1,3 +1,2 @@
1
1
  export { embed, embedMany } from "./embed.js";
2
2
  export { createEmbeddingModel } from "./provider.js";
3
- //# sourceMappingURL=index.js.map
@@ -6,4 +6,3 @@ export function createEmbeddingModel(config) {
6
6
  });
7
7
  return openai.embedding(config.model);
8
8
  }
9
- //# sourceMappingURL=provider.js.map
@@ -80,4 +80,3 @@ export function createConfigCommand() {
80
80
  });
81
81
  return config;
82
82
  }
83
- //# sourceMappingURL=config.js.map
@@ -57,6 +57,7 @@ export function createKnowledgeBaseCommand() {
57
57
  .description("List knowledge bases")
58
58
  .option("--user", "List user knowledge bases only")
59
59
  .option("--all", "List all knowledge bases (default)")
60
+ .option("--json", "Output as JSON")
60
61
  .action((options) => {
61
62
  let results = [];
62
63
  if (options.user) {
@@ -68,7 +69,16 @@ export function createKnowledgeBaseCommand() {
68
69
  results = [...projectKbs, ...userKbs];
69
70
  }
70
71
  if (results.length === 0) {
71
- log.info("No knowledge bases found.");
72
+ if (options.json) {
73
+ log.plain("[]");
74
+ }
75
+ else {
76
+ log.info("No knowledge bases found.");
77
+ }
78
+ return;
79
+ }
80
+ if (options.json) {
81
+ log.plain(JSON.stringify(results, null, 2));
72
82
  return;
73
83
  }
74
84
  for (const kb of results) {
@@ -151,6 +161,7 @@ export function createKnowledgeBaseCommand() {
151
161
  kb.command("query <id> <query-text>")
152
162
  .description("Query a knowledge base")
153
163
  .option("-k, --top-k <number>", "Number of results", "5")
164
+ .option("--json", "Output as JSON")
154
165
  .action(async (id, queryText, options) => {
155
166
  const resolved = resolveKnowledgeBase(id);
156
167
  if (!resolved) {
@@ -168,7 +179,19 @@ export function createKnowledgeBaseCommand() {
168
179
  const k = parseInt(options.topK, 10);
169
180
  const results = await queryVectorStore(modelConfig, vectorDir, queryText, k);
170
181
  if (results.length === 0) {
171
- log.info("No results found.");
182
+ if (options.json) {
183
+ log.plain("[]");
184
+ }
185
+ else {
186
+ log.info("No results found.");
187
+ }
188
+ return;
189
+ }
190
+ if (options.json) {
191
+ const output = results
192
+ .filter((doc) => doc != null)
193
+ .map((doc) => ({ pageContent: doc.pageContent, metadata: doc.metadata }));
194
+ log.plain(JSON.stringify(output, null, 2));
172
195
  return;
173
196
  }
174
197
  for (let i = 0; i < results.length; i++) {
@@ -185,4 +208,3 @@ export function createKnowledgeBaseCommand() {
185
208
  });
186
209
  return kb;
187
210
  }
188
- //# sourceMappingURL=knowledge-base.js.map
@@ -1,6 +1,5 @@
1
1
  import * as fs from "node:fs";
2
2
  import * as path from "node:path";
3
- import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters";
4
3
  import chalk from "chalk";
5
4
  import cliProgress from "cli-progress";
6
5
  import { Command } from "commander";
@@ -11,7 +10,8 @@ import * as userKb from "../db/user/knowledge-bases.js";
11
10
  import * as userLit from "../db/user/literatures.js";
12
11
  import { extractContent, extractPdfMetadata } from "../extractor/index.js";
13
12
  import { log } from "../logger.js";
14
- import { createVectorStore, loadVectorStore } from "../vector-store/index.js";
13
+ import { splitDocuments } from "../text-splitter.js";
14
+ import { addDocuments, createVectorStore } from "../vector-store/index.js";
15
15
  function resolveKnowledgeBase(id) {
16
16
  const pkb = projectKb.getKnowledgeBase(id);
17
17
  if (pkb)
@@ -96,11 +96,7 @@ export function createLiteratureCommand() {
96
96
  fs.copyFileSync(absolutePath, path.join(filesDir, `${literature.id}${ext}`));
97
97
  // Split text and add to vector store
98
98
  log.info("Splitting text...");
99
- const splitter = new RecursiveCharacterTextSplitter({
100
- chunkSize: 1000,
101
- chunkOverlap: 200,
102
- });
103
- const splitDocs = await splitter.splitDocuments(docs);
99
+ const splitDocs = splitDocuments(docs, { chunkSize: 1000, chunkOverlap: 200 });
104
100
  log.step(`Created ${String(splitDocs.length)} chunks.`);
105
101
  // Add literature ID metadata to each chunk
106
102
  for (const doc of splitDocs) {
@@ -115,12 +111,9 @@ export function createLiteratureCommand() {
115
111
  const hasIndex = fs.existsSync(path.join(vectorDir, "faiss.index")) &&
116
112
  fs.existsSync(path.join(vectorDir, "docstore.json"));
117
113
  if (hasIndex) {
118
- const store = await loadVectorStore(modelConfig, vectorDir);
119
- await store.addDocuments(splitDocs);
120
- await store.save(vectorDir);
114
+ await addDocuments(splitDocs, modelConfig, vectorDir);
121
115
  }
122
116
  else {
123
- fs.mkdirSync(vectorDir, { recursive: true });
124
117
  await createVectorStore(splitDocs, modelConfig, vectorDir);
125
118
  }
126
119
  bar.update(splitDocs.length);
@@ -211,7 +204,8 @@ export function createLiteratureCommand() {
211
204
  lit
212
205
  .command("list <knowledge-base-id>")
213
206
  .description("List literatures in a knowledge base")
214
- .action((kbId) => {
207
+ .option("--json", "Output as JSON")
208
+ .action((kbId, options) => {
215
209
  const resolved = resolveKnowledgeBase(kbId);
216
210
  if (!resolved) {
217
211
  log.error(`Knowledge base not found: ${kbId}`);
@@ -220,7 +214,16 @@ export function createLiteratureCommand() {
220
214
  const litOps = getLitOps(resolved.scope);
221
215
  const literatures = litOps.listLiteratures(kbId);
222
216
  if (literatures.length === 0) {
223
- log.info("No literatures found.");
217
+ if (options.json) {
218
+ log.plain("[]");
219
+ }
220
+ else {
221
+ log.info("No literatures found.");
222
+ }
223
+ return;
224
+ }
225
+ if (options.json) {
226
+ log.plain(JSON.stringify(literatures, null, 2));
224
227
  return;
225
228
  }
226
229
  for (const l of literatures) {
@@ -233,11 +236,68 @@ export function createLiteratureCommand() {
233
236
  }
234
237
  log.count(literatures.length, literatures.length === 1 ? "literature" : "literatures");
235
238
  });
239
+ // ─── lit search ────────────────────────────────────────────
240
+ lit
241
+ .command("search <knowledge-base-id>")
242
+ .description("Search literatures in a knowledge base by metadata")
243
+ .option("-t, --title <title>", "Title substring")
244
+ .option("-a, --author <author>", "Author substring")
245
+ .option("-k, --keyword <keyword>", "Keyword substring")
246
+ .option("--doi <doi>", "DOI substring")
247
+ .option("--json", "Output as JSON")
248
+ .action((kbId, options) => {
249
+ const resolved = resolveKnowledgeBase(kbId);
250
+ if (!resolved) {
251
+ log.error(`Knowledge base not found: ${kbId}`);
252
+ process.exit(1);
253
+ }
254
+ if (options.title === undefined &&
255
+ options.author === undefined &&
256
+ options.keyword === undefined &&
257
+ options.doi === undefined) {
258
+ log.error("At least one filter (--title, --author, --keyword, --doi) is required.");
259
+ process.exit(1);
260
+ }
261
+ const litOps = getLitOps(resolved.scope);
262
+ const results = litOps.searchLiteratures(kbId, {
263
+ title: options.title,
264
+ author: options.author,
265
+ keyword: options.keyword,
266
+ doi: options.doi,
267
+ });
268
+ if (results.length === 0) {
269
+ if (options.json) {
270
+ log.plain("[]");
271
+ }
272
+ else {
273
+ log.info("No literatures found.");
274
+ }
275
+ return;
276
+ }
277
+ if (options.json) {
278
+ log.plain(JSON.stringify(results, null, 2));
279
+ return;
280
+ }
281
+ for (const l of results) {
282
+ log.header(l.id);
283
+ log.label("Title:", l.title);
284
+ if (l.author)
285
+ log.label("Author:", l.author);
286
+ if (l.doi)
287
+ log.label("DOI:", l.doi);
288
+ if (l.keywords.length > 0)
289
+ log.label("Keywords:", l.keywords.join(", "));
290
+ log.label("Created:", l.createdAt.toISOString());
291
+ log.newline();
292
+ }
293
+ log.count(results.length, results.length === 1 ? "literature" : "literatures");
294
+ });
236
295
  // ─── lit show ──────────────────────────────────────────────
237
296
  lit
238
297
  .command("show <knowledge-base-id> <id>")
239
298
  .description("Show literature details")
240
- .action((kbId, id) => {
299
+ .option("--json", "Output as JSON")
300
+ .action((kbId, id, options) => {
241
301
  const resolved = resolveKnowledgeBase(kbId);
242
302
  if (!resolved) {
243
303
  log.error(`Knowledge base not found: ${kbId}`);
@@ -249,6 +309,10 @@ export function createLiteratureCommand() {
249
309
  log.error(`Literature not found: ${id}`);
250
310
  process.exit(1);
251
311
  }
312
+ if (options.json) {
313
+ log.plain(JSON.stringify(literature, null, 2));
314
+ return;
315
+ }
252
316
  printLiterature(literature);
253
317
  });
254
318
  // ─── lit note ──────────────────────────────────────────────
@@ -256,12 +320,17 @@ export function createLiteratureCommand() {
256
320
  note
257
321
  .command("list <literature-id>")
258
322
  .description("List all notes for a literature")
259
- .action((litId) => {
323
+ .option("--json", "Output as JSON")
324
+ .action((litId, options) => {
260
325
  const literature = findLiterature(litId);
261
326
  if (!literature) {
262
327
  log.error(`Literature not found: ${litId}`);
263
328
  process.exit(1);
264
329
  }
330
+ if (options.json) {
331
+ log.plain(JSON.stringify(literature.notes, null, 2));
332
+ return;
333
+ }
265
334
  const entries = Object.entries(literature.notes);
266
335
  if (entries.length === 0) {
267
336
  log.info("No notes found.");
@@ -346,4 +415,3 @@ function printLiterature(lit) {
346
415
  }
347
416
  }
348
417
  }
349
- //# sourceMappingURL=literature.js.map
@@ -60,4 +60,3 @@ export function createUtilCommand() {
60
60
  });
61
61
  return util;
62
62
  }
63
- //# sourceMappingURL=util.js.map
@@ -104,4 +104,3 @@ export function getDefaultModelConfig() {
104
104
  }
105
105
  return getModelConfig(defaultId);
106
106
  }
107
- //# sourceMappingURL=index.js.map
@@ -52,4 +52,3 @@ export function initScope(options) {
52
52
  }
53
53
  return { baseDir, items };
54
54
  }
55
- //# sourceMappingURL=init.js.map
package/dist/db/index.js CHANGED
@@ -3,7 +3,7 @@ import * as path from "node:path";
3
3
  import Database from "better-sqlite3";
4
4
  import { drizzle } from "drizzle-orm/better-sqlite3";
5
5
  import { getProjectDataDir, getUserDataDir } from "../config/index.js";
6
- import { CREATE_KNOWLEDGE_BASES_TABLE, CREATE_LITERATURES_TABLE } from "./schema.js";
6
+ import { CREATE_KNOWLEDGE_BASES_TABLE, CREATE_LITERATURES_KB_INDEX, CREATE_LITERATURES_TABLE, } from "./schema.js";
7
7
  // ─── Database Connection ────────────────────────────────────
8
8
  export function openDatabase(dbPath) {
9
9
  const dir = path.dirname(dbPath);
@@ -16,6 +16,7 @@ export function openDatabase(dbPath) {
16
16
  export function initializeDatabase(db) {
17
17
  db.exec(CREATE_KNOWLEDGE_BASES_TABLE);
18
18
  db.exec(CREATE_LITERATURES_TABLE);
19
+ db.exec(CREATE_LITERATURES_KB_INDEX);
19
20
  migrateDatabase(db);
20
21
  }
21
22
  // ─── Migrations ─────────────────────────────────────────────
@@ -27,6 +28,10 @@ const MIGRATIONS = [
27
28
  db.exec("ALTER TABLE literatures ADD COLUMN doi TEXT");
28
29
  }
29
30
  },
31
+ // v1 → v2: add index on literatures.knowledge_base_id for faster KB-scoped lookups
32
+ (db) => {
33
+ db.exec(CREATE_LITERATURES_KB_INDEX);
34
+ },
30
35
  ];
31
36
  function migrateDatabase(db) {
32
37
  const currentVersion = db.pragma("user_version", { simple: true }) ?? 0;
@@ -60,4 +65,3 @@ export function getProjectDb() {
60
65
  }
61
66
  return projectDb;
62
67
  }
63
- //# sourceMappingURL=index.js.map
@@ -48,4 +48,3 @@ export function deleteKnowledgeBase(db, id) {
48
48
  const result = db.delete(knowledgeBases).where(eq(knowledgeBases.id, id)).run();
49
49
  return result.changes > 0;
50
50
  }
51
- //# sourceMappingURL=knowledge-bases.js.map
@@ -1,5 +1,5 @@
1
1
  import * as crypto from "node:crypto";
2
- import { desc, eq } from "drizzle-orm";
2
+ import { and, desc, eq, like } from "drizzle-orm";
3
3
  import { literatures } from "../schema.js";
4
4
  export function createLiterature(db, input) {
5
5
  const id = crypto.randomUUID();
@@ -39,6 +39,24 @@ export function listLiteratures(db, knowledgeBaseId) {
39
39
  .orderBy(desc(literatures.createdAt))
40
40
  .all();
41
41
  }
42
+ export function searchLiteratures(db, knowledgeBaseId, filters) {
43
+ const conditions = [eq(literatures.knowledgeBaseId, knowledgeBaseId)];
44
+ if (filters.title)
45
+ conditions.push(like(literatures.title, `%${filters.title}%`));
46
+ if (filters.author)
47
+ conditions.push(like(literatures.author, `%${filters.author}%`));
48
+ if (filters.doi)
49
+ conditions.push(like(literatures.doi, `%${filters.doi}%`));
50
+ // keywords stored as JSON text; LIKE over raw text matches substrings
51
+ if (filters.keyword)
52
+ conditions.push(like(literatures.keywords, `%${filters.keyword}%`));
53
+ return db
54
+ .select()
55
+ .from(literatures)
56
+ .where(and(...conditions))
57
+ .orderBy(desc(literatures.createdAt))
58
+ .all();
59
+ }
42
60
  export function updateLiterature(db, id, input) {
43
61
  const updates = {};
44
62
  if (input.title !== undefined)
@@ -81,4 +99,3 @@ export function deleteLiteraturesByKnowledgeBaseId(db, knowledgeBaseId) {
81
99
  export function getLiteraturesByKnowledgeBaseId(db, knowledgeBaseId) {
82
100
  return listLiteratures(db, knowledgeBaseId);
83
101
  }
84
- //# sourceMappingURL=literatures.js.map
@@ -15,4 +15,3 @@ export function updateKnowledgeBase(id, input) {
15
15
  export function deleteKnowledgeBase(id) {
16
16
  return ops.deleteKnowledgeBase(getProjectDb(), id);
17
17
  }
18
- //# sourceMappingURL=knowledge-bases.js.map
@@ -18,7 +18,9 @@ export function deleteLiterature(id) {
18
18
  export function deleteLiteraturesByKnowledgeBaseId(knowledgeBaseId) {
19
19
  return ops.deleteLiteraturesByKnowledgeBaseId(getProjectDb(), knowledgeBaseId);
20
20
  }
21
+ export function searchLiteratures(knowledgeBaseId, filters) {
22
+ return ops.searchLiteratures(getProjectDb(), knowledgeBaseId, filters);
23
+ }
21
24
  export function getLiteraturesByKnowledgeBaseId(knowledgeBaseId) {
22
25
  return ops.getLiteraturesByKnowledgeBaseId(getProjectDb(), knowledgeBaseId);
23
26
  }
24
- //# sourceMappingURL=literatures.js.map
package/dist/db/schema.js CHANGED
@@ -1,4 +1,4 @@
1
- import { integer, sqliteTable, text } from "drizzle-orm/sqlite-core";
1
+ import { index, integer, sqliteTable, text } from "drizzle-orm/sqlite-core";
2
2
  // ─── Drizzle Table Definitions ──────────────────────────────
3
3
  export const knowledgeBases = sqliteTable("knowledge_bases", {
4
4
  id: text("id").primaryKey(),
@@ -24,7 +24,7 @@ export const literatures = sqliteTable("literatures", {
24
24
  }),
25
25
  createdAt: integer("created_at", { mode: "timestamp_ms" }).notNull(),
26
26
  updatedAt: integer("updated_at", { mode: "timestamp_ms" }).notNull(),
27
- });
27
+ }, (t) => [index("idx_literatures_knowledge_base_id").on(t.knowledgeBaseId)]);
28
28
  // ─── Bootstrap SQL ──────────────────────────────────────────
29
29
  export const CREATE_KNOWLEDGE_BASES_TABLE = `
30
30
  CREATE TABLE IF NOT EXISTS knowledge_bases (
@@ -52,4 +52,6 @@ CREATE TABLE IF NOT EXISTS literatures (
52
52
  updated_at INTEGER NOT NULL,
53
53
  FOREIGN KEY (knowledge_base_id) REFERENCES knowledge_bases(id) ON DELETE SET NULL
54
54
  )`;
55
- //# sourceMappingURL=schema.js.map
55
+ export const CREATE_LITERATURES_KB_INDEX = `
56
+ CREATE INDEX IF NOT EXISTS idx_literatures_knowledge_base_id
57
+ ON literatures(knowledge_base_id)`;
@@ -9,4 +9,3 @@ export function createTestDb() {
9
9
  initializeDatabase(client);
10
10
  return drizzle(client);
11
11
  }
12
- //# sourceMappingURL=test-utils.js.map
@@ -15,4 +15,3 @@ export function updateKnowledgeBase(id, input) {
15
15
  export function deleteKnowledgeBase(id) {
16
16
  return ops.deleteKnowledgeBase(getUserDb(), id);
17
17
  }
18
- //# sourceMappingURL=knowledge-bases.js.map
@@ -18,7 +18,9 @@ export function deleteLiterature(id) {
18
18
  export function deleteLiteraturesByKnowledgeBaseId(knowledgeBaseId) {
19
19
  return ops.deleteLiteraturesByKnowledgeBaseId(getUserDb(), knowledgeBaseId);
20
20
  }
21
+ export function searchLiteratures(knowledgeBaseId, filters) {
22
+ return ops.searchLiteratures(getUserDb(), knowledgeBaseId, filters);
23
+ }
21
24
  export function getLiteraturesByKnowledgeBaseId(knowledgeBaseId) {
22
25
  return ops.getLiteraturesByKnowledgeBaseId(getUserDb(), knowledgeBaseId);
23
26
  }
24
- //# sourceMappingURL=literatures.js.map
@@ -20,4 +20,3 @@ export async function extractContent(filePath) {
20
20
  const ext = filePath.split(".").pop() ?? "unknown";
21
21
  throw new Error(`Unsupported file type: .${ext} (${String(mimeType)})`);
22
22
  }
23
- //# sourceMappingURL=index.js.map
@@ -1,44 +1,43 @@
1
1
  import { readFile } from "node:fs/promises";
2
- import { Document } from "@langchain/core/documents";
3
- import { PDFParse } from "pdf-parse";
2
+ import { extractText, getMeta } from "unpdf";
4
3
  export async function extractPdfContent(pdfPath) {
5
- const data = await readFile(pdfPath);
6
- const parser = new PDFParse({ data });
7
- const result = await parser.getText();
8
- await parser.destroy();
9
- return result.pages.map((page) => new Document({
10
- pageContent: page.text,
4
+ const data = new Uint8Array(await readFile(pdfPath));
5
+ const result = await extractText(data, { mergePages: false });
6
+ return result.text.map((pageText, i) => ({
7
+ pageContent: pageText,
11
8
  metadata: {
12
9
  source: pdfPath,
13
- pdf: { totalPages: result.total },
14
- loc: { pageNumber: page.num },
10
+ pdf: { totalPages: result.totalPages },
11
+ loc: { pageNumber: i + 1 },
15
12
  },
16
13
  }));
17
14
  }
18
15
  export async function extractPdfMetadata(pdfPath) {
19
- const data = await readFile(pdfPath);
20
- const parser = new PDFParse({ data });
21
- const result = await parser.getInfo();
22
- await parser.destroy();
23
- const info = result.info;
24
- const custom = info?.["Custom"];
25
- const title = nonEmptyStringOrNull(info?.["Title"]);
26
- const author = nonEmptyStringOrNull(info?.["Author"]);
27
- const subject = nonEmptyStringOrNull(info?.["Subject"]);
28
- const creator = nonEmptyStringOrNull(info?.["Creator"]);
29
- const creationDate = parsePdfDate(nonEmptyStringOrNull(info?.["CreationDate"]));
30
- const modDate = parsePdfDate(nonEmptyStringOrNull(info?.["ModDate"]));
31
- const rawKeywords = nonEmptyStringOrNull(info?.["Keywords"]);
16
+ const data = new Uint8Array(await readFile(pdfPath));
17
+ const { info } = await getMeta(data);
18
+ const custom = getRecord(info["Custom"]);
19
+ const title = nonEmptyStringOrNull(info["Title"]);
20
+ const author = nonEmptyStringOrNull(info["Author"]);
21
+ const subject = nonEmptyStringOrNull(info["Subject"]);
22
+ const creator = nonEmptyStringOrNull(info["Creator"]);
23
+ const creationDate = parsePdfDate(nonEmptyStringOrNull(info["CreationDate"]));
24
+ const modDate = parsePdfDate(nonEmptyStringOrNull(info["ModDate"]));
25
+ const rawKeywords = nonEmptyStringOrNull(info["Keywords"]);
32
26
  const keywords = rawKeywords
33
27
  ? rawKeywords
34
28
  .split(/[,;]/)
35
29
  .map((k) => k.trim())
36
30
  .filter(Boolean)
37
31
  : [];
38
- // DOI can appear in Custom fields (case-insensitive lookup)
39
32
  const doi = findCustomField(custom, "doi");
40
33
  return { title, author, subject, keywords, doi, creator, creationDate, modDate };
41
34
  }
35
+ function getRecord(value) {
36
+ if (typeof value === "object" && value !== null && !Array.isArray(value)) {
37
+ return value;
38
+ }
39
+ return undefined;
40
+ }
42
41
  function nonEmptyStringOrNull(value) {
43
42
  if (typeof value === "string" && value.trim().length > 0) {
44
43
  return value.trim();
@@ -53,7 +52,6 @@ function parsePdfDate(value) {
53
52
  if (!value)
54
53
  return null;
55
54
  const cleaned = value.replace(/^D:/, "");
56
- // Extract components: YYYY[MM[DD[HH[mm[SS]]]]]
57
55
  const match = /^(\d{4})(\d{2})?(\d{2})?(\d{2})?(\d{2})?(\d{2})?/.exec(cleaned);
58
56
  if (!match)
59
57
  return null;
@@ -66,9 +64,6 @@ function parsePdfDate(value) {
66
64
  const date = new Date(`${year}-${month}-${day}T${hour}:${min}:${sec}Z`);
67
65
  return Number.isNaN(date.getTime()) ? null : date;
68
66
  }
69
- /**
70
- * Case-insensitive lookup in the Custom fields dictionary.
71
- */
72
67
  function findCustomField(custom, key) {
73
68
  if (!custom)
74
69
  return null;
@@ -80,4 +75,3 @@ function findCustomField(custom, key) {
80
75
  }
81
76
  return null;
82
77
  }
83
- //# sourceMappingURL=pdf.js.map
@@ -1,5 +1,4 @@
1
1
  import { readFile, stat } from "node:fs/promises";
2
- import { Document } from "@langchain/core/documents";
3
2
  const MAX_FILE_SIZE = 10 * 1024 * 1024; // 10 MB
4
3
  export async function extractTextContent(filePath) {
5
4
  const fileStats = await stat(filePath);
@@ -8,10 +7,9 @@ export async function extractTextContent(filePath) {
8
7
  }
9
8
  const content = await readFile(filePath, "utf-8");
10
9
  return [
11
- new Document({
10
+ {
12
11
  pageContent: content,
13
12
  metadata: { source: filePath },
14
- }),
13
+ },
15
14
  ];
16
15
  }
17
- //# sourceMappingURL=text.js.map
package/dist/index.js CHANGED
@@ -18,4 +18,3 @@ program.addCommand(createKnowledgeBaseCommand());
18
18
  program.addCommand(createLiteratureCommand());
19
19
  program.addCommand(createUtilCommand());
20
20
  program.parse();
21
- //# sourceMappingURL=index.js.map
package/dist/lifecycle.js CHANGED
@@ -4,4 +4,3 @@ export function startup() {
4
4
  migratePdfsToFiles(getUserDataDir());
5
5
  migratePdfsToFiles(getProjectDataDir());
6
6
  }
7
- //# sourceMappingURL=lifecycle.js.map
package/dist/logger.js CHANGED
@@ -41,4 +41,3 @@ export const log = {
41
41
  console.log(chalk.dim(`Total: ${n} ${label}`));
42
42
  },
43
43
  };
44
- //# sourceMappingURL=logger.js.map
@@ -25,4 +25,3 @@ export function migratePdfsToFiles(baseDir) {
25
25
  fs.rmSync(legacyPdfsDir, { recursive: true, force: true });
26
26
  }
27
27
  }
28
- //# sourceMappingURL=migrations.js.map
@@ -0,0 +1,56 @@
// Separators tried in order, from coarsest (paragraph break) to finest
// (the empty string, which means "split between individual characters").
const DEFAULT_SEPARATORS = ["\n\n", "\n", " ", ""];

/**
 * Split each document's `pageContent` into chunks and return one new document
 * per chunk.
 *
 * Every returned document carries a shallow copy of the source document's
 * `metadata` (nested objects are still shared with the source). Chunks that
 * contain nothing but whitespace are dropped, so blank pages do not produce
 * empty documents.
 *
 * @param {Array<{pageContent: string, metadata: object}>} docs - Documents to split.
 * @param {{chunkSize: number, chunkOverlap: number}} options - `chunkSize` is the
 *   maximum chunk length and `chunkOverlap` the number of trailing code units of
 *   a chunk that are repeated at the start of the next one; both are measured
 *   with `String.prototype.length`. A `chunkOverlap` that is not greater than 0
 *   disables overlap.
 * @returns {Array<{pageContent: string, metadata: object}>} The chunk documents, in input order.
 * @throws {Error} If `chunkOverlap >= chunkSize`, or if `chunkSize` is not a positive number.
 */
export function splitDocuments(docs, options) {
  const { chunkSize, chunkOverlap } = options;
  if (chunkOverlap >= chunkSize) {
    throw new Error("chunkOverlap must be less than chunkSize");
  }
  // Written as a negated comparison so that NaN and undefined are rejected too.
  if (!(chunkSize > 0)) {
    throw new Error("chunkSize must be a positive number");
  }
  const result = [];
  for (const doc of docs) {
    for (const chunk of splitText(doc.pageContent, chunkSize, chunkOverlap)) {
      if (chunk.trim().length === 0) {
        continue;
      }
      result.push({ pageContent: chunk, metadata: { ...doc.metadata } });
    }
  }
  return result;
}

/**
 * Split a single string using the default separator hierarchy.
 *
 * @param {string} text - Text to split.
 * @param {number} chunkSize - Maximum chunk length.
 * @param {number} chunkOverlap - Overlap carried from one chunk into the next.
 * @returns {string[]} The chunks, in order.
 */
function splitText(text, chunkSize, chunkOverlap) {
  return recursiveSplit(text, DEFAULT_SEPARATORS, chunkSize, chunkOverlap);
}

/**
 * Return the last `chunkOverlap` code units of `text`, without starting in the
 * middle of a surrogate pair.
 *
 * The caller guarantees `text.length > chunkOverlap > 0`. When the cut would
 * fall between the two halves of a pair, the orphaned low surrogate is dropped,
 * so the result may be one unit shorter than requested (possibly empty).
 */
function overlapTail(text, chunkOverlap) {
  const tail = text.slice(-chunkOverlap);
  const before = text.charCodeAt(text.length - chunkOverlap - 1);
  const first = tail.charCodeAt(0);
  const cutsPair =
    before >= 0xd800 && before <= 0xdbff && first >= 0xdc00 && first <= 0xdfff;
  return cutsPair ? tail.slice(1) : tail;
}

/**
 * Split `text` on the first separator, greedily merge the parts into chunks of
 * at most `chunkSize`, then re-split any chunk that is still too long using the
 * remaining (finer) separators.
 *
 * @param {string} text - Text to split.
 * @param {string[]} separators - Separators still available, coarsest first.
 * @param {number} chunkSize - Maximum chunk length.
 * @param {number} chunkOverlap - Overlap carried from one chunk into the next.
 * @returns {string[]} The chunks, in order.
 */
function recursiveSplit(text, separators, chunkSize, chunkOverlap) {
  if (text.length <= chunkSize) {
    return [text];
  }
  const [separator = "", ...remaining] = separators;
  // The empty separator splits by code point, so a pair is never split here.
  const parts = separator === "" ? [...text] : text.split(separator);
  const chunks = [];
  let current = "";
  for (const part of parts) {
    const piece = current.length === 0 ? part : current + separator + part;
    if (piece.length <= chunkSize || current.length === 0) {
      // Still fits, or nothing accumulated yet (an oversized single part is
      // kept whole here and handled by the finer separators below).
      current = piece;
      continue;
    }
    chunks.push(current);
    // Seed the next chunk with the tail of the one just emitted.
    const tail =
      chunkOverlap > 0 && current.length > chunkOverlap
        ? overlapTail(current, chunkOverlap)
        : "";
    current = tail.length > 0 ? tail + separator + part : part;
  }
  if (current.length > 0) {
    chunks.push(current);
  }
  // Recursively split any chunks that are still too large.
  return chunks.flatMap((chunk) =>
    chunk.length > chunkSize && remaining.length > 0
      ? recursiveSplit(chunk, remaining, chunkSize, chunkOverlap)
      : [chunk],
  );
}
@@ -33,4 +33,3 @@ export const ConfigSchema = z.object({
33
33
  embeddingModels: z.record(z.string().min(1), EmbeddingModelConfigSchema).default({}),
34
34
  defaultEmbeddingModelId: z.string().min(1).optional(),
35
35
  });
36
- //# sourceMappingURL=index.js.map
@@ -13,4 +13,3 @@ export class AiSdkEmbeddings extends Embeddings {
13
13
  return embedMany(this.config, documents);
14
14
  }
15
15
  }
16
- //# sourceMappingURL=embeddings.js.map