paper-manager 0.5.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/README.md +9 -1
  2. package/dist/commands/literature.js +56 -21
  3. package/dist/commands/literature.js.map +1 -1
  4. package/dist/commands/util.d.ts +2 -0
  5. package/dist/commands/util.js +63 -0
  6. package/dist/commands/util.js.map +1 -0
  7. package/dist/db/index.d.ts +4 -6
  8. package/dist/db/index.js +29 -42
  9. package/dist/db/index.js.map +1 -1
  10. package/dist/db/index.test.js +26 -73
  11. package/dist/db/index.test.js.map +1 -1
  12. package/dist/db/operations/knowledge-bases.d.ts +6 -6
  13. package/dist/db/operations/knowledge-bases.js +37 -34
  14. package/dist/db/operations/knowledge-bases.js.map +1 -1
  15. package/dist/db/operations/knowledge-bases.test.js +3 -5
  16. package/dist/db/operations/knowledge-bases.test.js.map +1 -1
  17. package/dist/db/operations/literatures.d.ts +8 -8
  18. package/dist/db/operations/literatures.js +63 -66
  19. package/dist/db/operations/literatures.js.map +1 -1
  20. package/dist/db/operations/literatures.test.js +6 -9
  21. package/dist/db/operations/literatures.test.js.map +1 -1
  22. package/dist/db/schema.d.ts +370 -1
  23. package/dist/db/schema.js +29 -0
  24. package/dist/db/schema.js.map +1 -1
  25. package/dist/db/test-utils.d.ts +3 -3
  26. package/dist/db/test-utils.js +6 -8
  27. package/dist/db/test-utils.js.map +1 -1
  28. package/dist/extractor/index.d.ts +3 -2
  29. package/dist/extractor/index.js +2 -2
  30. package/dist/extractor/index.js.map +1 -1
  31. package/dist/extractor/pdf.d.ts +11 -0
  32. package/dist/extractor/pdf.js +65 -0
  33. package/dist/extractor/pdf.js.map +1 -1
  34. package/dist/extractor/pdf.test.d.ts +1 -0
  35. package/dist/extractor/pdf.test.js +106 -0
  36. package/dist/extractor/pdf.test.js.map +1 -0
  37. package/dist/index.js +2 -0
  38. package/dist/index.js.map +1 -1
  39. package/dist/types/index.d.ts +5 -26
  40. package/dist/types/index.js +4 -19
  41. package/dist/types/index.js.map +1 -1
  42. package/dist/types/index.test.js +12 -69
  43. package/dist/types/index.test.js.map +1 -1
  44. package/package.json +3 -1
@@ -1,2 +1,371 @@
1
+ export declare const knowledgeBases: import("drizzle-orm/sqlite-core").SQLiteTableWithColumns<{
2
+ name: "knowledge_bases";
3
+ schema: undefined;
4
+ columns: {
5
+ id: import("drizzle-orm/sqlite-core").SQLiteColumn<{
6
+ name: "id";
7
+ tableName: "knowledge_bases";
8
+ dataType: "string";
9
+ columnType: "SQLiteText";
10
+ data: string;
11
+ driverParam: string;
12
+ notNull: true;
13
+ hasDefault: false;
14
+ isPrimaryKey: true;
15
+ isAutoincrement: false;
16
+ hasRuntimeDefault: false;
17
+ enumValues: [string, ...string[]];
18
+ baseColumn: never;
19
+ identity: undefined;
20
+ generated: undefined;
21
+ }, {}, {
22
+ length: number | undefined;
23
+ }>;
24
+ name: import("drizzle-orm/sqlite-core").SQLiteColumn<{
25
+ name: "name";
26
+ tableName: "knowledge_bases";
27
+ dataType: "string";
28
+ columnType: "SQLiteText";
29
+ data: string;
30
+ driverParam: string;
31
+ notNull: true;
32
+ hasDefault: false;
33
+ isPrimaryKey: false;
34
+ isAutoincrement: false;
35
+ hasRuntimeDefault: false;
36
+ enumValues: [string, ...string[]];
37
+ baseColumn: never;
38
+ identity: undefined;
39
+ generated: undefined;
40
+ }, {}, {
41
+ length: number | undefined;
42
+ }>;
43
+ description: import("drizzle-orm/sqlite-core").SQLiteColumn<{
44
+ name: "description";
45
+ tableName: "knowledge_bases";
46
+ dataType: "string";
47
+ columnType: "SQLiteText";
48
+ data: string;
49
+ driverParam: string;
50
+ notNull: true;
51
+ hasDefault: false;
52
+ isPrimaryKey: false;
53
+ isAutoincrement: false;
54
+ hasRuntimeDefault: false;
55
+ enumValues: [string, ...string[]];
56
+ baseColumn: never;
57
+ identity: undefined;
58
+ generated: undefined;
59
+ }, {}, {
60
+ length: number | undefined;
61
+ }>;
62
+ embeddingModelId: import("drizzle-orm/sqlite-core").SQLiteColumn<{
63
+ name: "embedding_model_id";
64
+ tableName: "knowledge_bases";
65
+ dataType: "string";
66
+ columnType: "SQLiteText";
67
+ data: string;
68
+ driverParam: string;
69
+ notNull: true;
70
+ hasDefault: false;
71
+ isPrimaryKey: false;
72
+ isAutoincrement: false;
73
+ hasRuntimeDefault: false;
74
+ enumValues: [string, ...string[]];
75
+ baseColumn: never;
76
+ identity: undefined;
77
+ generated: undefined;
78
+ }, {}, {
79
+ length: number | undefined;
80
+ }>;
81
+ createdAt: import("drizzle-orm/sqlite-core").SQLiteColumn<{
82
+ name: "created_at";
83
+ tableName: "knowledge_bases";
84
+ dataType: "date";
85
+ columnType: "SQLiteTimestamp";
86
+ data: Date;
87
+ driverParam: number;
88
+ notNull: true;
89
+ hasDefault: false;
90
+ isPrimaryKey: false;
91
+ isAutoincrement: false;
92
+ hasRuntimeDefault: false;
93
+ enumValues: undefined;
94
+ baseColumn: never;
95
+ identity: undefined;
96
+ generated: undefined;
97
+ }, {}, {}>;
98
+ updatedAt: import("drizzle-orm/sqlite-core").SQLiteColumn<{
99
+ name: "updated_at";
100
+ tableName: "knowledge_bases";
101
+ dataType: "date";
102
+ columnType: "SQLiteTimestamp";
103
+ data: Date;
104
+ driverParam: number;
105
+ notNull: true;
106
+ hasDefault: false;
107
+ isPrimaryKey: false;
108
+ isAutoincrement: false;
109
+ hasRuntimeDefault: false;
110
+ enumValues: undefined;
111
+ baseColumn: never;
112
+ identity: undefined;
113
+ generated: undefined;
114
+ }, {}, {}>;
115
+ };
116
+ dialect: "sqlite";
117
+ }>;
118
+ export declare const literatures: import("drizzle-orm/sqlite-core").SQLiteTableWithColumns<{
119
+ name: "literatures";
120
+ schema: undefined;
121
+ columns: {
122
+ id: import("drizzle-orm/sqlite-core").SQLiteColumn<{
123
+ name: "id";
124
+ tableName: "literatures";
125
+ dataType: "string";
126
+ columnType: "SQLiteText";
127
+ data: string;
128
+ driverParam: string;
129
+ notNull: true;
130
+ hasDefault: false;
131
+ isPrimaryKey: true;
132
+ isAutoincrement: false;
133
+ hasRuntimeDefault: false;
134
+ enumValues: [string, ...string[]];
135
+ baseColumn: never;
136
+ identity: undefined;
137
+ generated: undefined;
138
+ }, {}, {
139
+ length: number | undefined;
140
+ }>;
141
+ title: import("drizzle-orm/sqlite-core").SQLiteColumn<{
142
+ name: "title";
143
+ tableName: "literatures";
144
+ dataType: "string";
145
+ columnType: "SQLiteText";
146
+ data: string;
147
+ driverParam: string;
148
+ notNull: true;
149
+ hasDefault: false;
150
+ isPrimaryKey: false;
151
+ isAutoincrement: false;
152
+ hasRuntimeDefault: false;
153
+ enumValues: [string, ...string[]];
154
+ baseColumn: never;
155
+ identity: undefined;
156
+ generated: undefined;
157
+ }, {}, {
158
+ length: number | undefined;
159
+ }>;
160
+ titleTranslation: import("drizzle-orm/sqlite-core").SQLiteColumn<{
161
+ name: "title_translation";
162
+ tableName: "literatures";
163
+ dataType: "string";
164
+ columnType: "SQLiteText";
165
+ data: string;
166
+ driverParam: string;
167
+ notNull: false;
168
+ hasDefault: false;
169
+ isPrimaryKey: false;
170
+ isAutoincrement: false;
171
+ hasRuntimeDefault: false;
172
+ enumValues: [string, ...string[]];
173
+ baseColumn: never;
174
+ identity: undefined;
175
+ generated: undefined;
176
+ }, {}, {
177
+ length: number | undefined;
178
+ }>;
179
+ author: import("drizzle-orm/sqlite-core").SQLiteColumn<{
180
+ name: "author";
181
+ tableName: "literatures";
182
+ dataType: "string";
183
+ columnType: "SQLiteText";
184
+ data: string;
185
+ driverParam: string;
186
+ notNull: false;
187
+ hasDefault: false;
188
+ isPrimaryKey: false;
189
+ isAutoincrement: false;
190
+ hasRuntimeDefault: false;
191
+ enumValues: [string, ...string[]];
192
+ baseColumn: never;
193
+ identity: undefined;
194
+ generated: undefined;
195
+ }, {}, {
196
+ length: number | undefined;
197
+ }>;
198
+ abstract: import("drizzle-orm/sqlite-core").SQLiteColumn<{
199
+ name: "abstract";
200
+ tableName: "literatures";
201
+ dataType: "string";
202
+ columnType: "SQLiteText";
203
+ data: string;
204
+ driverParam: string;
205
+ notNull: false;
206
+ hasDefault: false;
207
+ isPrimaryKey: false;
208
+ isAutoincrement: false;
209
+ hasRuntimeDefault: false;
210
+ enumValues: [string, ...string[]];
211
+ baseColumn: never;
212
+ identity: undefined;
213
+ generated: undefined;
214
+ }, {}, {
215
+ length: number | undefined;
216
+ }>;
217
+ summary: import("drizzle-orm/sqlite-core").SQLiteColumn<{
218
+ name: "summary";
219
+ tableName: "literatures";
220
+ dataType: "string";
221
+ columnType: "SQLiteText";
222
+ data: string;
223
+ driverParam: string;
224
+ notNull: false;
225
+ hasDefault: false;
226
+ isPrimaryKey: false;
227
+ isAutoincrement: false;
228
+ hasRuntimeDefault: false;
229
+ enumValues: [string, ...string[]];
230
+ baseColumn: never;
231
+ identity: undefined;
232
+ generated: undefined;
233
+ }, {}, {
234
+ length: number | undefined;
235
+ }>;
236
+ keywords: import("drizzle-orm/sqlite-core").SQLiteColumn<{
237
+ name: "keywords";
238
+ tableName: "literatures";
239
+ dataType: "json";
240
+ columnType: "SQLiteTextJson";
241
+ data: string[];
242
+ driverParam: string;
243
+ notNull: true;
244
+ hasDefault: false;
245
+ isPrimaryKey: false;
246
+ isAutoincrement: false;
247
+ hasRuntimeDefault: false;
248
+ enumValues: undefined;
249
+ baseColumn: never;
250
+ identity: undefined;
251
+ generated: undefined;
252
+ }, {}, {
253
+ $type: string[];
254
+ }>;
255
+ url: import("drizzle-orm/sqlite-core").SQLiteColumn<{
256
+ name: "url";
257
+ tableName: "literatures";
258
+ dataType: "string";
259
+ columnType: "SQLiteText";
260
+ data: string;
261
+ driverParam: string;
262
+ notNull: false;
263
+ hasDefault: false;
264
+ isPrimaryKey: false;
265
+ isAutoincrement: false;
266
+ hasRuntimeDefault: false;
267
+ enumValues: [string, ...string[]];
268
+ baseColumn: never;
269
+ identity: undefined;
270
+ generated: undefined;
271
+ }, {}, {
272
+ length: number | undefined;
273
+ }>;
274
+ doi: import("drizzle-orm/sqlite-core").SQLiteColumn<{
275
+ name: "doi";
276
+ tableName: "literatures";
277
+ dataType: "string";
278
+ columnType: "SQLiteText";
279
+ data: string;
280
+ driverParam: string;
281
+ notNull: false;
282
+ hasDefault: false;
283
+ isPrimaryKey: false;
284
+ isAutoincrement: false;
285
+ hasRuntimeDefault: false;
286
+ enumValues: [string, ...string[]];
287
+ baseColumn: never;
288
+ identity: undefined;
289
+ generated: undefined;
290
+ }, {}, {
291
+ length: number | undefined;
292
+ }>;
293
+ notes: import("drizzle-orm/sqlite-core").SQLiteColumn<{
294
+ name: "notes";
295
+ tableName: "literatures";
296
+ dataType: "json";
297
+ columnType: "SQLiteTextJson";
298
+ data: Record<string, string>;
299
+ driverParam: string;
300
+ notNull: true;
301
+ hasDefault: false;
302
+ isPrimaryKey: false;
303
+ isAutoincrement: false;
304
+ hasRuntimeDefault: false;
305
+ enumValues: undefined;
306
+ baseColumn: never;
307
+ identity: undefined;
308
+ generated: undefined;
309
+ }, {}, {
310
+ $type: Record<string, string>;
311
+ }>;
312
+ knowledgeBaseId: import("drizzle-orm/sqlite-core").SQLiteColumn<{
313
+ name: "knowledge_base_id";
314
+ tableName: "literatures";
315
+ dataType: "string";
316
+ columnType: "SQLiteText";
317
+ data: string;
318
+ driverParam: string;
319
+ notNull: false;
320
+ hasDefault: false;
321
+ isPrimaryKey: false;
322
+ isAutoincrement: false;
323
+ hasRuntimeDefault: false;
324
+ enumValues: [string, ...string[]];
325
+ baseColumn: never;
326
+ identity: undefined;
327
+ generated: undefined;
328
+ }, {}, {
329
+ length: number | undefined;
330
+ }>;
331
+ createdAt: import("drizzle-orm/sqlite-core").SQLiteColumn<{
332
+ name: "created_at";
333
+ tableName: "literatures";
334
+ dataType: "date";
335
+ columnType: "SQLiteTimestamp";
336
+ data: Date;
337
+ driverParam: number;
338
+ notNull: true;
339
+ hasDefault: false;
340
+ isPrimaryKey: false;
341
+ isAutoincrement: false;
342
+ hasRuntimeDefault: false;
343
+ enumValues: undefined;
344
+ baseColumn: never;
345
+ identity: undefined;
346
+ generated: undefined;
347
+ }, {}, {}>;
348
+ updatedAt: import("drizzle-orm/sqlite-core").SQLiteColumn<{
349
+ name: "updated_at";
350
+ tableName: "literatures";
351
+ dataType: "date";
352
+ columnType: "SQLiteTimestamp";
353
+ data: Date;
354
+ driverParam: number;
355
+ notNull: true;
356
+ hasDefault: false;
357
+ isPrimaryKey: false;
358
+ isAutoincrement: false;
359
+ hasRuntimeDefault: false;
360
+ enumValues: undefined;
361
+ baseColumn: never;
362
+ identity: undefined;
363
+ generated: undefined;
364
+ }, {}, {}>;
365
+ };
366
+ dialect: "sqlite";
367
+ }>;
368
+ export type KnowledgeBaseMetadata = typeof knowledgeBases.$inferSelect;
369
+ export type LiteratureMetadata = typeof literatures.$inferSelect;
1
370
  export declare const CREATE_KNOWLEDGE_BASES_TABLE = "\nCREATE TABLE IF NOT EXISTS knowledge_bases (\n id TEXT PRIMARY KEY,\n name TEXT NOT NULL UNIQUE,\n description TEXT NOT NULL,\n embedding_model_id TEXT NOT NULL,\n created_at INTEGER NOT NULL,\n updated_at INTEGER NOT NULL\n)";
2
- export declare const CREATE_LITERATURES_TABLE = "\nCREATE TABLE IF NOT EXISTS literatures (\n id TEXT PRIMARY KEY,\n title TEXT NOT NULL,\n title_translation TEXT,\n author TEXT,\n abstract TEXT,\n summary TEXT,\n keywords TEXT NOT NULL DEFAULT '[]',\n url TEXT,\n notes TEXT NOT NULL DEFAULT '{}',\n knowledge_base_id TEXT,\n created_at INTEGER NOT NULL,\n updated_at INTEGER NOT NULL,\n FOREIGN KEY (knowledge_base_id) REFERENCES knowledge_bases(id) ON DELETE SET NULL\n)";
371
+ export declare const CREATE_LITERATURES_TABLE = "\nCREATE TABLE IF NOT EXISTS literatures (\n id TEXT PRIMARY KEY,\n title TEXT NOT NULL,\n title_translation TEXT,\n author TEXT,\n abstract TEXT,\n summary TEXT,\n keywords TEXT NOT NULL DEFAULT '[]',\n url TEXT,\n doi TEXT,\n notes TEXT NOT NULL DEFAULT '{}',\n knowledge_base_id TEXT,\n created_at INTEGER NOT NULL,\n updated_at INTEGER NOT NULL,\n FOREIGN KEY (knowledge_base_id) REFERENCES knowledge_bases(id) ON DELETE SET NULL\n)";
package/dist/db/schema.js CHANGED
@@ -1,3 +1,31 @@
1
+ import { integer, sqliteTable, text } from "drizzle-orm/sqlite-core";
2
+ // ─── Drizzle Table Definitions ──────────────────────────────
3
// Drizzle definition of the "knowledge_bases" SQLite table.
// Column names are snake_case in SQL and camelCase on the JS side.
+ export const knowledgeBases = sqliteTable("knowledge_bases", {
4
// Text primary key.
+ id: text("id").primaryKey(),
5
// Required and unique across all knowledge bases.
+ name: text("name").notNull().unique(),
6
+ description: text("description").notNull(),
7
+ embeddingModelId: text("embedding_model_id").notNull(),
8
// Both timestamps are INTEGER columns in "timestamp_ms" mode; the emitted
// declarations type them as Date on the JS side and number for the driver.
+ createdAt: integer("created_at", { mode: "timestamp_ms" }).notNull(),
9
+ updatedAt: integer("updated_at", { mode: "timestamp_ms" }).notNull(),
10
+ });
11
// Drizzle definition of the "literatures" SQLite table.
// Only id, title, keywords, notes and the two timestamps are NOT NULL;
// every other column is nullable.
+ export const literatures = sqliteTable("literatures", {
12
+ id: text("id").primaryKey(),
13
+ title: text("title").notNull(),
14
+ titleTranslation: text("title_translation"),
15
+ author: text("author"),
16
+ abstract: text("abstract"),
17
+ summary: text("summary"),
18
// TEXT column in JSON mode; the emitted declarations type its data as string[].
+ keywords: text("keywords", { mode: "json" }).$type().notNull(),
19
+ url: text("url"),
20
+ doi: text("doi"),
21
// TEXT column in JSON mode; the emitted declarations type its data as
// Record<string, string>.
+ notes: text("notes", { mode: "json" }).$type().notNull(),
22
// Optional foreign key to knowledgeBases.id; deleting the referenced
// knowledge base sets this column to NULL instead of deleting the row.
+ knowledgeBaseId: text("knowledge_base_id").references(() => knowledgeBases.id, {
23
+ onDelete: "set null",
24
+ }),
25
// INTEGER columns in "timestamp_ms" mode (Date on the JS side per the
// emitted declarations).
+ createdAt: integer("created_at", { mode: "timestamp_ms" }).notNull(),
26
+ updatedAt: integer("updated_at", { mode: "timestamp_ms" }).notNull(),
27
+ });
28
+ // ─── Bootstrap SQL ──────────────────────────────────────────
1
29
  export const CREATE_KNOWLEDGE_BASES_TABLE = `
2
30
  CREATE TABLE IF NOT EXISTS knowledge_bases (
3
31
  id TEXT PRIMARY KEY,
@@ -17,6 +45,7 @@ CREATE TABLE IF NOT EXISTS literatures (
17
45
  summary TEXT,
18
46
  keywords TEXT NOT NULL DEFAULT '[]',
19
47
  url TEXT,
48
+ doi TEXT,
20
49
  notes TEXT NOT NULL DEFAULT '{}',
21
50
  knowledge_base_id TEXT,
22
51
  created_at INTEGER NOT NULL,
@@ -1 +1 @@
1
- {"version":3,"file":"schema.js","sourceRoot":"","sources":["../../src/db/schema.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,4BAA4B,GAAG;;;;;;;;EAQ1C,CAAC;AAEH,MAAM,CAAC,MAAM,wBAAwB,GAAG;;;;;;;;;;;;;;;EAetC,CAAC"}
1
+ {"version":3,"file":"schema.js","sourceRoot":"","sources":["../../src/db/schema.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,MAAM,yBAAyB,CAAC;AAErE,+DAA+D;AAE/D,MAAM,CAAC,MAAM,cAAc,GAAG,WAAW,CAAC,iBAAiB,EAAE;IAC3D,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE;IAC3B,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,OAAO,EAAE,CAAC,MAAM,EAAE;IACrC,WAAW,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC,OAAO,EAAE;IAC1C,gBAAgB,EAAE,IAAI,CAAC,oBAAoB,CAAC,CAAC,OAAO,EAAE;IACtD,SAAS,EAAE,OAAO,CAAC,YAAY,EAAE,EAAE,IAAI,EAAE,cAAc,EAAE,CAAC,CAAC,OAAO,EAAE;IACpE,SAAS,EAAE,OAAO,CAAC,YAAY,EAAE,EAAE,IAAI,EAAE,cAAc,EAAE,CAAC,CAAC,OAAO,EAAE;CACrE,CAAC,CAAC;AAEH,MAAM,CAAC,MAAM,WAAW,GAAG,WAAW,CAAC,aAAa,EAAE;IACpD,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,UAAU,EAAE;IAC3B,KAAK,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE;IAC9B,gBAAgB,EAAE,IAAI,CAAC,mBAAmB,CAAC;IAC3C,MAAM,EAAE,IAAI,CAAC,QAAQ,CAAC;IACtB,QAAQ,EAAE,IAAI,CAAC,UAAU,CAAC;IAC1B,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC;IACxB,QAAQ,EAAE,IAAI,CAAC,UAAU,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC,KAAK,EAAY,CAAC,OAAO,EAAE;IACxE,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC;IAChB,GAAG,EAAE,IAAI,CAAC,KAAK,CAAC;IAChB,KAAK,EAAE,IAAI,CAAC,OAAO,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC,KAAK,EAA0B,CAAC,OAAO,EAAE;IAChF,eAAe,EAAE,IAAI,CAAC,mBAAmB,CAAC,CAAC,UAAU,CAAC,GAAG,EAAE,CAAC,cAAc,CAAC,EAAE,EAAE;QAC7E,QAAQ,EAAE,UAAU;KACrB,CAAC;IACF,SAAS,EAAE,OAAO,CAAC,YAAY,EAAE,EAAE,IAAI,EAAE,cAAc,EAAE,CAAC,CAAC,OAAO,EAAE;IACpE,SAAS,EAAE,OAAO,CAAC,YAAY,EAAE,EAAE,IAAI,EAAE,cAAc,EAAE,CAAC,CAAC,OAAO,EAAE;CACrE,CAAC,CAAC;AAOH,+DAA+D;AAE/D,MAAM,CAAC,MAAM,4BAA4B,GAAG;;;;;;;;EAQ1C,CAAC;AAEH,MAAM,CAAC,MAAM,wBAAwB,GAAG;;;;;;;;;;;;;;;;EAgBtC,CAAC"}
@@ -1,6 +1,6 @@
1
- import type BetterSqlite3 from "better-sqlite3";
1
+ import type { AppDatabase } from "./index.js";
2
2
  /**
3
- * Creates an in-memory SQLite database with the schema initialized.
3
+ * Creates an in-memory SQLite database wrapped with Drizzle.
4
4
  * Each call returns a fresh, isolated database instance.
5
5
  */
6
- export declare function createTestDb(): BetterSqlite3.Database;
6
+ export declare function createTestDb(): AppDatabase;
@@ -1,14 +1,12 @@
1
- import Database from "better-sqlite3";
2
- import { initializeDatabase } from "./index.js";
1
+ import { drizzle } from "drizzle-orm/better-sqlite3";
2
+ import { initializeDatabase, openDatabase } from "./index.js";
3
3
  /**
4
- * Creates an in-memory SQLite database with the schema initialized.
4
+ * Creates an in-memory SQLite database wrapped with Drizzle.
5
5
  * Each call returns a fresh, isolated database instance.
6
6
  */
7
7
  export function createTestDb() {
8
- const db = new Database(":memory:");
9
- db.pragma("journal_mode = WAL");
10
- db.pragma("foreign_keys = ON");
11
- initializeDatabase(db);
12
- return db;
8
+ const client = openDatabase(":memory:");
9
+ initializeDatabase(client);
10
+ return drizzle(client);
13
11
  }
14
12
  //# sourceMappingURL=test-utils.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"test-utils.js","sourceRoot":"","sources":["../../src/db/test-utils.ts"],"names":[],"mappings":"AACA,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AAEtC,OAAO,EAAE,kBAAkB,EAAE,MAAM,YAAY,CAAC;AAEhD;;;GAGG;AACH,MAAM,UAAU,YAAY;IAC1B,MAAM,EAAE,GAAG,IAAI,QAAQ,CAAC,UAAU,CAAC,CAAC;IACpC,EAAE,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAChC,EAAE,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAC;IAC/B,kBAAkB,CAAC,EAAE,CAAC,CAAC;IACvB,OAAO,EAAE,CAAC;AACZ,CAAC"}
1
+ {"version":3,"file":"test-utils.js","sourceRoot":"","sources":["../../src/db/test-utils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,4BAA4B,CAAC;AAGrD,OAAO,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE9D;;;GAGG;AACH,MAAM,UAAU,YAAY;IAC1B,MAAM,MAAM,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC;IACxC,kBAAkB,CAAC,MAAM,CAAC,CAAC;IAC3B,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC;AACzB,CAAC"}
@@ -1,5 +1,6 @@
1
1
  import type { Document } from "@langchain/core/documents";
2
- import { extractPdfContent } from "./pdf.js";
2
+ import { extractPdfContent, extractPdfMetadata } from "./pdf.js";
3
3
  import { extractTextContent } from "./text.js";
4
- export { extractPdfContent, extractTextContent };
4
+ export { extractPdfContent, extractPdfMetadata, extractTextContent };
5
+ export type { PdfMetadata } from "./pdf.js";
5
6
  export declare function extractContent(filePath: string): Promise<Document[]>;
@@ -1,7 +1,7 @@
1
1
  import mime from "mime-types";
2
- import { extractPdfContent } from "./pdf.js";
2
+ import { extractPdfContent, extractPdfMetadata } from "./pdf.js";
3
3
  import { extractTextContent } from "./text.js";
4
- export { extractPdfContent, extractTextContent };
4
+ export { extractPdfContent, extractPdfMetadata, extractTextContent };
5
5
  const TEXT_LIKE_MIME_TYPES = new Set([
6
6
  "application/x-tex",
7
7
  "application/x-latex",
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/extractor/index.ts"],"names":[],"mappings":"AACA,OAAO,IAAI,MAAM,YAAY,CAAC;AAE9B,OAAO,EAAE,iBAAiB,EAAE,MAAM,UAAU,CAAC;AAC7C,OAAO,EAAE,kBAAkB,EAAE,MAAM,WAAW,CAAC;AAE/C,OAAO,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,CAAC;AAEjD,MAAM,oBAAoB,GAAwB,IAAI,GAAG,CAAC;IACxD,mBAAmB;IACnB,qBAAqB;CACtB,CAAC,CAAC;AAEH,SAAS,UAAU,CAAC,QAAgB;IAClC,OAAO,QAAQ,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,oBAAoB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;AAC5E,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,QAAgB;IACnD,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IAEvC,IAAI,QAAQ,KAAK,iBAAiB,EAAE,CAAC;QACnC,OAAO,iBAAiB,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC;IAED,IAAI,OAAO,QAAQ,KAAK,QAAQ,IAAI,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QACzD,OAAO,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IACtC,CAAC;IAED,MAAM,GAAG,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,SAAS,CAAC;IACnD,MAAM,IAAI,KAAK,CAAC,2BAA2B,GAAG,KAAK,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;AAC1E,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/extractor/index.ts"],"names":[],"mappings":"AACA,OAAO,IAAI,MAAM,YAAY,CAAC;AAE9B,OAAO,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,MAAM,UAAU,CAAC;AACjE,OAAO,EAAE,kBAAkB,EAAE,MAAM,WAAW,CAAC;AAE/C,OAAO,EAAE,iBAAiB,EAAE,kBAAkB,EAAE,kBAAkB,EAAE,CAAC;AAGrE,MAAM,oBAAoB,GAAwB,IAAI,GAAG,CAAC;IACxD,mBAAmB;IACnB,qBAAqB;CACtB,CAAC,CAAC;AAEH,SAAS,UAAU,CAAC,QAAgB;IAClC,OAAO,QAAQ,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,oBAAoB,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;AAC5E,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,QAAgB;IACnD,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IAEvC,IAAI,QAAQ,KAAK,iBAAiB,EAAE,CAAC;QACnC,OAAO,iBAAiB,CAAC,QAAQ,CAAC,CAAC;IACrC,CAAC;IAED,IAAI,OAAO,QAAQ,KAAK,QAAQ,IAAI,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QACzD,OAAO,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IACtC,CAAC;IAED,MAAM,GAAG,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,IAAI,SAAS,CAAC;IACnD,MAAM,IAAI,KAAK,CAAC,2BAA2B,GAAG,KAAK,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;AAC1E,CAAC"}
@@ -1,2 +1,13 @@
1
1
  import { Document } from "@langchain/core/documents";
2
/**
 * Metadata returned by extractPdfMetadata(). Per the implementation in pdf.js,
 * string fields are trimmed and become null when missing, non-string or blank.
 */
+ export interface PdfMetadata {
3
/** Info "Title" entry. */
+ title: string | null;
4
/** Info "Author" entry. */
+ author: string | null;
5
/** Info "Subject" entry. */
+ subject: string | null;
6
/** Info "Keywords" entry split on "," or ";", trimmed, empties dropped; [] when absent. */
+ keywords: string[];
7
/** Value of a "doi" key (matched case-insensitively) in the Info "Custom" dictionary. */
+ doi: string | null;
8
/** Info "Creator" entry. */
+ creator: string | null;
9
/** Info "CreationDate" entry parsed from the PDF date format; null when absent or unparseable. */
+ creationDate: Date | null;
10
/** Info "ModDate" entry parsed from the PDF date format; null when absent or unparseable. */
+ modDate: Date | null;
11
+ }
2
12
  export declare function extractPdfContent(pdfPath: string): Promise<Document[]>;
13
+ export declare function extractPdfMetadata(pdfPath: string): Promise<PdfMetadata>;
@@ -15,4 +15,69 @@ export async function extractPdfContent(pdfPath) {
15
15
  },
16
16
  }));
17
17
  }
18
/**
 * Read a PDF from disk and extract its document-information metadata.
 *
 * String entries (Title, Author, Subject, Creator) are trimmed and reported as
 * null when missing or blank. Keywords are split on "," or ";". The DOI is
 * looked up case-insensitively in the Info "Custom" dictionary.
 *
 * @param {string} pdfPath Path of the PDF file to read.
 * @returns {Promise<object>} Object with title, author, subject, keywords,
 *   doi, creator, creationDate and modDate.
 * @throws Propagates errors from reading the file or from the PDF parser.
 */
export async function extractPdfMetadata(pdfPath) {
    const data = await readFile(pdfPath);
    const parser = new PDFParse({ data });
    let result;
    try {
        result = await parser.getInfo();
    }
    finally {
        // Always release the parser, including when getInfo() rejects
        // (e.g. corrupt or encrypted file); otherwise it would be leaked.
        await parser.destroy();
    }
    const info = result.info;
    const custom = info?.["Custom"];
    const title = nonEmptyStringOrNull(info?.["Title"]);
    const author = nonEmptyStringOrNull(info?.["Author"]);
    const subject = nonEmptyStringOrNull(info?.["Subject"]);
    const creator = nonEmptyStringOrNull(info?.["Creator"]);
    const creationDate = parsePdfDate(nonEmptyStringOrNull(info?.["CreationDate"]));
    const modDate = parsePdfDate(nonEmptyStringOrNull(info?.["ModDate"]));
    const rawKeywords = nonEmptyStringOrNull(info?.["Keywords"]);
    // Keywords arrive as one delimited string; accept both "," and ";".
    const keywords = rawKeywords
        ? rawKeywords
            .split(/[,;]/)
            .map((k) => k.trim())
            .filter(Boolean)
        : [];
    // DOI can appear in Custom fields (case-insensitive lookup)
    const doi = findCustomField(custom, "doi");
    return { title, author, subject, keywords, doi, creator, creationDate, modDate };
}
42
/**
 * Normalize an arbitrary value to a trimmed, non-empty string.
 *
 * @param {unknown} value Candidate value (typically a PDF Info entry).
 * @returns {string | null} The trimmed text, or null when the value is not a
 *   string or contains only whitespace.
 */
function nonEmptyStringOrNull(value) {
    if (typeof value !== "string") {
        return null;
    }
    const trimmed = value.trim();
    return trimmed.length > 0 ? trimmed : null;
}
48
/**
 * Parse PDF date format: D:YYYYMMDDHHmmSSOHH'mm
 * Examples: "D:20231215120000Z", "D:20231215", "D:20231215120000+08'00"
 *
 * Every component after the year is optional; missing month/day default to
 * "01" and missing time fields to "00". The trailing offset ("Z", or a sign
 * followed by HH and optional 'mm) gives the relation of local time to UTC
 * and is applied so the returned Date is the correct instant. When no offset
 * is present the timestamp is interpreted as UTC.
 *
 * @param {string | null} value Raw date string, with or without the "D:" prefix.
 * @returns {Date | null} The parsed instant, or null when the value is empty,
 *   does not start with a four-digit year, or yields an invalid date.
 */
function parsePdfDate(value) {
    if (!value)
        return null;
    const cleaned = value.replace(/^D:/, "");
    // Components: YYYY[MM[DD[HH[mm[SS]]]]] followed by an optional offset
    // (Z | +HH'mm | -HH'mm); the apostrophes are tolerated but not required.
    const match = /^(\d{4})(\d{2})?(\d{2})?(\d{2})?(\d{2})?(\d{2})?(?:Z|([+-])(\d{2})'?(\d{2})?)?/.exec(cleaned);
    if (!match)
        return null;
    const year = match[1];
    const month = match[2] ?? "01";
    const day = match[3] ?? "01";
    const hour = match[4] ?? "00";
    const min = match[5] ?? "00";
    const sec = match[6] ?? "00";
    // First read the wall-clock fields as if they were UTC.
    const wallClock = new Date(`${year}-${month}-${day}T${hour}:${min}:${sec}Z`);
    const wallClockMs = wallClock.getTime();
    if (Number.isNaN(wallClockMs))
        return null;
    const sign = match[7];
    // Only honour an offset that follows a time of day; this keeps inputs
    // such as "2023-12-15" from having "-12" misread as a UTC offset.
    if (sign === undefined || match[4] === undefined)
        return wallClock;
    const offsetMs = (Number(match[8]) * 60 + Number(match[9] ?? "0")) * 60000;
    // Local time = UTC + offset, therefore UTC = local time - offset.
    return new Date(sign === "+" ? wallClockMs - offsetMs : wallClockMs + offsetMs);
}
69
/**
 * Look up an entry of the Custom fields dictionary, ignoring key case.
 *
 * The first entry (in Object.entries order) whose key matches wins; its value
 * is normalized with nonEmptyStringOrNull, so a matching but blank or
 * non-string value yields null.
 *
 * @param {Record<string, unknown> | undefined} custom Custom dictionary, if any.
 * @param {string} key Field name to search for.
 * @returns {string | null} The trimmed value, or null when absent or unusable.
 */
function findCustomField(custom, key) {
    if (!custom) {
        return null;
    }
    const wanted = key.toLowerCase();
    const hit = Object.entries(custom).find(([name]) => name.toLowerCase() === wanted);
    return hit === undefined ? null : nonEmptyStringOrNull(hit[1]);
}
18
83
  //# sourceMappingURL=pdf.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/extractor/pdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAE5C,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACrD,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,OAAe;IACrD,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,CAAC;IACrC,MAAM,MAAM,GAAG,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC;IACtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,OAAO,EAAE,CAAC;IACtC,MAAM,MAAM,CAAC,OAAO,EAAE,CAAC;IAEvB,OAAO,MAAM,CAAC,KAAK,CAAC,GAAG,CACrB,CAAC,IAAI,EAAE,EAAE,CACP,IAAI,QAAQ,CAAC;QACX,WAAW,EAAE,IAAI,CAAC,IAAI;QACtB,QAAQ,EAAE;YACR,MAAM,EAAE,OAAO;YACf,GAAG,EAAE,EAAE,UAAU,EAAE,MAAM,CAAC,KAAK,EAAE;YACjC,GAAG,EAAE,EAAE,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE;SAC9B;KACF,CAAC,CACL,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/extractor/pdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAE5C,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACrD,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAarC,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,OAAe;IACrD,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,CAAC;IACrC,MAAM,MAAM,GAAG,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC;IACtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,OAAO,EAAE,CAAC;IACtC,MAAM,MAAM,CAAC,OAAO,EAAE,CAAC;IAEvB,OAAO,MAAM,CAAC,KAAK,CAAC,GAAG,CACrB,CAAC,IAAI,EAAE,EAAE,CACP,IAAI,QAAQ,CAAC;QACX,WAAW,EAAE,IAAI,CAAC,IAAI;QACtB,QAAQ,EAAE;YACR,MAAM,EAAE,OAAO;YACf,GAAG,EAAE,EAAE,UAAU,EAAE,MAAM,CAAC,KAAK,EAAE;YACjC,GAAG,EAAE,EAAE,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE;SAC9B;KACF,CAAC,CACL,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,OAAe;IACtD,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,CAAC;IACrC,MAAM,MAAM,GAAG,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC;IACtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,OAAO,EAAE,CAAC;IACtC,MAAM,MAAM,CAAC,OAAO,EAAE,CAAC;IAEvB,MAAM,IAAI,GAAG,MAAM,CAAC,IAA2C,CAAC;IAChE,MAAM,MAAM,GAAG,IAAI,EAAE,CAAC,QAAQ,CAAwC,CAAC;IAEvE,MAAM,KAAK,GAAG,oBAAoB,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC;IACpD,MAAM,MAAM,GAAG,oBAAoB,CAAC,IAAI,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC;IACtD,MAAM,OAAO,GAAG,oBAAoB,CAAC,IAAI,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC;IACxD,MAAM,OAAO,GAAG,oBAAoB,CAAC,IAAI,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC;IACxD,MAAM,YAAY,GAAG,YAAY,CAAC,oBAAoB,CAAC,IAAI,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC;IAChF,MAAM,OAAO,GAAG,YAAY,CAAC,oBAAoB,CAAC,IAAI,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;IAEtE,MAAM,WAAW,GAAG,oBAAoB,CAAC,IAAI,EAAE,CAAC,UAAU,CAAC,CAAC,CAAC;IAC7D,MAAM,QAAQ,GAAG,WAAW;QAC1B,CAAC,CAAC,WAAW;aACR,KAAK,CAAC,MAAM,CAAC;aACb,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aACpB,MAAM,CAAC,OAAO,CAAC;QACpB,CAAC,CAAC,EAAE,CAAC;IAEP,4DAA4D;IAC5D,MAAM,GAAG,GAAG,eAAe,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;IAE3C,OAAO,EAAE,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,GAAG,EAAE,OAAO,EAAE,YAAY,EAAE,OAAO,EAAE,CAAC;AACnF,CAAC;AAED,SAAS,oBAAoB,CAAC,KAAc
;IAC1C,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACzD,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;IACtB,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,SAAS,YAAY,CAAC,KAAoB;IACxC,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IACxB,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IAEzC,+CAA+C;IAC/C,MAAM,KAAK,GAAG,kDAAkD,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC/E,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAC;IAExB,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAE,CAAC;IACvB,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;IAC/B,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;IAC7B,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;IAC9B,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;IAC7B,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC;IAE7B,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,GAAG,IAAI,IAAI,KAAK,IAAI,GAAG,IAAI,IAAI,IAAI,GAAG,IAAI,GAAG,GAAG,CAAC,CAAC;IACxE,OAAO,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;AACpD,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,MAA2C,EAAE,GAAW;IAC/E,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IACzB,MAAM,QAAQ,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;IACnC,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QAC5C,IAAI,CAAC,CAAC,WAAW,EAAE,KAAK,QAAQ,EAAE,CAAC;YACjC,OAAO,oBAAoB,CAAC,CAAC,CAAC,CAAC;QACjC,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC"}
@@ -0,0 +1 @@
1
+ export {};