@djolex999/vir-cli 0.10.0 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/cli.js +73 -4
- package/dist/cli.js.map +1 -1
- package/dist/config.js +15 -0
- package/dist/config.js.map +1 -1
- package/dist/mcp/server.js +21 -4
- package/dist/mcp/server.js.map +1 -1
- package/dist/output/json.js +6 -0
- package/dist/output/json.js.map +1 -1
- package/dist/pipeline/embeddingSweep.js +31 -1
- package/dist/pipeline/embeddingSweep.js.map +1 -1
- package/dist/pipeline/pdfDistiller.js +141 -0
- package/dist/pipeline/pdfDistiller.js.map +1 -0
- package/dist/pipeline/pdfReader.js +76 -0
- package/dist/pipeline/pdfReader.js.map +1 -0
- package/dist/pipeline/run.js +229 -27
- package/dist/pipeline/run.js.map +1 -1
- package/dist/pipeline/writer.js +52 -1
- package/dist/pipeline/writer.js.map +1 -1
- package/dist/search/retriever.js +4 -3
- package/dist/search/retriever.js.map +1 -1
- package/dist/state/db.js +143 -0
- package/dist/state/db.js.map +1 -1
- package/package.json +2 -1
package/dist/output/json.js
CHANGED
|
@@ -15,6 +15,7 @@ const CATEGORY_DIRS = {
|
|
|
15
15
|
tools: "tool",
|
|
16
16
|
articles: "article",
|
|
17
17
|
topics: "topic",
|
|
18
|
+
pdfs: "pdf",
|
|
18
19
|
};
|
|
19
20
|
const WIRE_CATEGORIES = new Set([
|
|
20
21
|
"pattern",
|
|
@@ -23,6 +24,7 @@ const WIRE_CATEGORIES = new Set([
|
|
|
23
24
|
"tool",
|
|
24
25
|
"article",
|
|
25
26
|
"topic",
|
|
27
|
+
"pdf",
|
|
26
28
|
]);
|
|
27
29
|
// Minimal YAML-block parser, kebab-flat. Mirrors mcp/server.ts deliberately —
|
|
28
30
|
// the JSON contract is its own isolated surface and must not couple to the MCP
|
|
@@ -58,6 +60,10 @@ function categoryOf(fm, relPath, topicsDir) {
|
|
|
58
60
|
return "article";
|
|
59
61
|
if (fm.type === "topic")
|
|
60
62
|
return "topic";
|
|
63
|
+
// PDF sub-taxonomy (paper/reference/notes/other) collapses to the single
|
|
64
|
+
// "pdf" wire bucket, exactly like articles collapse to "article".
|
|
65
|
+
if (fm.type === "pdf")
|
|
66
|
+
return "pdf";
|
|
61
67
|
if (fm.category && WIRE_CATEGORIES.has(fm.category)) {
|
|
62
68
|
return fm.category;
|
|
63
69
|
}
|
package/dist/output/json.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"json.js","sourceRoot":"","sources":["../../src/output/json.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"json.js","sourceRoot":"","sources":["../../src/output/json.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AA4CrC,+EAA+E;AAC/E,4EAA4E;AAC5E,+EAA+E;AAC/E,MAAM,aAAa,GAAqC;IACtD,QAAQ,EAAE,SAAS;IACnB,OAAO,EAAE,QAAQ;IACjB,SAAS,EAAE,UAAU;IACrB,KAAK,EAAE,MAAM;IACb,QAAQ,EAAE,SAAS;IACnB,MAAM,EAAE,OAAO;IACf,IAAI,EAAE,KAAK;CACZ,CAAC;AAEF,MAAM,eAAe,GAAG,IAAI,GAAG,CAAS;IACtC,SAAS;IACT,QAAQ;IACR,UAAU;IACV,MAAM;IACN,SAAS;IACT,OAAO;IACP,KAAK;CACN,CAAC,CAAC;AAEH,8EAA8E;AAC9E,+EAA+E;AAC/E,6EAA6E;AAC7E,SAAS,gBAAgB,CAAC,OAAe;IACvC,MAAM,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;IACjD,MAAM,KAAK,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACrB,IAAI,KAAK,KAAK,SAAS;QAAE,OAAO,EAAE,CAAC;IACnC,MAAM,GAAG,GAA2B,EAAE,CAAC;IACvC,KAAK,MAAM,IAAI,IAAI,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QACrC,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QAC9B,IAAI,GAAG,KAAK,CAAC,CAAC;YAAE,SAAS;QACzB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACtC,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAC/B,IAAI,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QACrC,IACE,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;YAC1C,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,EAC1C,CAAC;YACD,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC9C,CAAC;QACD,GAAG,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;IACjB,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,OAAO,CAAC,OAAe;IAC9B,MAAM,IAAI,GAAG,OAAO,CAAC,OAAO,CAAC,wBAAwB,EAAE,EAAE,CAAC,CAAC;IAC3D,OAAO,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AACxD,CAAC;AAED,SAAS,UAAU,CACjB,EAA0B,EAC1B,OAAe,EACf,SAAiB;IAEjB,IAAI,EAAE,CAAC,IAAI,KAAK,SAAS;QAAE,OAAO,SAAS,CAAC;IAC5C,IAAI,EAAE,CAAC,IAAI,KAAK,OAAO;QAAE,OAAO,OAAO,CAAC;IACxC,yEAAyE;IACzE,kEAAkE;IAClE,IAAI,EAAE,CAAC,IAAI,KAAK,KAAK;QAAE,OAAO,KAAK,CAAC;IACpC,IAAI,EAAE,CAAC,QAAQ,IAAI,eAAe,CAAC,GAAG,CAAC,EA
AE,CAAC,QAAQ,CAAC,EAAE,CAAC;QACpD,OAAO,EAAE,CAAC,QAA4B,CAAC;IACzC,CAAC;IACD,2EAA2E;IAC3E,8EAA8E;IAC9E,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACxC,MAAM,GAAG,GAAG,GAAG,KAAK,SAAS,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC;IAC/C,OAAO,aAAa,CAAC,GAAG,CAAC,IAAI,SAAS,CAAC;AACzC,CAAC;AAED,0EAA0E;AAC1E,8EAA8E;AAC9E,6EAA6E;AAC7E,iFAAiF;AACjF,gFAAgF;AAChF,2EAA2E;AAC3E,6EAA6E;AAC7E,MAAM,UAAU,iBAAiB,CAC/B,IAAiB,EACjB,SAAiB,EACjB,SAAS,GAAG,QAAQ;IAEpB,MAAM,GAAG,GAAqB,EAAE,CAAC;IACjC,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;QACrB,MAAM,EAAE,GAAG,gBAAgB,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QACvC,MAAM,OAAO,GAAG,QAAQ,CAAC,SAAS,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC;QAChD,MAAM,IAAI,GAAG,MAAM,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC;QACnC,GAAG,CAAC,IAAI,CAAC;YACP,IAAI,EAAE,OAAO;YACb,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,QAAQ,EAAE,UAAU,CAAC,EAAE,EAAE,OAAO,EAAE,SAAS,CAAC;YAC5C,UAAU,EAAE,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAC5C,OAAO,EAAE,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC;YAC3B,OAAO,EAAE,EAAE,CAAC,OAAO,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI;YAChE,IAAI,EAAE,EAAE,CAAC,IAAI,IAAI,EAAE;SACpB,CAAC,CAAC;IACL,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,MAAM,UAAU,YAAY,CAC1B,IAAkB,EAClB,OAAe;IAEf,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AAClC,CAAC;AAED,8EAA8E;AAC9E,+EAA+E;AAC/E,mEAAmE;AACnE,MAAM,UAAU,oBAAoB,CAClC,SAAkB,EAClB,UAAyB,EACzB,YAAoB,EACpB,MAAY,IAAI,IAAI,EAAE;IAEtB,IAAI,CAAC,SAAS,IAAI,CAAC,UAAU;QAAE,OAAO,MAAM,CAAC;IAC7C,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;IACtC,IAAI,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC;QAAE,OAAO,MAAM,CAAC;IACxC,MAAM,QAAQ,GAAG,CAAC,GAAG,YAAY,GAAG,SAAS,CAAC;IAC9C,OAAO,GAAG,CAAC,OAAO,EAAE,GAAG,MAAM,IAAI,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC;AAC7D,CAAC;AAgBD,MAAM,UAAU,iBAAiB,CAAC,CAAe;IAC/C,OAAO;QACL,MAAM,EAAE,oBAAoB,CAC1B,CAAC,CAAC,eAAe,EACjB,CAAC,CAAC,UAAU,EACZ,CAAC,CAAC,YAAY,EACd,CAAC,CAAC,GAAG,CACN;QACD,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,aAAa,EAAE,CAAC,CAAC,aAAa;QAC9B,QAAQ,EAAE,CAAC,
CAAC,QAAQ;QACpB,SAAS,EAAE,CAAC,CAAC,SAAS;QACtB,WAAW,EAAE,CAAC,CAAC,WAAW;QAC1B,MAAM,EAAE,EAAE,SAAS,EAAE,CAAC,CAAC,eAAe,EAAE,KAAK,EAAE,CAAC,CAAC,WAAW,EAAE;QAC9D,OAAO,EAAE,CAAC,CAAC,OAAO;KACnB,CAAC;AACJ,CAAC"}
|
|
@@ -35,6 +35,17 @@ export function selectArticleEmbeddingTargets(rows) {
|
|
|
35
35
|
r.embedding === null &&
|
|
36
36
|
r.notePath !== null);
|
|
37
37
|
}
|
|
38
|
+
// PDF counterpart of the article selector, mirroring the SQL filter used by
// db.listPdfEmbeddingTargets (the exact complement of getPdfEmbeddings()).
// A row is kept only when every gate passes:
//   - skipped is exactly 0 and error is exactly null
//   - content is neither null nor the empty string
//   - embedding is exactly null (nothing stored yet)
//   - notePath is not null
// PDFs have no archived column, so there is no archive gate.
export function selectPdfEmbeddingTargets(rows) {
  const needsEmbedding = (row) => {
    if (row.skipped !== 0) return false;
    if (row.error !== null) return false;
    if (row.content === null || row.content === "") return false;
    if (row.embedding !== null) return false;
    return row.notePath !== null;
  };
  return rows.filter(needsEmbedding);
}
|
|
38
49
|
// Best-effort self-heal sweep. A write-time embedding miss (Ollama down when a
|
|
39
50
|
// note was distilled) leaves `embedding = NULL`, which makes the note invisible
|
|
40
51
|
// to the embedding-search path: it never enters getEmbeddings()'s candidate set,
|
|
@@ -52,12 +63,16 @@ export async function sweepEmbeddings(db) {
|
|
|
52
63
|
const targets = db.listEmbeddingTargets();
|
|
53
64
|
const topicTargets = db.listTopicEmbeddingTargets();
|
|
54
65
|
const articleTargets = db.listArticleEmbeddingTargets();
|
|
66
|
+
const pdfTargets = db.listPdfEmbeddingTargets();
|
|
55
67
|
if (!(await isOllamaAvailableCached())) {
|
|
56
68
|
return {
|
|
57
69
|
ran: false,
|
|
58
70
|
embedded: 0,
|
|
59
71
|
errors: 0,
|
|
60
|
-
pending: targets.length +
|
|
72
|
+
pending: targets.length +
|
|
73
|
+
topicTargets.length +
|
|
74
|
+
articleTargets.length +
|
|
75
|
+
pdfTargets.length,
|
|
61
76
|
};
|
|
62
77
|
}
|
|
63
78
|
let embedded = 0;
|
|
@@ -104,6 +119,21 @@ export async function sweepEmbeddings(db) {
|
|
|
104
119
|
db.storeArticleEmbedding(a.path, vec);
|
|
105
120
|
embedded += 1;
|
|
106
121
|
}
|
|
122
|
+
// PDFs heal the same way (their own table). A paper distilled while Ollama was
|
|
123
|
+
// down left `embedding` NULL; back-fill keyed by the source path (the PK) via
|
|
124
|
+
// storePdfEmbedding — shipped WITH the getPdfEmbeddings read path so the new
|
|
125
|
+
// pdfs table can't reopen the NULL-embedding blind spot (the 0.8.2/0.8.3 trap).
|
|
126
|
+
for (const p of pdfTargets) {
|
|
127
|
+
if (!p.content || p.content.trim().length === 0)
|
|
128
|
+
continue;
|
|
129
|
+
const vec = await embeddingForNote(p.content);
|
|
130
|
+
if (!vec) {
|
|
131
|
+
errors += 1;
|
|
132
|
+
continue;
|
|
133
|
+
}
|
|
134
|
+
db.storePdfEmbedding(p.path, vec);
|
|
135
|
+
embedded += 1;
|
|
136
|
+
}
|
|
107
137
|
return { ran: true, embedded, errors, pending: errors };
|
|
108
138
|
}
|
|
109
139
|
//# sourceMappingURL=embeddingSweep.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embeddingSweep.js","sourceRoot":"","sources":["../../src/pipeline/embeddingSweep.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,
|
|
1
|
+
{"version":3,"file":"embeddingSweep.js","sourceRoot":"","sources":["../../src/pipeline/embeddingSweep.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,eAAe,GAMhB,MAAM,gBAAgB,CAAC;AACxB,OAAO,EACL,gBAAgB,EAChB,uBAAuB,GACxB,MAAM,uBAAuB,CAAC;AAE/B,+EAA+E;AAC/E,qEAAqE;AACrE,8EAA8E;AAC9E,8EAA8E;AAC9E,gFAAgF;AAChF,WAAW;AACX,MAAM,UAAU,sBAAsB,CACpC,IAAS;IAET,OAAO,IAAI,CAAC,MAAM,CAChB,CAAC,CAAC,EAAE,EAAE,CACJ,CAAC,CAAC,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,KAAK,KAAK,IAAI;QAChB,CAAC,CAAC,OAAO,KAAK,IAAI;QAClB,CAAC,CAAC,OAAO,KAAK,EAAE;QAChB,CAAC,CAAC,SAAS,KAAK,IAAI;QACpB,CAAC,CAAC,KAAK,KAAK,IAAI;QAChB,CAAC,CAAC,QAAQ,KAAK,IAAI;QACnB,CAAC,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,KAAK,CAAC,CAC1B,CAAC;AACJ,CAAC;AAED,yDAAyD;AACzD,2EAA2E;AAC3E,8EAA8E;AAC9E,oCAAoC;AACpC,MAAM,UAAU,2BAA2B,CACzC,IAAS;IAET,OAAO,IAAI,CAAC,MAAM,CAChB,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,KAAK,IAAI,IAAI,CAAC,CAAC,OAAO,KAAK,IAAI,IAAI,CAAC,CAAC,OAAO,KAAK,EAAE,CACtE,CAAC;AACJ,CAAC;AAED,+EAA+E;AAC/E,8EAA8E;AAC9E,6EAA6E;AAC7E,oEAAoE;AACpE,MAAM,UAAU,6BAA6B,CAE3C,IAAS;IACT,OAAO,IAAI,CAAC,MAAM,CAChB,CAAC,CAAC,EAAE,EAAE,CACJ,CAAC,CAAC,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,KAAK,KAAK,IAAI;QAChB,CAAC,CAAC,OAAO,KAAK,IAAI;QAClB,CAAC,CAAC,OAAO,KAAK,EAAE;QAChB,CAAC,CAAC,SAAS,KAAK,IAAI;QACpB,CAAC,CAAC,QAAQ,KAAK,IAAI,CACtB,CAAC;AACJ,CAAC;AAED,iFAAiF;AACjF,2EAA2E;AAC3E,iFAAiF;AACjF,MAAM,UAAU,yBAAyB,CACvC,IAAS;IAET,OAAO,IAAI,CAAC,MAAM,CAChB,CAAC,CAAC,EAAE,EAAE,CACJ,CAAC,CAAC,OAAO,KAAK,CAAC;QACf,CAAC,CAAC,KAAK,KAAK,IAAI;QAChB,CAAC,CAAC,OAAO,KAAK,IAAI;QAClB,CAAC,CAAC,OAAO,KAAK,EAAE;QAChB,CAAC,CAAC,SAAS,KAAK,IAAI;QACpB,CAAC,CAAC,QAAQ,KAAK,IAAI,CACtB,CAAC;AACJ,CAAC;AAaD,+EAA+E;AAC/E,gFAAgF;AAChF,iFAAiF;AACjF,uEAAuE;AACvE,2EAA2E;AAC3E,gFAAgF;AAChF,2EAA2E;AAC3E,wCAAwC;AACxC,EAAE;AACF,2EAA2E;AAC3E,2EAA2E;AAC3E,6EAA6E;AAC7E,oCAAoC;AACpC,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,EAAW;IAC/C,MAAM,OAAO,GAAG,EAAE,CAAC,oBAAoB,EAAE,CAAC;IAC1C,MAAM,YAAY,GAAG,EAAE,CAAC,yBAAyB,EAAE,CAAC;IACpD,MAAM,cAAc,GAAG,EAAE,CAAC,2BAA2B,EAAE,CAAC;IACxD,MAAM,UAAU,GAAG,EAAE,CAAC,uBAAuB,EAAE,CAAC;IAChD,I
AAI,CAAC,CAAC,MAAM,uBAAuB,EAAE,CAAC,EAAE,CAAC;QACvC,OAAO;YACL,GAAG,EAAE,KAAK;YACV,QAAQ,EAAE,CAAC;YACX,MAAM,EAAE,CAAC;YACT,OAAO,EACL,OAAO,CAAC,MAAM;gBACd,YAAY,CAAC,MAAM;gBACnB,cAAc,CAAC,MAAM;gBACrB,UAAU,CAAC,MAAM;SACpB,CAAC;IACJ,CAAC;IACD,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;QACxB,IAAI,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAC1D,MAAM,GAAG,GAAG,MAAM,gBAAgB,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QAC9C,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,MAAM,IAAI,CAAC,CAAC;YACZ,SAAS;QACX,CAAC;QACD,EAAE,CAAC,cAAc,CAAC,eAAe,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,CAAC;QAChD,QAAQ,IAAI,CAAC,CAAC;IAChB,CAAC;IACD,+EAA+E;IAC/E,0EAA0E;IAC1E,6EAA6E;IAC7E,2EAA2E;IAC3E,KAAK,MAAM,CAAC,IAAI,YAAY,EAAE,CAAC;QAC7B,IAAI,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAC1D,MAAM,GAAG,GAAG,MAAM,gBAAgB,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QAC9C,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,MAAM,IAAI,CAAC,CAAC;YACZ,SAAS;QACX,CAAC;QACD,EAAE,CAAC,mBAAmB,CAAC,CAAC,CAAC,EAAE,EAAE,GAAG,CAAC,CAAC;QAClC,QAAQ,IAAI,CAAC,CAAC;IAChB,CAAC;IACD,8EAA8E;IAC9E,6EAA6E;IAC7E,+EAA+E;IAC/E,8EAA8E;IAC9E,qCAAqC;IACrC,KAAK,MAAM,CAAC,IAAI,cAAc,EAAE,CAAC;QAC/B,IAAI,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAC1D,MAAM,GAAG,GAAG,MAAM,gBAAgB,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QAC9C,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,MAAM,IAAI,CAAC,CAAC;YACZ,SAAS;QACX,CAAC;QACD,EAAE,CAAC,qBAAqB,CAAC,CAAC,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACtC,QAAQ,IAAI,CAAC,CAAC;IAChB,CAAC;IACD,+EAA+E;IAC/E,8EAA8E;IAC9E,6EAA6E;IAC7E,gFAAgF;IAChF,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3B,IAAI,CAAC,CAAC,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;YAAE,SAAS;QAC1D,MAAM,GAAG,GAAG,MAAM,gBAAgB,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;QAC9C,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,MAAM,IAAI,CAAC,CAAC;YACZ,SAAS;QACX,CAAC;QACD,EAAE,CAAC,iBAAiB,CAAC,CAAC,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QAClC,QAAQ,IAAI,CAAC,CAAC;IAChB,CAAC;IACD,OAAO,EAAE,GAAG,EAAE,IAA
I,EAAE,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;AAC1D,CAAC"}
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import { callLLM, maybeAnthropicClient, normalizeModelName, withRateLimitRetry, } from "./distiller.js";
|
|
4
|
+
import { scrub } from "./scrubber.js";
|
|
5
|
+
// Closed set of category labels accepted from the classifier.
export const PDF_CATEGORIES = ["paper", "reference", "notes", "other"];

// Neutral fallback when the model returns an unrecognized category: "other"
// claims the least about the document's intent.
const DEFAULT_PDF_CATEGORY = "other";

// Directory segment that PDF note paths are joined under.
export const PDFS_SUBDIR = "pdfs";

// Papers are long → bound the distill input so a single paper can't blow up
// token cost (hybrid routing would otherwise push it to Sonnet on size).
// Mirrors articleDistiller's 24k-char bound.
const MAX_BODY_CHARS = 24_000;

// Only this many leading characters of the body go into the classify prompt.
const CLASSIFY_EXCERPT_CHARS = 3000;
|
|
20
|
+
// Runs the two-step PDF pipeline: classify the document, then distill it into
// a markdown note.
//
// parsed: parsed PDF record; this function reads `parsed.text` (the prompt
//         builders read further fields).
// cfg:    run configuration; `cfg.models.classify`, `cfg.models.distill` and
//         `cfg.provider` are read here, and cfg is forwarded to the LLM helpers.
//
// Resolves to `{ classification, markdown }`, or to null when the scrubbed
// body is blank or the distill call returns only whitespace.
export async function distillPdf(parsed, cfg) {
  const llmClient = maybeAnthropicClient(cfg);
  const classifyModel = normalizeModelName(cfg.models.classify, cfg.provider);
  const distillModel = normalizeModelName(cfg.models.distill, cfg.provider);

  // Extracted text still flows to a provider — scrub it first, then cap the
  // length so one long document cannot inflate the request.
  const scrubbed = scrub(parsed.text);
  const body = scrubbed.slice(0, MAX_BODY_CHARS);
  if (body.trim() === "") {
    return null;
  }

  // Each request object is built inside its closure, so a retry rebuilds it.
  const runClassify = () =>
    callLLM(cfg, llmClient, {
      prompt: classifyPrompt(parsed, body),
      model: classifyModel,
      maxTokens: 200,
      cost: { stage: "pdf-classify" },
    });
  const classification = parsePdfClassification(
    await withRateLimitRetry(runClassify),
  );

  const runDistill = () =>
    callLLM(cfg, llmClient, {
      prompt: distillPrompt(parsed, body),
      model: distillModel,
      maxTokens: 1500,
      cost: { stage: "pdf-distill" },
    });
  const reply = await withRateLimitRetry(runDistill);
  const markdown = reply.trim();
  if (markdown === "") {
    return null;
  }
  return { classification, markdown };
}
|
|
45
|
+
// Builds the prompt text for the classification call.
// The prompt asks for a JSON-only answer with a `category` (one of paper,
// reference, notes, other) and a `confidence` in 0..1, explains each category,
// then appends the document title and the first CLASSIFY_EXCERPT_CHARS
// characters of `body`.
// parsed: only `parsed.title` is read here; it is interpolated as-is.
// body:   document text; only its leading excerpt is included.
// Returns the prompt as a single string.
function classifyPrompt(parsed, body) {
|
|
46
|
+
return `Classify this PDF document into exactly one category. Output JSON only:
|
|
47
|
+
{ "category": "paper" | "reference" | "notes" | "other",
|
|
48
|
+
"confidence": number (0..1) }
|
|
49
|
+
|
|
50
|
+
paper = a research/academic paper (abstract, methods, results, contributions)
|
|
51
|
+
reference = documentation, a spec, manual, datasheet, or material to look up later
|
|
52
|
+
notes = lecture notes, slides, a course handout, or personal study notes
|
|
53
|
+
other = anything that doesn't fit the above
|
|
54
|
+
|
|
55
|
+
Title: ${parsed.title}
|
|
56
|
+
|
|
57
|
+
${body.slice(0, CLASSIFY_EXCERPT_CHARS)}`;
|
|
58
|
+
}
|
|
59
|
+
// Builds the prompt text for the distillation call.
// The prompt fixes the note layout (Summary, Key Points, optional Methods &
// Findings, Related), states a strict no-verbatim-copy rule, and then appends
// the title, the source path and the whole of `body`.
// parsed: `parsed.title` and `parsed.filePath` are read and interpolated as-is.
// body:   document text, included in full (no slicing happens here).
// Returns the prompt as a single string.
// NOTE(review): title and filePath are not processed in this function, so a
// raw local path goes into the prompt — confirm that is intended, given that
// the caller appears to scrub the body before passing it in.
function distillPrompt(parsed, body) {
|
|
60
|
+
return `Distill this PDF document into a durable knowledge note. Output markdown
|
|
61
|
+
only — no preamble, start with '## Summary'. Use these sections:
|
|
62
|
+
|
|
63
|
+
- ## Summary (2-3 sentences, in your own words)
|
|
64
|
+
- ## Key Points (bullet list: the main claims, findings, methods, or arguments)
|
|
65
|
+
- ## Methods & Findings (for a paper: the approach and what it concluded; omit
|
|
66
|
+
this section entirely if the document isn't a study)
|
|
67
|
+
- ## Related (plain-English topics this connects to, one per bullet — these get
|
|
68
|
+
turned into wikilinks automatically, so write them as short noun phrases)
|
|
69
|
+
|
|
70
|
+
COPYRIGHT — strict: this is someone else's IP. Never reproduce more than 15
|
|
71
|
+
consecutive words verbatim from the source. Paraphrase everything in your own
|
|
72
|
+
words. Do not reproduce figures, tables, equations, or full passages. Summarize
|
|
73
|
+
and cite by context, never quote at length.
|
|
74
|
+
|
|
75
|
+
Title: ${parsed.title}
|
|
76
|
+
Source: ${parsed.filePath}
|
|
77
|
+
Document:
|
|
78
|
+
${body}`;
|
|
79
|
+
}
|
|
80
|
+
// Turns the classifier's raw reply into `{ category, confidence }`.
//
// The first "{" through the last "}" of `text` is parsed as JSON. If no braces
// are found, or that span is not valid JSON, the result is the default
// category with confidence 0. Otherwise:
//   - category is kept only when it is a string listed in PDF_CATEGORIES,
//     else the default category is used;
//   - confidence is coerced to a number (missing → 0), clamped to [0, 1], and
//     replaced by 0 when it is not finite.
export function parsePdfClassification(text) {
  const fallback = () => ({ category: DEFAULT_PDF_CATEGORY, confidence: 0 });

  const jsonSpan = text.match(/\{[\s\S]*\}/);
  if (jsonSpan === null) {
    return fallback();
  }

  let payload;
  try {
    payload = JSON.parse(jsonSpan[0]);
  } catch {
    return fallback();
  }

  const { category: rawCategory, confidence: rawConfidence } = payload;

  const isKnownCategory =
    typeof rawCategory === "string" && PDF_CATEGORIES.includes(rawCategory);
  const category = isKnownCategory ? rawCategory : DEFAULT_PDF_CATEGORY;

  // Number() leaves real numbers untouched and converts numeric strings.
  const numeric = Number(rawConfidence ?? 0);
  const confidence = Number.isFinite(numeric)
    ? Math.min(1, Math.max(0, numeric))
    : 0;

  return { category, confidence };
}
|
|
103
|
+
// Derives the note slug for a PDF: up to 60 characters of the kebab-cased
// title, followed by the first 8 hex digits of sha256(parsed.filePath).
// Because the suffix is keyed off the source path rather than the content
// hash, a re-extracted PDF (new hash/text) at the same path keeps its suffix,
// so a re-distill overwrites the same note instead of orphaning the old one
// (mirrors articleSlug). When the title kebab-cases to nothing, the slug is
// "pdf-<suffix>".
export function pdfSlug(parsed) {
  const titlePart = kebab(parsed.title).slice(0, 60);
  const pathDigest = createHash("sha256").update(parsed.filePath).digest("hex");
  const suffix = pathDigest.slice(0, 8);
  if (titlePart.length === 0) {
    return `pdf-${suffix}`;
  }
  return `${titlePart}-${suffix}`;
}
|
|
115
|
+
// Relative path of a PDF's note: "<PDFS_SUBDIR>/<slug>.md", joined with the
// platform path separator.
export function pdfRelPath(parsed) {
  const fileName = `${pdfSlug(parsed)}.md`;
  return join(PDFS_SUBDIR, fileName);
}
|
|
118
|
+
// Renders the YAML frontmatter block for a distilled PDF note.
//
// parsed:    supplies filePath, title, pageCount and hash.
// distilled: supplies classification.category and classification.confidence.
//
// Returns the block as one string: an opening "---", one "key: value" line per
// field, a closing "---", and a trailing newline. `distilled_at` is the current
// time in ISO-8601 form, so the output differs between calls.
// NOTE(review): only source_title is quoted and escaped; source_path is
// written bare — confirm paths can never contain YAML-significant characters.
export function buildPdfFrontmatter(parsed, distilled) {
  const { classification } = distilled;
  const fields = [
    "type: pdf",
    `category: ${classification.category}`,
    `source_path: ${parsed.filePath}`,
    `source_title: "${escapeYaml(parsed.title)}"`,
    `pages: ${parsed.pageCount}`,
    `distilled_at: ${new Date().toISOString()}`,
    `confidence: ${classification.confidence}`,
    `hash: ${parsed.hash}`,
  ];
  // The final empty element yields the newline after the closing fence.
  return ["---", ...fields, "---", ""].join("\n");
}
|
|
130
|
+
// Escapes a string for use inside a YAML double-quoted scalar ("...").
//
// s: raw text, e.g. a title taken from PDF metadata.
// Returns the text with `\`, `"`, carriage return and line feed replaced by
// their backslash escapes, so the value stays on one line and cannot end the
// quoted scalar early.
//
// Backslashes are escaped first; otherwise the backslashes added for quotes
// would themselves be doubled. Escaping only `"` is not enough: a title ending
// in `\` would escape the closing quote, and an embedded newline would split
// the frontmatter line.
// NOTE(review): a reader that only un-escapes `\"` will now see doubled
// backslashes for titles that contain `\` — confirm the frontmatter readers.
function escapeYaml(s) {
  return s
    .replace(/\\/g, "\\\\")
    .replace(/"/g, '\\"')
    .replace(/\r/g, "\\r")
    .replace(/\n/g, "\\n");
}
|
|
133
|
+
// Local copy of writer.kebab() — writer.ts depends on this module, so importing
// from it would create a cycle (mirrors articleDistiller.kebab).
// Lower-cases `s`, turns every run of characters outside a-z / 0-9 into a
// single "-", and strips dashes from both ends. Input with no ASCII letters or
// digits therefore yields "".
function kebab(s) {
  const lowered = s.toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9]+/g, "-");
  const trimmed = dashed.replace(/^-+|-+$/g, "");
  return trimmed;
}
|
|
141
|
+
//# sourceMappingURL=pdfDistiller.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdfDistiller.js","sourceRoot":"","sources":["../../src/pipeline/pdfDistiller.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAEjC,OAAO,EACL,OAAO,EACP,oBAAoB,EACpB,kBAAkB,EAClB,kBAAkB,GACnB,MAAM,gBAAgB,CAAC;AAExB,OAAO,EAAE,KAAK,EAAE,MAAM,eAAe,CAAC;AAMtC,MAAM,CAAC,MAAM,cAAc,GAAkB;IAC3C,OAAO;IACP,WAAW;IACX,OAAO;IACP,OAAO;CACR,CAAC;AAEF,4EAA4E;AAC5E,gDAAgD;AAChD,MAAM,oBAAoB,GAAgB,OAAO,CAAC;AAElD,MAAM,CAAC,MAAM,WAAW,GAAG,MAAM,CAAC;AAElC,4EAA4E;AAC5E,yEAAyE;AACzE,6CAA6C;AAC7C,MAAM,sBAAsB,GAAG,IAAI,CAAC;AACpC,MAAM,cAAc,GAAG,MAAM,CAAC;AAY9B,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,MAAiB,EACjB,GAAW;IAEX,MAAM,MAAM,GAAG,oBAAoB,CAAC,GAAG,CAAC,CAAC;IACzC,MAAM,aAAa,GAAG,kBAAkB,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,EAAE,GAAG,CAAC,QAAQ,CAAC,CAAC;IAC5E,MAAM,YAAY,GAAG,kBAAkB,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,EAAE,GAAG,CAAC,QAAQ,CAAC,CAAC;IAE1E,sEAAsE;IACtE,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC;IACzD,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAE1C,MAAM,OAAO,GAAG,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAC5C,OAAO,CAAC,GAAG,EAAE,MAAM,EAAE;QACnB,MAAM,EAAE,cAAc,CAAC,MAAM,EAAE,IAAI,CAAC;QACpC,KAAK,EAAE,aAAa;QACpB,SAAS,EAAE,GAAG;QACd,IAAI,EAAE,EAAE,KAAK,EAAE,cAAc,EAAE;KAChC,CAAC,CACH,CAAC;IACF,MAAM,cAAc,GAAG,sBAAsB,CAAC,OAAO,CAAC,CAAC;IAEvD,MAAM,QAAQ,GAAG,CACf,MAAM,kBAAkB,CAAC,GAAG,EAAE,CAC5B,OAAO,CAAC,GAAG,EAAE,MAAM,EAAE;QACnB,MAAM,EAAE,aAAa,CAAC,MAAM,EAAE,IAAI,CAAC;QACnC,KAAK,EAAE,YAAY;QACnB,SAAS,EAAE,IAAI;QACf,IAAI,EAAE,EAAE,KAAK,EAAE,aAAa,EAAE;KAC/B,CAAC,CACH,CACF,CAAC,IAAI,EAAE,CAAC;IACT,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAEvC,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC;AACtC,CAAC;AAED,SAAS,cAAc,CAAC,MAAiB,EAAE,IAAY;IACrD,OAAO;;;;;;;;;SASA,MAAM,CAAC,KAAK;;EAEnB,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,sBAAsB,CAAC,EAAE,CAAC;AAC1C,CAAC;AAED,SAAS,aAAa,CAAC,MAAiB,EAAE,IAAY;IACpD,OAAO;;;;;;;;;;;;;;;SAeA,MAAM,CAAC,KAAK;UACX,MAAM,CAAC,QAAQ;;EAEvB,IAAI,EAAE,CAAC;AACT,CAAC;AAED,MAAM
,UAAU,sBAAsB,CAAC,IAAY;IACjD,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;IACxC,IAAI,CAAC,KAAK;QAAE,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;IACrE,IAAI,GAA4B,CAAC;IACjC,IAAI,CAAC;QACH,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAA4B,CAAC;IACxD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,QAAQ,EAAE,oBAAoB,EAAE,UAAU,EAAE,CAAC,EAAE,CAAC;IAC3D,CAAC;IACD,MAAM,MAAM,GAAG,OAAO,GAAG,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;IACpE,MAAM,QAAQ,GAAI,cAA2B,CAAC,QAAQ,CAAC,MAAM,CAAC;QAC5D,CAAC,CAAE,MAAsB;QACzB,CAAC,CAAC,oBAAoB,CAAC;IACzB,MAAM,OAAO,GACX,OAAO,GAAG,CAAC,UAAU,KAAK,QAAQ;QAChC,CAAC,CAAC,GAAG,CAAC,UAAU;QAChB,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,UAAU,IAAI,CAAC,CAAC,CAAC;IAClC,MAAM,UAAU,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC;QACzC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;QACnC,CAAC,CAAC,CAAC,CAAC;IACN,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE,CAAC;AAClC,CAAC;AAED,iFAAiF;AACjF,+EAA+E;AAC/E,iFAAiF;AACjF,6CAA6C;AAC7C,MAAM,UAAU,OAAO,CAAC,MAAiB;IACvC,MAAM,IAAI,GAAG,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,UAAU,CAAC,QAAQ,CAAC;SAChC,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC;SACvB,MAAM,CAAC,KAAK,CAAC;SACb,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACf,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,IAAI,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC,OAAO,MAAM,EAAE,CAAC;AACjE,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,MAAiB;IAC1C,OAAO,IAAI,CAAC,WAAW,EAAE,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AACpD,CAAC;AAED,MAAM,UAAU,mBAAmB,CACjC,MAAiB,EACjB,SAAuB;IAEvB,MAAM,KAAK,GAAG,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;IACnC,KAAK,CAAC,IAAI,CAAC,aAAa,SAAS,CAAC,cAAc,CAAC,QAAQ,EAAE,CAAC,CAAC;IAC7D,KAAK,CAAC,IAAI,CAAC,gBAAgB,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;IAC9C,KAAK,CAAC,IAAI,CAAC,kBAAkB,UAAU,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IAC1D,KAAK,CAAC,IAAI,CAAC,UAAU,MAAM,CAAC,SAAS,EAAE,CAAC,CAAC;IACzC,KAAK,CAAC,IAAI,CAAC,iBAAiB,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;IACxD,KAAK,CAAC,IAAI,CAAC,eAAe,SAAS,CAAC,cAAc,CAAC,UAAU,EAAE,CAAC,C
AAC;IACjE,KAAK,CAAC,IAAI,CAAC,SAAS,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;IACnC,KAAK,CAAC,IAAI,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;IACtB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,UAAU,CAAC,CAAS;IAC3B,OAAO,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;AAChC,CAAC;AAED,gFAAgF;AAChF,iEAAiE;AACjE,SAAS,KAAK,CAAC,CAAS;IACtB,OAAO,CAAC;SACL,WAAW,EAAE;SACb,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;AAC7B,CAAC"}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { readFileSync, readdirSync, statSync } from "node:fs";
|
|
3
|
+
import { basename, join } from "node:path";
|
|
4
|
+
import { extractText, getDocumentProxy } from "unpdf";
|
|
5
|
+
// Reads one PDF from disk and extracts what the pipeline needs from it.
//
// filePath: path of the PDF to read.
// Resolves to `{ filePath, hash, title, text, pageCount }`, where `hash` is the
// sha256 hex digest of the file bytes and `text` is a single string (an array
// result from the extractor is joined with newlines).
//
// unpdf bundles pdf.js; getDocumentProxy throws on corrupt/non-PDF bytes. That
// error is not caught here — per the original note, the run layer handles it
// per file.
// NOTE(review): the document proxy is never explicitly released in this
// function — confirm whether unpdf/pdf.js needs a destroy()/cleanup() call.
export async function parsePdf(filePath) {
  const fileBytes = readFileSync(filePath);
  const hash = createHash("sha256").update(fileBytes).digest("hex");

  const documentProxy = await getDocumentProxy(new Uint8Array(fileBytes));
  const { totalPages: pageCount, text: extracted } = await extractText(
    documentProxy,
    { mergePages: true },
  );
  const metadataTitle = await readMetadataTitle(documentProxy);

  const title = pdfTitle(metadataTitle, filePath);
  const text = Array.isArray(extracted) ? extracted.join("\n") : extracted;
  return { filePath, hash, title, text, pageCount };
}
|
|
21
|
+
// Lists every PDF found under `dir` (recursively, via walk) together with the
// sha256 hex digest of its bytes.
//
// dir: directory to scan.
// Returns an array of `{ filePath, hash }`, in the order walk discovered them.
// A file that cannot be read (unreadable / mid-write) is left out rather than
// failing the whole scan.
export function scanPdfs(dir) {
  const discovered = [];
  walk(dir, discovered);
  return discovered.flatMap((filePath) => {
    try {
      const hash = createHash("sha256")
        .update(readFileSync(filePath))
        .digest("hex");
      return [{ filePath, hash }];
    } catch {
      // unreadable / mid-write file — skip, never fail the whole scan
      return [];
    }
  });
}
|
|
36
|
+
// Pure: picks the display title for a PDF. The metadata title is used when it
// is non-blank after trimming; otherwise the file's base name with a trailing
// ".pdf" (any letter case) removed.
export function pdfTitle(metaTitle, filePath) {
  const fromMetadata = (metaTitle ?? "").trim();
  if (fromMetadata !== "") {
    return fromMetadata;
  }
  const fileName = basename(filePath);
  return fileName.replace(/\.pdf$/i, "");
}
|
|
43
|
+
// Best-effort read of the document's metadata title.
// Resolves to `info.Title` from `proxy.getMetadata()` when that value is a
// string, and to undefined in every other case — including when getMetadata
// is missing, rejects, or returns nothing usable.
async function readMetadataTitle(proxy) {
  try {
    const { info } = (await proxy.getMetadata()) ?? {};
    const candidate = info?.Title;
    if (typeof candidate !== "string") {
      return undefined;
    }
    return candidate;
  } catch {
    return undefined;
  }
}
|
|
53
|
+
// Recursively collects the paths of all ".pdf" files (any letter case) under
// `dir`, appending them to `acc`.
//
// dir: directory to list.
// acc: array that receives the matching paths; it is mutated in place.
//
// A directory that cannot be listed is ignored, and an entry that cannot be
// stat'ed is skipped, so one bad path never aborts the walk.
// NOTE(review): statSync follows symlinks, so a link pointing back up the tree
// is descended into — confirm whether that matters for the scanned folders.
function walk(dir, acc) {
  let names;
  try {
    names = readdirSync(dir);
  } catch {
    return;
  }
  for (const name of names) {
    const fullPath = join(dir, name);
    let info;
    try {
      info = statSync(fullPath);
    } catch {
      continue;
    }
    if (info.isDirectory()) {
      walk(fullPath, acc);
      continue;
    }
    const looksLikePdf = name.toLowerCase().endsWith(".pdf");
    if (info.isFile() && looksLikePdf) {
      acc.push(fullPath);
    }
  }
}
|
|
76
|
+
//# sourceMappingURL=pdfReader.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdfReader.js","sourceRoot":"","sources":["../../src/pipeline/pdfReader.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,SAAS,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAC3C,OAAO,EAAE,WAAW,EAAE,gBAAgB,EAAE,MAAM,OAAO,CAAC;AAwBtD,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAC,QAAgB;IAC7C,MAAM,KAAK,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACrC,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAE9D,0EAA0E;IAC1E,4EAA4E;IAC5E,MAAM,KAAK,GAAG,MAAM,gBAAgB,CAAC,IAAI,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC;IAC5D,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,GAAG,MAAM,WAAW,CAAC,KAAK,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC;IAC5E,MAAM,SAAS,GAAG,MAAM,iBAAiB,CAAC,KAAK,CAAC,CAAC;IAEjD,OAAO;QACL,QAAQ;QACR,IAAI;QACJ,KAAK,EAAE,QAAQ,CAAC,SAAS,EAAE,QAAQ,CAAC;QACpC,IAAI,EAAE,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI;QAClD,SAAS,EAAE,UAAU;KACtB,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,GAAW;IAClC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IACjB,MAAM,GAAG,GAAgB,EAAE,CAAC;IAC5B,KAAK,MAAM,CAAC,IAAI,KAAK,EAAE,CAAC;QACtB,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACxE,GAAG,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC;QAClC,CAAC;QAAC,MAAM,CAAC;YACP,gEAAgE;QAClE,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,gFAAgF;AAChF,MAAM,UAAU,QAAQ,CAAC,SAA6B,EAAE,QAAgB;IACtE,MAAM,CAAC,GAAG,CAAC,SAAS,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IACnC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,CAAC,CAAC;IAC3B,OAAO,QAAQ,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;AACnD,CAAC;AAED,KAAK,UAAU,iBAAiB,CAC9B,KAAmD;IAEnD,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,CAAC,MAAM,KAAK,CAAC,WAAW,EAAE,CAEtC,CAAC;QACF,MAAM,KAAK,GAAG,IAAI,EAAE,IAAI,EAAE,KAAK,CAAC;QAChC,OAAO,OAAO,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAC;IACvD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC;IACnB
,CAAC;AACH,CAAC;AAED,SAAS,IAAI,CAAC,GAAW,EAAE,GAAa;IACtC,IAAI,OAAiB,CAAC;IACtB,IAAI,CAAC;QACH,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;IACT,CAAC;IACD,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE,CAAC;QAC3B,MAAM,IAAI,GAAG,IAAI,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC;QAC7B,IAAI,EAAE,CAAC;QACP,IAAI,CAAC;YACH,EAAE,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;QACtB,CAAC;QAAC,MAAM,CAAC;YACP,SAAS;QACX,CAAC;QACD,IAAI,EAAE,CAAC,WAAW,EAAE;YAAE,IAAI,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;aACjC,IAAI,EAAE,CAAC,MAAM,EAAE,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC;YAAE,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC9E,CAAC;AACH,CAAC"}
|