@s-hirano-ist/s-scripts 1.23.1 → 1.23.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/rag/ingest.js +28 -19
- package/dist/rag/ingest.js.map +1 -1
- package/package.json +3 -3
package/dist/rag/ingest.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { readFileSync } from "node:fs";
|
|
3
3
|
import { parseJsonArticle, parseMarkdown, } from "@s-hirano-ist/s-search/chunker";
|
|
4
|
-
import { ingestChunks } from "@s-hirano-ist/s-search/ingest";
|
|
4
|
+
import { ingestChunks, pruneOrphans } from "@s-hirano-ist/s-search/ingest";
|
|
5
5
|
import { ensureCollection, getCollectionStats, } from "@s-hirano-ist/s-search/qdrant-client";
|
|
6
6
|
import { glob } from "glob";
|
|
7
7
|
import { INGEST_CONFIG } from "./ingest-config.js";
|
|
@@ -40,17 +40,19 @@ function parseFile(file) {
|
|
|
40
40
|
/**
|
|
41
41
|
* CLI entry point: list files, parse into chunks, delegate to ingestChunks
|
|
42
42
|
*/
|
|
43
|
-
async function ingest(force) {
|
|
43
|
+
async function ingest(flags) {
|
|
44
44
|
console.log("Starting ingest...\n");
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
45
|
+
if (!flags.dryRun) {
|
|
46
|
+
await ensureCollection();
|
|
47
|
+
const initialStats = await getCollectionStats();
|
|
48
|
+
console.log(`Initial points count: ${initialStats.pointsCount}\n`);
|
|
49
|
+
}
|
|
50
|
+
else {
|
|
51
|
+
console.log("Dry-run mode: no writes to Qdrant\n");
|
|
52
|
+
}
|
|
53
|
+
if (flags.force) {
|
|
51
54
|
console.log("Force mode enabled: skipping change detection\n");
|
|
52
55
|
}
|
|
53
|
-
// List all files
|
|
54
56
|
const files = await listFiles();
|
|
55
57
|
const articleCount = files.filter((f) => f.contentType === "articles").length;
|
|
56
58
|
const noteCount = files.filter((f) => f.contentType === "notes").length;
|
|
@@ -59,7 +61,6 @@ async function ingest(force) {
|
|
|
59
61
|
console.log(` - Articles: ${articleCount}`);
|
|
60
62
|
console.log(` - Notes: ${noteCount}`);
|
|
61
63
|
console.log(` - Books: ${bookCount}\n`);
|
|
62
|
-
// Parse all files into chunks
|
|
63
64
|
console.log("Parsing files...");
|
|
64
65
|
const allChunks = [];
|
|
65
66
|
for (const file of files) {
|
|
@@ -71,28 +72,36 @@ async function ingest(force) {
|
|
|
71
72
|
console.error(`Error parsing ${file.path}:`, error);
|
|
72
73
|
}
|
|
73
74
|
}
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
75
|
+
console.log(`Total chunks parsed: ${allChunks.length}`);
|
|
76
|
+
if (flags.dryRun) {
|
|
77
|
+
console.log("Dry-run complete. Skipping upsert and prune.");
|
|
78
|
+
return;
|
|
79
|
+
}
|
|
80
|
+
const result = await ingestChunks(allChunks, { force: flags.force });
|
|
81
|
+
const pruneResult = await pruneOrphans(allChunks.map((c) => c.chunk_id));
|
|
77
82
|
const finalStats = await getCollectionStats();
|
|
78
83
|
console.log(`\nFinal points count: ${finalStats.pointsCount}`);
|
|
79
|
-
console.log(`Ingest completed
|
|
84
|
+
console.log(`Ingest completed! (${result.changedChunks} changed, ${result.skippedChunks} skipped, ${pruneResult.deletedCount} pruned)`);
|
|
80
85
|
}
|
|
81
86
|
async function main() {
|
|
82
87
|
if (!process.env.QDRANT_URL) {
|
|
83
|
-
|
|
88
|
+
console.error("QDRANT_URL environment variable is required.");
|
|
89
|
+
process.exit(1);
|
|
84
90
|
}
|
|
85
|
-
const
|
|
91
|
+
const flags = {
|
|
92
|
+
force: process.argv.includes("--force"),
|
|
93
|
+
dryRun: process.argv.includes("--dry-run"),
|
|
94
|
+
};
|
|
86
95
|
try {
|
|
87
|
-
await ingest(
|
|
96
|
+
await ingest(flags);
|
|
88
97
|
}
|
|
89
98
|
catch (error) {
|
|
90
99
|
console.error("❌ エラーが発生しました:", error);
|
|
91
|
-
process.exit(
|
|
100
|
+
process.exit(2);
|
|
92
101
|
}
|
|
93
102
|
}
|
|
94
103
|
main().catch((error) => {
|
|
95
104
|
console.error(error);
|
|
96
|
-
process.exit(
|
|
105
|
+
process.exit(2);
|
|
97
106
|
});
|
|
98
107
|
//# sourceMappingURL=ingest.js.map
|
package/dist/rag/ingest.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ingest.js","sourceRoot":"","sources":["../../src/rag/ingest.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EACN,gBAAgB,EAChB,aAAa,GACb,MAAM,gCAAgC,CAAC;AAExC,OAAO,EAAE,YAAY,EAAE,MAAM,+BAA+B,CAAC;
|
|
1
|
+
{"version":3,"file":"ingest.js","sourceRoot":"","sources":["../../src/rag/ingest.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EACN,gBAAgB,EAChB,aAAa,GACb,MAAM,gCAAgC,CAAC;AAExC,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,+BAA+B,CAAC;AAC3E,OAAO,EACN,gBAAgB,EAChB,kBAAkB,GAClB,MAAM,sCAAsC,CAAC;AAC9C,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAQnD;;GAEG;AACH,KAAK,UAAU,SAAS;IACvB,MAAM,KAAK,GAAe,EAAE,CAAC;IAE7B,kBAAkB;IAClB,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAC9D,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;QACjC,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,mBAAmB;IACnB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACxD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC9B,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,OAAO,EAAE,CAAC,CAAC;IAC9D,CAAC;IAED,mBAAmB;IACnB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACxD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC9B,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,OAAO,EAAE,CAAC,CAAC;IAC9D,CAAC;IAED,OAAO,KAAK,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,IAAc;IAChC,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAEjD,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QAC1B,OAAO,gBAAgB,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC7C,CAAC;IACD,OAAO,aAAa,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;AAC5D,CAAC;AAOD;;GAEG;AACH,KAAK,UAAU,MAAM,CAAC,KAAkB;IACvC,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;IAEpC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;QACnB,MAAM,gBAAgB,EAAE,CAAC;QACzB,MAAM,YAAY,GAAG,MAAM,kBAAkB,EAAE,CAAC;QAChD,OAAO,CAAC,GAAG,CAAC,yBAAyB,YAAY,CAAC,WAAW,IAAI,CAAC,CAAC;IACpE,CAAC;SAAM,CAAC;QACP,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;IACpD,CAAC;IAED,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;QACjB,OAAO,CAAC,GAAG,CAAC,iDAAiD,CAAC,CAAC;IAChE,CAAC;IAED,MAAM,KAAK,GAAG,MAAM,SAAS,EAAE,CAAC;IAChC,MAAM,YAAY,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE
,CAAC,CAAC,CAAC,WAAW,KAAK,UAAU,CAAC,CAAC,MAAM,CAAC;IAC9E,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,KAAK,OAAO,CAAC,CAAC,MAAM,CAAC;IACxE,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,KAAK,OAAO,CAAC,CAAC,MAAM,CAAC;IAExE,OAAO,CAAC,GAAG,CAAC,SAAS,KAAK,CAAC,MAAM,mBAAmB,CAAC,CAAC;IACtD,OAAO,CAAC,GAAG,CAAC,iBAAiB,YAAY,EAAE,CAAC,CAAC;IAC7C,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,EAAE,CAAC,CAAC;IACvC,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,IAAI,CAAC,CAAC;IAEzC,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;IAChC,MAAM,SAAS,GAAoB,EAAE,CAAC;IAEtC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,IAAI,CAAC;YACJ,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;YAC/B,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;QAC3B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,OAAO,CAAC,KAAK,CAAC,iBAAiB,IAAI,CAAC,IAAI,GAAG,EAAE,KAAK,CAAC,CAAC;QACrD,CAAC;IACF,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,wBAAwB,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC;IAExD,IAAI,KAAK,CAAC,MAAM,EAAE,CAAC;QAClB,OAAO,CAAC,GAAG,CAAC,8CAA8C,CAAC,CAAC;QAC5D,OAAO;IACR,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,SAAS,EAAE,EAAE,KAAK,EAAE,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;IAErE,MAAM,WAAW,GAAG,MAAM,YAAY,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;IAEzE,MAAM,UAAU,GAAG,MAAM,kBAAkB,EAAE,CAAC;IAC9C,OAAO,CAAC,GAAG,CAAC,yBAAyB,UAAU,CAAC,WAAW,EAAE,CAAC,CAAC;IAC/D,OAAO,CAAC,GAAG,CACV,sBAAsB,MAAM,CAAC,aAAa,aAAa,MAAM,CAAC,aAAa,aAAa,WAAW,CAAC,YAAY,UAAU,CAC1H,CAAC;AACH,CAAC;AAED,KAAK,UAAU,IAAI;IAClB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC;QAC7B,OAAO,CAAC,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAC9D,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC;IAED,MAAM,KAAK,GAAgB;QAC1B,KAAK,EAAE,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC;QACvC,MAAM,EAAE,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC;KAC1C,CAAC;IAEF,IAAI,CAAC;QACJ,MAAM,MAAM,CAAC,KAAK,CAAC,CAAC;IACrB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,eAAe,EAAE,KAAK,CAAC,CAAC;QACtC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC;AACF,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACtB,OAAO,CAA
C,KAAK,CAAC,KAAK,CAAC,CAAC;IACrB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACjB,CAAC,CAAC,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@s-hirano-ist/s-scripts",
|
|
3
|
-
"version": "1.23.1",
|
|
3
|
+
"version": "1.23.3",
|
|
4
4
|
"description": "CLI scripts for s-private data operations",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -58,10 +58,10 @@
|
|
|
58
58
|
"js-yaml": "4.1.1",
|
|
59
59
|
"sharp": "0.34.5",
|
|
60
60
|
"turndown": "7.2.4",
|
|
61
|
-
"@s-hirano-ist/s-core": "1.20.0",
|
|
62
61
|
"@s-hirano-ist/s-database": "1.19.0",
|
|
62
|
+
"@s-hirano-ist/s-core": "1.20.1",
|
|
63
63
|
"@s-hirano-ist/s-notification": "1.18.4",
|
|
64
|
-
"@s-hirano-ist/s-search": "1.18.
|
|
64
|
+
"@s-hirano-ist/s-search": "1.18.9",
|
|
65
65
|
"@s-hirano-ist/s-storage": "1.18.5"
|
|
66
66
|
},
|
|
67
67
|
"devDependencies": {
|