@s-hirano-ist/s-scripts 1.12.2 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cleanup-minio-images.d.ts +3 -0
- package/dist/cleanup-minio-images.d.ts.map +1 -0
- package/dist/cleanup-minio-images.js +111 -0
- package/dist/cleanup-minio-images.js.map +1 -0
- package/dist/fetch-articles.js +1 -3
- package/dist/fetch-articles.js.map +1 -1
- package/dist/fetch-books.js +1 -3
- package/dist/fetch-books.js.map +1 -1
- package/dist/fetch-images.js +1 -3
- package/dist/fetch-images.js.map +1 -1
- package/dist/fetch-notes.js +1 -3
- package/dist/fetch-notes.js.map +1 -1
- package/dist/infrastructures/articles-command-repository.d.ts +11 -1
- package/dist/infrastructures/articles-command-repository.d.ts.map +1 -1
- package/dist/infrastructures/articles-command-repository.js.map +1 -1
- package/dist/infrastructures/books-command-repository.d.ts +11 -1
- package/dist/infrastructures/books-command-repository.d.ts.map +1 -1
- package/dist/infrastructures/books-command-repository.js.map +1 -1
- package/dist/infrastructures/images-command-repository.d.ts +11 -1
- package/dist/infrastructures/images-command-repository.d.ts.map +1 -1
- package/dist/infrastructures/images-command-repository.js.map +1 -1
- package/dist/infrastructures/notes-command-repository.d.ts +11 -1
- package/dist/infrastructures/notes-command-repository.d.ts.map +1 -1
- package/dist/infrastructures/notes-command-repository.js.map +1 -1
- package/dist/ingest-articles.d.ts +3 -0
- package/dist/ingest-articles.d.ts.map +1 -0
- package/dist/ingest-articles.js +230 -0
- package/dist/ingest-articles.js.map +1 -0
- package/dist/ingest-books.d.ts +3 -0
- package/dist/ingest-books.d.ts.map +1 -0
- package/dist/ingest-books.js +167 -0
- package/dist/ingest-books.js.map +1 -0
- package/dist/ingest-images.d.ts +3 -0
- package/dist/ingest-images.d.ts.map +1 -0
- package/dist/ingest-images.js +196 -0
- package/dist/ingest-images.js.map +1 -0
- package/dist/ingest-notes.d.ts +3 -0
- package/dist/ingest-notes.d.ts.map +1 -0
- package/dist/ingest-notes.js +187 -0
- package/dist/ingest-notes.js.map +1 -0
- package/dist/rag/ingest-config.d.ts +8 -0
- package/dist/rag/ingest-config.d.ts.map +1 -0
- package/dist/rag/ingest-config.js +8 -0
- package/dist/rag/ingest-config.js.map +1 -0
- package/dist/rag/ingest.d.ts +1 -1
- package/dist/rag/ingest.d.ts.map +1 -1
- package/dist/rag/ingest.js +51 -82
- package/dist/rag/ingest.js.map +1 -1
- package/dist/rag/search.d.ts +1 -1
- package/dist/rag/search.d.ts.map +1 -1
- package/dist/rag/search.js +57 -69
- package/dist/rag/search.js.map +1 -1
- package/dist/reset-articles.js +1 -3
- package/dist/reset-articles.js.map +1 -1
- package/dist/reset-books.js +1 -3
- package/dist/reset-books.js.map +1 -1
- package/dist/reset-images.js +1 -3
- package/dist/reset-images.js.map +1 -1
- package/dist/reset-notes.js +1 -3
- package/dist/reset-notes.js.map +1 -1
- package/dist/revert-articles.js +1 -3
- package/dist/revert-articles.js.map +1 -1
- package/dist/revert-books.js +1 -3
- package/dist/revert-books.js.map +1 -1
- package/dist/revert-images.js +1 -3
- package/dist/revert-images.js.map +1 -1
- package/dist/revert-notes.js +1 -3
- package/dist/revert-notes.js.map +1 -1
- package/dist/update-raw-articles.js +40 -26
- package/dist/update-raw-articles.js.map +1 -1
- package/package.json +20 -8
- package/dist/rag/chunker.d.ts +0 -10
- package/dist/rag/chunker.d.ts.map +0 -1
- package/dist/rag/chunker.js +0 -188
- package/dist/rag/chunker.js.map +0 -1
- package/dist/rag/config.d.ts +0 -44
- package/dist/rag/config.d.ts.map +0 -1
- package/dist/rag/config.js +0 -34
- package/dist/rag/config.js.map +0 -1
- package/dist/rag/embedding.d.ts +0 -15
- package/dist/rag/embedding.d.ts.map +0 -1
- package/dist/rag/embedding.js +0 -61
- package/dist/rag/embedding.js.map +0 -1
- package/dist/rag/qdrant-client.d.ts +0 -40
- package/dist/rag/qdrant-client.d.ts.map +0 -1
- package/dist/rag/qdrant-client.js +0 -160
- package/dist/rag/qdrant-client.js.map +0 -1
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ingest-notes.d.ts","sourceRoot":"","sources":["../src/ingest-notes.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { readFile } from "node:fs/promises";
|
|
3
|
+
import { basename } from "node:path";
|
|
4
|
+
import { makeExportedStatus, makeId, makeUserId, } from "@s-hirano-ist/s-core/shared-kernel/entities/common-entity";
|
|
5
|
+
import { createPushoverService } from "@s-hirano-ist/s-notification";
|
|
6
|
+
import { glob } from "glob";
|
|
7
|
+
const SCRIPT_NAME = "ingest-notes";
|
|
8
|
+
/**
 * Splits a Markdown document into its leading `---` frontmatter and its body.
 *
 * Only two frontmatter keys are interpreted: `heading` (kept as the raw text
 * after the first colon, trimmed) and `draft` (true only for the literal
 * value `true`). No YAML parsing is performed.
 *
 * @param {string} content - Full text of the Markdown file.
 * @returns {{ heading?: string, draft?: boolean, body: string }}
 *   `{ body: content }` (untrimmed, no other keys) when the text does not
 *   start with `---` or no closing delimiter exists; otherwise the parsed
 *   `heading`, `draft` and the trimmed `body` that follows the frontmatter.
 */
function parseFrontmatter(content) {
    if (!content.startsWith("---")) {
        return { body: content };
    }

    let frontmatterEnd = -1;
    let bodyStart = -1;

    // Prefer a closing delimiter that sits on its own line, so a `---` inside
    // a value (e.g. `heading: a---b`) is not mistaken for the end of the block.
    const firstLineEnd = content.indexOf("\n");
    if (firstLineEnd !== -1) {
        // Fresh regex per call: the `g` flag makes `lastIndex` stateful.
        const closingLine = /^---[ \t]*\r?$/gm;
        closingLine.lastIndex = firstLineEnd + 1;
        const match = closingLine.exec(content);
        if (match) {
            frontmatterEnd = match.index;
            bodyStart = match.index + match[0].length;
        }
    }

    // Fallback for unusual layouts (e.g. everything on a single line): the
    // first `---` after the opening one, as before.
    if (frontmatterEnd === -1) {
        frontmatterEnd = content.indexOf("---", 3);
        if (frontmatterEnd === -1) {
            return { body: content };
        }
        bodyStart = frontmatterEnd + 3;
    }

    const frontmatter = content.slice(3, frontmatterEnd).trim();
    const body = content.slice(bodyStart).trim();

    let heading;
    let draft = false;
    for (const line of frontmatter.split("\n")) {
        // Split on the first colon only; later colons belong to the value.
        const [key, ...rest] = line.split(":");
        const value = rest.join(":").trim();
        const name = key?.trim();
        if (name === "heading") {
            heading = value;
        }
        if (name === "draft" && value === "true") {
            draft = true;
        }
    }
    return { heading, draft, body };
}
|
|
30
|
+
/**
 * Converts one Markdown note file into the `{ title, markdown }` pair to store.
 *
 * - Files whose frontmatter has `draft: true` yield `null`.
 * - When the frontmatter has a non-empty `heading`, it becomes the title and
 *   the body is used unchanged.
 * - Otherwise the title is the file name without `.md`, and a leading
 *   `# <title>` line (plus the blank lines after it) is stripped from the
 *   body.
 *
 * @param {string} filePath - Path of the note file; only its base name is used.
 * @param {string} content - Full text of the file.
 * @returns {{ title: string, markdown: string } | null} Parsed note, or `null` for drafts.
 */
function parseNoteFile(filePath, content) {
    const { heading, draft, body } = parseFrontmatter(content);
    if (draft) {
        return null;
    }
    if (heading) {
        return { title: heading, markdown: body };
    }

    const title = basename(filePath, ".md");
    const titleLine = `# ${title}`;
    if (!body.startsWith(titleLine)) {
        return { title, markdown: body };
    }

    // Only strip when the H1 is exactly the title: the prefix must be followed
    // by a line break or the end of the text. Otherwise `# Foobar` in `Foo.md`
    // would be cut down to `bar`.
    const remainder = body.slice(titleLine.length);
    if (remainder !== "" && !/^\r?\n/.test(remainder)) {
        return { title, markdown: body };
    }

    // Drop the blank lines after the heading, for both LF and CRLF files.
    return { title, markdown: remainder.replace(/^(?:\r?\n)+/, "") };
}
|
|
45
|
+
/**
 * Synchronises the Markdown notes found under
 * `${S_CONTENTS_PATH}/markdown/note/*.md` (defaults to the current working
 * directory) with the `note` table for the user named by `USERNAME_TO_EXPORT`.
 *
 * Steps:
 *  1. Insert notes whose title is not in the database and update the markdown
 *     of notes whose content changed.
 *  2. Delete exported notes whose title was not seen in this run. Draft files
 *     do not register their title, so their rows are deleted too.
 *  3. Send a Pushover notification with the outcome.
 *
 * With `--dry-run` nothing is written; planned actions are only logged.
 *
 * @returns {Promise<void>}
 * @throws {Error} When a required environment variable is missing.
 */
async function main() {
    const dryRun = process.argv.includes("--dry-run");
    const env = {
        DATABASE_URL: process.env.DATABASE_URL,
        PUSHOVER_URL: process.env.PUSHOVER_URL,
        PUSHOVER_USER_KEY: process.env.PUSHOVER_USER_KEY,
        PUSHOVER_APP_TOKEN: process.env.PUSHOVER_APP_TOKEN,
        USERNAME_TO_EXPORT: process.env.USERNAME_TO_EXPORT,
    };
    if (Object.values(env).some((v) => !v)) {
        throw new Error("Required environment variables are not set.");
    }
    const contentsPath = process.env.S_CONTENTS_PATH ?? process.cwd();
    // Dynamic import for Prisma ESM compatibility
    const { PrismaClient } = await import("@s-hirano-ist/s-database/generated");
    const prisma = new PrismaClient({ accelerateUrl: env.DATABASE_URL ?? "" });
    const notificationService = createPushoverService({
        url: env.PUSHOVER_URL ?? "",
        userKey: env.PUSHOVER_USER_KEY ?? "",
        appToken: env.PUSHOVER_APP_TOKEN ?? "",
    });
    const userId = makeUserId(env.USERNAME_TO_EXPORT ?? "");
    const exported = makeExportedStatus();
    // Titles of every non-draft file parsed in this run; purgeNotes() uses it
    // to decide which database rows no longer have a source file.
    const fileTitles = new Set();

    /**
     * Inserts new notes and updates changed ones, one file at a time.
     * A failure on one file is logged and counted; the loop continues.
     *
     * @returns {Promise<{ insertedCount: number, updatedCount: number, skippedCount: number, errorCount: number }>}
     */
    async function ingestNotes() {
        const files = await glob(`${contentsPath}/markdown/note/*.md`);
        console.log(`📁 ${files.length} 件のファイルを検出しました。`);
        const existingNotes = await prisma.note.findMany({
            where: { userId },
            select: { id: true, title: true, markdown: true },
        });
        // Notes are matched to files by title.
        const existingNotesMap = new Map(existingNotes.map((n) => [n.title, n]));
        console.log(`📊 DB に ${existingNotesMap.size} 件の既存ノートがあります。`);
        let insertedCount = 0;
        let updatedCount = 0;
        let skippedCount = 0;
        let errorCount = 0;
        for (const filePath of files) {
            // Declared outside the try so the catch can report how far we got.
            let parsed = null;
            try {
                const content = await readFile(filePath, "utf-8");
                parsed = parseNoteFile(filePath, content);
                if (!parsed) {
                    console.log(`⏭️ スキップ(draft): ${basename(filePath)}`);
                    skippedCount++;
                    continue;
                }
                fileTitles.add(parsed.title);
                const existing = existingNotesMap.get(parsed.title);
                if (existing) {
                    if (existing.markdown === parsed.markdown) {
                        // Unchanged: counted as skipped without logging.
                        skippedCount++;
                        continue;
                    }
                    if (dryRun) {
                        console.log(`🔄 [dry-run] 更新予定: ${parsed.title}`);
                    }
                    else {
                        await prisma.note.update({
                            where: { id: existing.id },
                            data: { markdown: parsed.markdown },
                        });
                        console.log(`🔄 更新: ${parsed.title}`);
                    }
                    updatedCount++;
                    continue;
                }
                if (dryRun) {
                    console.log(`🔍 [dry-run] 挿入予定: ${parsed.title}`);
                    insertedCount++;
                    continue;
                }
                await prisma.note.create({
                    data: {
                        id: String(makeId()),
                        title: parsed.title,
                        markdown: parsed.markdown,
                        status: exported.status,
                        exportedAt: exported.exportedAt,
                        userId,
                        createdAt: new Date(),
                    },
                });
                insertedCount++;
                console.log(`✅ 挿入: ${parsed.title}`);
            }
            catch (error) {
                console.error(`❌ エラー(${basename(filePath)}):`, parsed
                    ? `title(${parsed.title.length}文字) markdown(${parsed.markdown.length}文字)`
                    : "parse前", error);
                errorCount++;
            }
        }
        return { insertedCount, updatedCount, skippedCount, errorCount };
    }

    /**
     * Deletes this user's exported notes whose title was not registered in
     * `fileTitles` during ingestNotes(). In dry-run mode only logs them.
     *
     * @returns {Promise<number>} Number of notes deleted (or that would be deleted).
     */
    async function purgeNotes() {
        const exportedNotes = await prisma.note.findMany({
            where: { userId, status: exported.status },
            select: { id: true, title: true },
        });
        const toDelete = exportedNotes.filter((n) => !fileTitles.has(n.title));
        if (toDelete.length === 0) {
            console.log("🗑️ 削除対象なし");
            return 0;
        }
        let deletedCount = 0;
        for (const note of toDelete) {
            if (dryRun) {
                console.log(`🗑️ [dry-run] 削除予定: ${note.title}`);
            }
            else {
                await prisma.note.delete({ where: { id: note.id } });
                console.log(`🗑️ 削除: ${note.title}`);
            }
            deletedCount++;
        }
        return deletedCount;
    }

    // process.exit() terminates immediately and would skip the `finally`
    // below, so the failure is recorded here and acted on after disconnecting.
    let failed = false;
    try {
        const { insertedCount, updatedCount, skippedCount, errorCount } = await ingestNotes();
        let deletedCount = 0;
        if (errorCount > 0) {
            // A file that failed to read or ingest may never have registered
            // its title, so purging now could delete a note whose source file
            // still exists. Leave the purge to the next clean run.
            console.warn(`⚠️ エラーが ${errorCount} 件あるため、削除処理をスキップします。`);
        }
        else {
            deletedCount = await purgeNotes();
        }
        console.log(`\n📊 結果: 挿入 ${insertedCount} 件, 更新 ${updatedCount} 件, スキップ ${skippedCount} 件, 削除 ${deletedCount} 件, エラー ${errorCount} 件${dryRun ? " (dry-run)" : ""}`);
        await notificationService.notifyInfo(`${SCRIPT_NAME} completed`, {
            caller: SCRIPT_NAME,
        });
    }
    catch (error) {
        failed = true;
        console.error("❌ エラーが発生しました:", error);
        await notificationService.notifyError(`${SCRIPT_NAME} failed: ${error}`, {
            caller: SCRIPT_NAME,
        });
    }
    finally {
        await prisma.$disconnect();
    }
    if (failed) {
        process.exit(1);
    }
}
|
|
183
|
+
main().catch((error) => {
|
|
184
|
+
console.error(error);
|
|
185
|
+
process.exit(1);
|
|
186
|
+
});
|
|
187
|
+
//# sourceMappingURL=ingest-notes.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ingest-notes.js","sourceRoot":"","sources":["../src/ingest-notes.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,EACN,kBAAkB,EAClB,MAAM,EACN,UAAU,GAEV,MAAM,2DAA2D,CAAC;AACnE,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAE5B,MAAM,WAAW,GAAG,cAAc,CAAC;AAEnC,SAAS,gBAAgB,CAAC,OAAe;IAKxC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;QAChC,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;IAC1B,CAAC;IACD,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;IAC3C,IAAI,QAAQ,KAAK,CAAC,CAAC,EAAE,CAAC;QACrB,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC;IAC1B,CAAC;IACD,MAAM,WAAW,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC;IACtD,MAAM,IAAI,GAAG,OAAO,CAAC,KAAK,CAAC,QAAQ,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IAEhD,IAAI,OAA2B,CAAC;IAChC,IAAI,KAAK,GAAG,KAAK,CAAC;IAClB,KAAK,MAAM,IAAI,IAAI,WAAW,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QAC5C,MAAM,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QACvC,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACpC,IAAI,GAAG,EAAE,IAAI,EAAE,KAAK,SAAS;YAAE,OAAO,GAAG,KAAK,CAAC;QAC/C,IAAI,GAAG,EAAE,IAAI,EAAE,KAAK,OAAO,IAAI,KAAK,KAAK,MAAM;YAAE,KAAK,GAAG,IAAI,CAAC;IAC/D,CAAC;IACD,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;AACjC,CAAC;AAED,SAAS,aAAa,CACrB,QAAgB,EAChB,OAAe;IAEf,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAE3D,IAAI,KAAK;QAAE,OAAO,IAAI,CAAC;IAEvB,IAAI,OAAO,EAAE,CAAC;QACb,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;IAC3C,CAAC;IAED,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;IACxC,MAAM,SAAS,GAAG,KAAK,KAAK,EAAE,CAAC;IAC/B,IAAI,QAAQ,GAAG,IAAI,CAAC;IACpB,IAAI,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QACpC,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IACjE,CAAC;IACD,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AAC5B,CAAC;AAED,KAAK,UAAU,IAAI;IAClB,MAAM,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC
;IAElD,MAAM,GAAG,GAAG;QACX,YAAY,EAAE,OAAO,CAAC,GAAG,CAAC,YAAY;QACtC,YAAY,EAAE,OAAO,CAAC,GAAG,CAAC,YAAY;QACtC,iBAAiB,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB;QAChD,kBAAkB,EAAE,OAAO,CAAC,GAAG,CAAC,kBAAkB;QAClD,kBAAkB,EAAE,OAAO,CAAC,GAAG,CAAC,kBAAkB;KACzC,CAAC;IAEX,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACxC,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;IAChE,CAAC;IAED,MAAM,YAAY,GAAG,OAAO,CAAC,GAAG,CAAC,eAAe,IAAI,OAAO,CAAC,GAAG,EAAE,CAAC;IAElE,8CAA8C;IAC9C,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,oCAAoC,CAAC,CAAC;IAC5E,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,EAAE,aAAa,EAAE,GAAG,CAAC,YAAY,IAAI,EAAE,EAAE,CAAC,CAAC;IAE3E,MAAM,mBAAmB,GAAG,qBAAqB,CAAC;QACjD,GAAG,EAAE,GAAG,CAAC,YAAY,IAAI,EAAE;QAC3B,OAAO,EAAE,GAAG,CAAC,iBAAiB,IAAI,EAAE;QACpC,QAAQ,EAAE,GAAG,CAAC,kBAAkB,IAAI,EAAE;KACtC,CAAC,CAAC;IAEH,MAAM,MAAM,GAAW,UAAU,CAAC,GAAG,CAAC,kBAAkB,IAAI,EAAE,CAAC,CAAC;IAChE,MAAM,QAAQ,GAAG,kBAAkB,EAAE,CAAC;IAEtC,MAAM,UAAU,GAAG,IAAI,GAAG,EAAU,CAAC;IAErC,KAAK,UAAU,WAAW;QACzB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,GAAG,YAAY,qBAAqB,CAAC,CAAC;QAC/D,OAAO,CAAC,GAAG,CAAC,MAAM,KAAK,CAAC,MAAM,iBAAiB,CAAC,CAAC;QAEjD,MAAM,aAAa,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC;YAChD,KAAK,EAAE,EAAE,MAAM,EAAE;YACjB,MAAM,EAAE,EAAE,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE;SACjD,CAAC,CAAC;QACH,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAC/B,aAAa,CAAC,GAAG,CAChB,CAAC,CAAkD,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,CACpE,CACD,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,WAAW,gBAAgB,CAAC,IAAI,gBAAgB,CAAC,CAAC;QAE9D,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;YAC9B,IAAI,MAAM,GAA+C,IAAI,CAAC;YAC9D,IAAI,CAAC;gBACJ,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBAClD,MAAM,GAAG,aAAa,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBAE1C,IAAI,CAAC,MAAM,EAAE,CAAC;oBACb,OAAO,CAAC,GAAG,CAAC,oBAAoB,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;oBACtD,YAAY,EAAE,CAAC;oBACf,SAAS;gBACV,CAAC;gBAED,UAAU,CAAC,GAAG,CAAC
,MAAM,CAAC,KAAK,CAAC,CAAC;gBAE7B,MAAM,QAAQ,GAAG,gBAAgB,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBACpD,IAAI,QAAQ,EAAE,CAAC;oBACd,IAAI,QAAQ,CAAC,QAAQ,KAAK,MAAM,CAAC,QAAQ,EAAE,CAAC;wBAC3C,kDAAkD;wBAClD,YAAY,EAAE,CAAC;wBACf,SAAS;oBACV,CAAC;oBACD,IAAI,MAAM,EAAE,CAAC;wBACZ,OAAO,CAAC,GAAG,CAAC,sBAAsB,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;oBACnD,CAAC;yBAAM,CAAC;wBACP,MAAM,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC;4BACxB,KAAK,EAAE,EAAE,EAAE,EAAE,QAAQ,CAAC,EAAE,EAAE;4BAC1B,IAAI,EAAE,EAAE,QAAQ,EAAE,MAAM,CAAC,QAAQ,EAAE;yBACnC,CAAC,CAAC;wBACH,OAAO,CAAC,GAAG,CAAC,UAAU,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;oBACvC,CAAC;oBACD,YAAY,EAAE,CAAC;oBACf,SAAS;gBACV,CAAC;gBAED,IAAI,MAAM,EAAE,CAAC;oBACZ,OAAO,CAAC,GAAG,CAAC,sBAAsB,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;oBAClD,aAAa,EAAE,CAAC;oBAChB,SAAS;gBACV,CAAC;gBAED,MAAM,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC;oBACxB,IAAI,EAAE;wBACL,EAAE,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC;wBACpB,KAAK,EAAE,MAAM,CAAC,KAAK;wBACnB,QAAQ,EAAE,MAAM,CAAC,QAAQ;wBACzB,MAAM,EAAE,QAAQ,CAAC,MAAM;wBACvB,UAAU,EAAE,QAAQ,CAAC,UAAU;wBAC/B,MAAM;wBACN,SAAS,EAAE,IAAI,IAAI,EAAE;qBACrB;iBACD,CAAC,CAAC;gBACH,aAAa,EAAE,CAAC;gBAChB,OAAO,CAAC,GAAG,CAAC,SAAS,MAAM,CAAC,KAAK,EAAE,CAAC,CAAC;YACtC,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBAChB,OAAO,CAAC,KAAK,CACZ,SAAS,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAC/B,MAAM;oBACL,CAAC,CAAC,SAAS,MAAM,CAAC,KAAK,CAAC,MAAM,gBAAgB,MAAM,CAAC,QAAQ,CAAC,MAAM,KAAK;oBACzE,CAAC,CAAC,QAAQ,EACX,KAAK,CACL,CAAC;gBACF,UAAU,EAAE,CAAC;YACd,CAAC;QACF,CAAC;QAED,OAAO,EAAE,aAAa,EAAE,YAAY,EAAE,YAAY,EAAE,UAAU,EAAE,CAAC;IAClE,CAAC;IAED,KAAK,UAAU,UAAU;QACxB,MAAM,aAAa,GAAG,MAAM,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC;YAChD,KAAK,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE;YAC1C,MAAM,EAAE,EAAE,EAAE,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE;SACjC,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,aAAa,CAAC,MAAM,CACpC,CAAC,CAAgC,EAAE,EAAE,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAC9D,CAAC;QAEF,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC3B,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC;YAC3B,OAAO,CAAC,CAAC;QACV,CAAC;QAED,IAAI,YAAY,GAAG,CAAC,CAAC;QACrB,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAA
C;YAC7B,IAAI,MAAM,EAAE,CAAC;gBACZ,OAAO,CAAC,GAAG,CAAC,wBAAwB,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;YACnD,CAAC;iBAAM,CAAC;gBACP,MAAM,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC;gBACrD,OAAO,CAAC,GAAG,CAAC,YAAY,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC;YACvC,CAAC;YACD,YAAY,EAAE,CAAC;QAChB,CAAC;QAED,OAAO,YAAY,CAAC;IACrB,CAAC;IAED,IAAI,CAAC;QACJ,MAAM,EAAE,aAAa,EAAE,YAAY,EAAE,YAAY,EAAE,UAAU,EAAE,GAC9D,MAAM,WAAW,EAAE,CAAC;QACrB,MAAM,YAAY,GAAG,MAAM,UAAU,EAAE,CAAC;QACxC,OAAO,CAAC,GAAG,CACV,eAAe,aAAa,UAAU,YAAY,YAAY,YAAY,UAAU,YAAY,WAAW,UAAU,KAAK,MAAM,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,EAAE,EAAE,CACtJ,CAAC;QACF,MAAM,mBAAmB,CAAC,UAAU,CAAC,GAAG,WAAW,YAAY,EAAE;YAChE,MAAM,EAAE,WAAW;SACnB,CAAC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,eAAe,EAAE,KAAK,CAAC,CAAC;QACtC,MAAM,mBAAmB,CAAC,WAAW,CAAC,GAAG,WAAW,YAAY,KAAK,EAAE,EAAE;YACxE,MAAM,EAAE,WAAW;SACnB,CAAC,CAAC;QACH,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC;YAAS,CAAC;QACV,MAAM,MAAM,CAAC,WAAW,EAAE,CAAC;IAC5B,CAAC;AACF,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACtB,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACrB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACjB,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ingest-config.d.ts","sourceRoot":"","sources":["../../src/rag/ingest-config.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,aAAa;;;;;;CAMhB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ingest-config.js","sourceRoot":"","sources":["../../src/rag/ingest-config.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,aAAa,GAAG;IAC5B,KAAK,EAAE;QACN,KAAK,EAAE,uBAAuB;QAC9B,KAAK,EAAE,uBAAuB;QAC9B,QAAQ,EAAE,wBAAwB;KAClC;CACQ,CAAC"}
|
package/dist/rag/ingest.d.ts
CHANGED
package/dist/rag/ingest.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ingest.d.ts","sourceRoot":"","sources":["../../src/rag/ingest.ts"],"names":[],"mappings":""}
|
|
1
|
+
{"version":3,"file":"ingest.d.ts","sourceRoot":"","sources":["../../src/rag/ingest.ts"],"names":[],"mappings":";AACA,OAAO,eAAe,CAAC"}
|
package/dist/rag/ingest.js
CHANGED
|
@@ -1,49 +1,31 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
+
import "dotenv/config";
|
|
2
3
|
import { readFileSync } from "node:fs";
|
|
4
|
+
import { parseJsonArticle, parseMarkdown, } from "@s-hirano-ist/s-search/chunker";
|
|
5
|
+
import { createEmbeddingClient } from "@s-hirano-ist/s-search/embedding-client";
|
|
6
|
+
import { ingestChunks } from "@s-hirano-ist/s-search/ingest";
|
|
7
|
+
import { ensureCollection, getCollectionStats, } from "@s-hirano-ist/s-search/qdrant-client";
|
|
3
8
|
import { glob } from "glob";
|
|
4
|
-
import {
|
|
5
|
-
import { RAG_CONFIG } from "./config.js";
|
|
6
|
-
import { embedBatch } from "./embedding.js";
|
|
7
|
-
import { ensureCollection, getCollectionStats, getExistingHashes, upsertPoints, } from "./qdrant-client.js";
|
|
8
|
-
const BATCH_SIZE = 20;
|
|
9
|
-
const MAX_RETRIES = 3;
|
|
10
|
-
const RETRY_DELAY_MS = 2000;
|
|
11
|
-
async function sleep(ms) {
|
|
12
|
-
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
13
|
-
}
|
|
14
|
-
async function withRetry(fn, retries = MAX_RETRIES) {
|
|
15
|
-
for (let i = 0; i < retries; i++) {
|
|
16
|
-
try {
|
|
17
|
-
return await fn();
|
|
18
|
-
}
|
|
19
|
-
catch (error) {
|
|
20
|
-
if (i === retries - 1)
|
|
21
|
-
throw error;
|
|
22
|
-
console.log(` Retry ${i + 1}/${retries} after error...`);
|
|
23
|
-
await sleep(RETRY_DELAY_MS);
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
throw new Error("Unreachable");
|
|
27
|
-
}
|
|
9
|
+
import { INGEST_CONFIG } from "./ingest-config.js";
|
|
28
10
|
/**
|
|
29
|
-
* List all files to process
|
|
11
|
+
* List all files to process with content type information
|
|
30
12
|
*/
|
|
31
13
|
async function listFiles() {
|
|
32
14
|
const files = [];
|
|
33
|
-
// JSON
|
|
34
|
-
const
|
|
35
|
-
for (const path of
|
|
36
|
-
files.push({ path, type: "json" });
|
|
15
|
+
// Articles (JSON)
|
|
16
|
+
const articleFiles = await glob(INGEST_CONFIG.paths.articles);
|
|
17
|
+
for (const path of articleFiles) {
|
|
18
|
+
files.push({ path, type: "json", contentType: "articles" });
|
|
37
19
|
}
|
|
38
|
-
//
|
|
39
|
-
const
|
|
40
|
-
|
|
41
|
-
:
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
}
|
|
20
|
+
// Notes (Markdown)
|
|
21
|
+
const noteFiles = await glob(INGEST_CONFIG.paths.notes);
|
|
22
|
+
for (const path of noteFiles) {
|
|
23
|
+
files.push({ path, type: "markdown", contentType: "notes" });
|
|
24
|
+
}
|
|
25
|
+
// Books (Markdown)
|
|
26
|
+
const bookFiles = await glob(INGEST_CONFIG.paths.books);
|
|
27
|
+
for (const path of bookFiles) {
|
|
28
|
+
files.push({ path, type: "markdown", contentType: "books" });
|
|
47
29
|
}
|
|
48
30
|
return files;
|
|
49
31
|
}
|
|
@@ -55,23 +37,30 @@ function parseFile(file) {
|
|
|
55
37
|
if (file.type === "json") {
|
|
56
38
|
return parseJsonArticle(file.path, content);
|
|
57
39
|
}
|
|
58
|
-
return parseMarkdown(file.path, content);
|
|
40
|
+
return parseMarkdown(file.path, content, file.contentType);
|
|
59
41
|
}
|
|
60
42
|
/**
|
|
61
|
-
*
|
|
43
|
+
* CLI entry point: list files, parse into chunks, delegate to ingestChunks
|
|
62
44
|
*/
|
|
63
|
-
async function ingest() {
|
|
45
|
+
async function ingest(force) {
|
|
64
46
|
console.log("Starting ingest...\n");
|
|
65
47
|
// Ensure collection exists
|
|
66
48
|
await ensureCollection();
|
|
67
49
|
// Get initial stats
|
|
68
50
|
const initialStats = await getCollectionStats();
|
|
69
51
|
console.log(`Initial points count: ${initialStats.pointsCount}\n`);
|
|
52
|
+
if (force) {
|
|
53
|
+
console.log("Force mode enabled: skipping change detection\n");
|
|
54
|
+
}
|
|
70
55
|
// List all files
|
|
71
56
|
const files = await listFiles();
|
|
57
|
+
const articleCount = files.filter((f) => f.contentType === "articles").length;
|
|
58
|
+
const noteCount = files.filter((f) => f.contentType === "notes").length;
|
|
59
|
+
const bookCount = files.filter((f) => f.contentType === "books").length;
|
|
72
60
|
console.log(`Found ${files.length} files to process`);
|
|
73
|
-
console.log(` -
|
|
74
|
-
console.log(` -
|
|
61
|
+
console.log(` - Articles: ${articleCount}`);
|
|
62
|
+
console.log(` - Notes: ${noteCount}`);
|
|
63
|
+
console.log(` - Books: ${bookCount}\n`);
|
|
75
64
|
// Parse all files into chunks
|
|
76
65
|
console.log("Parsing files...");
|
|
77
66
|
const allChunks = [];
|
|
@@ -84,57 +73,37 @@ async function ingest() {
|
|
|
84
73
|
console.error(`Error parsing ${file.path}:`, error);
|
|
85
74
|
}
|
|
86
75
|
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
console.log(`Skipped (unchanged): ${allChunks.length - changedChunks.length}\n`);
|
|
99
|
-
if (changedChunks.length === 0) {
|
|
100
|
-
console.log("No changes detected. Done!");
|
|
101
|
-
return;
|
|
102
|
-
}
|
|
103
|
-
// Generate embeddings and upsert in batches
|
|
104
|
-
console.log("Generating embeddings and upserting...");
|
|
105
|
-
let processed = 0;
|
|
106
|
-
for (let i = 0; i < changedChunks.length; i += BATCH_SIZE) {
|
|
107
|
-
const batch = changedChunks.slice(i, i + BATCH_SIZE);
|
|
108
|
-
const texts = batch.map((c) => c.text);
|
|
109
|
-
// Generate embeddings
|
|
110
|
-
const embeddings = await embedBatch(texts, false);
|
|
111
|
-
// Prepare points
|
|
112
|
-
const points = batch.map((chunk, idx) => ({
|
|
113
|
-
id: chunk.chunk_id,
|
|
114
|
-
vector: embeddings[idx],
|
|
115
|
-
payload: chunk,
|
|
116
|
-
}));
|
|
117
|
-
// Upsert to Qdrant with retry
|
|
118
|
-
await withRetry(() => upsertPoints(points));
|
|
119
|
-
processed += batch.length;
|
|
120
|
-
console.log(` Progress: ${processed}/${changedChunks.length}`);
|
|
121
|
-
// Small delay between batches to avoid overwhelming Qdrant
|
|
122
|
-
await sleep(100);
|
|
123
|
-
}
|
|
76
|
+
// Use VPS embedding if EMBEDDING_API_URL is set, otherwise local
|
|
77
|
+
const embedBatchFn = process.env.EMBEDDING_API_URL
|
|
78
|
+
? createEmbeddingClient({
|
|
79
|
+
apiUrl: process.env.EMBEDDING_API_URL,
|
|
80
|
+
apiKey: process.env.EMBEDDING_API_KEY ?? "",
|
|
81
|
+
cfAccessClientId: process.env.CF_ACCESS_CLIENT_ID ?? "",
|
|
82
|
+
cfAccessClientSecret: process.env.CF_ACCESS_CLIENT_SECRET ?? "",
|
|
83
|
+
}).embedBatch
|
|
84
|
+
: undefined;
|
|
85
|
+
// Delegate to core ingest logic
|
|
86
|
+
const result = await ingestChunks(allChunks, { embedBatchFn, force });
|
|
124
87
|
// Get final stats
|
|
125
88
|
const finalStats = await getCollectionStats();
|
|
126
89
|
console.log(`\nFinal points count: ${finalStats.pointsCount}`);
|
|
127
|
-
console.log(
|
|
90
|
+
console.log(`Ingest completed successfully! (${result.changedChunks} changed, ${result.skippedChunks} skipped)`);
|
|
128
91
|
}
|
|
129
92
|
async function main() {
|
|
130
93
|
const env = {
|
|
131
94
|
QDRANT_URL: process.env.QDRANT_URL,
|
|
95
|
+
CF_ACCESS_CLIENT_ID: process.env.CF_ACCESS_CLIENT_ID,
|
|
96
|
+
CF_ACCESS_CLIENT_SECRET: process.env.CF_ACCESS_CLIENT_SECRET,
|
|
132
97
|
};
|
|
133
98
|
if (!env.QDRANT_URL) {
|
|
134
99
|
throw new Error("QDRANT_URL environment variable is required.");
|
|
135
100
|
}
|
|
101
|
+
if (!env.CF_ACCESS_CLIENT_ID || !env.CF_ACCESS_CLIENT_SECRET) {
|
|
102
|
+
throw new Error("CF_ACCESS_CLIENT_ID and CF_ACCESS_CLIENT_SECRET environment variables are required.");
|
|
103
|
+
}
|
|
104
|
+
const force = process.argv.includes("--force");
|
|
136
105
|
try {
|
|
137
|
-
await ingest();
|
|
106
|
+
await ingest(force);
|
|
138
107
|
}
|
|
139
108
|
catch (error) {
|
|
140
109
|
console.error("❌ エラーが発生しました:", error);
|
package/dist/rag/ingest.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ingest.js","sourceRoot":"","sources":["../../src/rag/ingest.ts"],"names":[],"mappings":";AACA,OAAO,
|
|
1
|
+
{"version":3,"file":"ingest.js","sourceRoot":"","sources":["../../src/rag/ingest.ts"],"names":[],"mappings":";AACA,OAAO,eAAe,CAAC;AACvB,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EACN,gBAAgB,EAChB,aAAa,GACb,MAAM,gCAAgC,CAAC;AAExC,OAAO,EAAE,qBAAqB,EAAE,MAAM,yCAAyC,CAAC;AAChF,OAAO,EAAE,YAAY,EAAE,MAAM,+BAA+B,CAAC;AAC7D,OAAO,EACN,gBAAgB,EAChB,kBAAkB,GAClB,MAAM,sCAAsC,CAAC;AAC9C,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAC;AAQhD;;GAEG;AACH,KAAK,UAAU,SAAS;IACvB,MAAM,KAAK,GAAe,EAAE,CAAC;IAE7B,kBAAkB;IAClB,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAC9D,KAAK,MAAM,IAAI,IAAI,YAAY,EAAE,CAAC;QACjC,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,WAAW,EAAE,UAAU,EAAE,CAAC,CAAC;IAC7D,CAAC;IAED,mBAAmB;IACnB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACxD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC9B,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,OAAO,EAAE,CAAC,CAAC;IAC9D,CAAC;IAED,mBAAmB;IACnB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACxD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC9B,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,WAAW,EAAE,OAAO,EAAE,CAAC,CAAC;IAC9D,CAAC;IAED,OAAO,KAAK,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,IAAc;IAChC,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAEjD,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QAC1B,OAAO,gBAAgB,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC7C,CAAC;IACD,OAAO,aAAa,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,EAAE,IAAI,CAAC,WAAW,CAAC,CAAC;AAC5D,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,MAAM,CAAC,KAAc;IACnC,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;IAEpC,2BAA2B;IAC3B,MAAM,gBAAgB,EAAE,CAAC;IAEzB,oBAAoB;IACpB,MAAM,YAAY,GAAG,MAAM,kBAAkB,EAAE,CAAC;IAChD,OAAO,CAAC,GAAG,CAAC,yBAAyB,YAAY,CAAC,WAAW,IAAI,CAAC,CAAC;IAEnE,IAAI,KAAK,EAAE,CAAC;QACX,OAAO,CAAC,GAAG,CAAC,iDAAiD,CAAC,CAAC;IAChE,CAAC;IAED,iBAAiB;IACjB,MAAM,KAAK,GAAG,MAAM,SAAS,EAAE,CAAC;IAChC,MAAM,YAAY,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,KAAK,UAAU,
CAAC,CAAC,MAAM,CAAC;IAC9E,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,KAAK,OAAO,CAAC,CAAC,MAAM,CAAC;IACxE,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,KAAK,OAAO,CAAC,CAAC,MAAM,CAAC;IAExE,OAAO,CAAC,GAAG,CAAC,SAAS,KAAK,CAAC,MAAM,mBAAmB,CAAC,CAAC;IACtD,OAAO,CAAC,GAAG,CAAC,iBAAiB,YAAY,EAAE,CAAC,CAAC;IAC7C,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,EAAE,CAAC,CAAC;IACvC,OAAO,CAAC,GAAG,CAAC,cAAc,SAAS,IAAI,CAAC,CAAC;IAEzC,8BAA8B;IAC9B,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;IAChC,MAAM,SAAS,GAAoB,EAAE,CAAC;IAEtC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,IAAI,CAAC;YACJ,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;YAC/B,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;QAC3B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,OAAO,CAAC,KAAK,CAAC,iBAAiB,IAAI,CAAC,IAAI,GAAG,EAAE,KAAK,CAAC,CAAC;QACrD,CAAC;IACF,CAAC;IAED,iEAAiE;IACjE,MAAM,YAAY,GAAG,OAAO,CAAC,GAAG,CAAC,iBAAiB;QACjD,CAAC,CAAC,qBAAqB,CAAC;YACtB,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB;YACrC,MAAM,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;YAC3C,gBAAgB,EAAE,OAAO,CAAC,GAAG,CAAC,mBAAmB,IAAI,EAAE;YACvD,oBAAoB,EAAE,OAAO,CAAC,GAAG,CAAC,uBAAuB,IAAI,EAAE;SAC/D,CAAC,CAAC,UAAU;QACd,CAAC,CAAC,SAAS,CAAC;IAEb,gCAAgC;IAChC,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC,SAAS,EAAE,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC,CAAC;IAEtE,kBAAkB;IAClB,MAAM,UAAU,GAAG,MAAM,kBAAkB,EAAE,CAAC;IAC9C,OAAO,CAAC,GAAG,CAAC,yBAAyB,UAAU,CAAC,WAAW,EAAE,CAAC,CAAC;IAC/D,OAAO,CAAC,GAAG,CACV,mCAAmC,MAAM,CAAC,aAAa,aAAa,MAAM,CAAC,aAAa,WAAW,CACnG,CAAC;AACH,CAAC;AAED,KAAK,UAAU,IAAI;IAClB,MAAM,GAAG,GAAG;QACX,UAAU,EAAE,OAAO,CAAC,GAAG,CAAC,UAAU;QAClC,mBAAmB,EAAE,OAAO,CAAC,GAAG,CAAC,mBAAmB;QACpD,uBAAuB,EAAE,OAAO,CAAC,GAAG,CAAC,uBAAuB;KACnD,CAAC;IAEX,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC;QACrB,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;IACjE,CAAC;IACD,IAAI,CAAC,GAAG,CAAC,mBAAmB,IAAI,CAAC,GAAG,CAAC,uBAAuB,EAAE,CAAC;QAC9D,MAAM,IAAI,KAAK,CACd,qFAAqF,CACrF,CAAC;IACH,CAAC;IAED,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC;IAE/C,IAAI,CAAC;QACJ,MAAM,MAAM,CAAC,KAAK,CAAC,CAAC;IACrB,CAAC;IA
AC,OAAO,KAAK,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,eAAe,EAAE,KAAK,CAAC,CAAC;QACtC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC;AACF,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACtB,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACrB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACjB,CAAC,CAAC,CAAC"}
|
package/dist/rag/search.d.ts
CHANGED
package/dist/rag/search.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../../src/rag/search.ts"],"names":[],"mappings":""}
|
|
1
|
+
{"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../../src/rag/search.ts"],"names":[],"mappings":";AACA,OAAO,eAAe,CAAC"}
|
package/dist/rag/search.js
CHANGED
|
@@ -1,86 +1,74 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
import
|
|
3
|
-
import {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
*/
|
|
7
|
-
async function runSearch() {
|
|
8
|
-
// Parse command line arguments
|
|
2
|
+
import "dotenv/config";
|
|
3
|
+
import { embed } from "@s-hirano-ist/s-search/embedding";
|
|
4
|
+
import { ensureCollection, search as qdrantSearch, } from "@s-hirano-ist/s-search/qdrant-client";
|
|
5
|
+
function parseArgs() {
|
|
9
6
|
const args = process.argv.slice(2);
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
console.log("");
|
|
13
|
-
console.log("Options:");
|
|
14
|
-
console.log(" --top-k <number> Number of results (default: 5)");
|
|
15
|
-
console.log(" --type <type> Filter by type: markdown_note | bookmark_json");
|
|
16
|
-
console.log(" --heading <heading> Filter by top_heading");
|
|
17
|
-
console.log("");
|
|
18
|
-
console.log("Examples:");
|
|
19
|
-
console.log(' rag-search "ルネサンス 遠近法"');
|
|
20
|
-
console.log(' rag-search "AI 脆弱性" --type bookmark_json');
|
|
21
|
-
console.log(' rag-search "React" --heading javascript --top-k 10');
|
|
22
|
-
process.exit(1);
|
|
23
|
-
}
|
|
24
|
-
// Parse options
|
|
25
|
-
let query = "";
|
|
26
|
-
let topK = 5;
|
|
27
|
-
let filterType;
|
|
28
|
-
let filterHeading;
|
|
7
|
+
const options = {};
|
|
8
|
+
const positional = [];
|
|
29
9
|
for (let i = 0; i < args.length; i++) {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
i
|
|
10
|
+
const arg = args[i];
|
|
11
|
+
if (arg === "--top-k" && i + 1 < args.length) {
|
|
12
|
+
options.topK = Number(args[++i]);
|
|
13
|
+
}
|
|
14
|
+
else if (arg === "--type" && i + 1 < args.length) {
|
|
15
|
+
options.type = args[++i];
|
|
33
16
|
}
|
|
34
|
-
else if (
|
|
35
|
-
|
|
36
|
-
i++;
|
|
17
|
+
else if (arg === "--heading" && i + 1 < args.length) {
|
|
18
|
+
options.heading = args[++i];
|
|
37
19
|
}
|
|
38
|
-
else if (
|
|
39
|
-
|
|
40
|
-
|
|
20
|
+
else if (arg === "--content-type" && i + 1 < args.length) {
|
|
21
|
+
const value = args[++i];
|
|
22
|
+
const types = value.split(",");
|
|
23
|
+
options.contentType = types.length === 1 ? types[0] : types;
|
|
41
24
|
}
|
|
42
|
-
else
|
|
43
|
-
|
|
25
|
+
else {
|
|
26
|
+
positional.push(arg);
|
|
44
27
|
}
|
|
45
28
|
}
|
|
29
|
+
const query = positional.join(" ");
|
|
46
30
|
if (!query) {
|
|
47
|
-
console.error("
|
|
48
|
-
process.exit(1);
|
|
49
|
-
}
|
|
50
|
-
// Check collection status
|
|
51
|
-
const stats = await getCollectionStats();
|
|
52
|
-
if (stats.status === "not_found") {
|
|
53
|
-
console.error("Error: Collection not found. Run ingest first.");
|
|
31
|
+
console.error("Usage: rag-search <query> [--top-k N] [--type markdown_note|bookmark_json] [--heading HEADING] [--content-type articles|books|notes]");
|
|
54
32
|
process.exit(1);
|
|
55
33
|
}
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
console.log("Generating query embedding...");
|
|
34
|
+
return { query, options };
|
|
35
|
+
}
|
|
36
|
+
async function searchContent(query, options = {}) {
|
|
60
37
|
const queryVector = await embed(query, true);
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
const results = await search(queryVector, {
|
|
64
|
-
topK,
|
|
38
|
+
const results = await qdrantSearch(queryVector, {
|
|
39
|
+
topK: options.topK,
|
|
65
40
|
filter: {
|
|
66
|
-
type:
|
|
67
|
-
top_heading:
|
|
41
|
+
type: options.type,
|
|
42
|
+
top_heading: options.heading,
|
|
43
|
+
content_type: options.contentType,
|
|
68
44
|
},
|
|
69
45
|
});
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
console.log(`
|
|
77
|
-
|
|
78
|
-
console.log(`
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
console.log(`
|
|
83
|
-
|
|
46
|
+
return { results, query, totalResults: results.length };
|
|
47
|
+
}
|
|
48
|
+
async function search() {
|
|
49
|
+
const { query, options } = parseArgs();
|
|
50
|
+
console.log(`Searching for: "${query}"`);
|
|
51
|
+
if (options.topK)
|
|
52
|
+
console.log(` top-k: ${options.topK}`);
|
|
53
|
+
if (options.type)
|
|
54
|
+
console.log(` type: ${options.type}`);
|
|
55
|
+
if (options.heading)
|
|
56
|
+
console.log(` heading: ${options.heading}`);
|
|
57
|
+
if (options.contentType)
|
|
58
|
+
console.log(` content-type: ${Array.isArray(options.contentType) ? options.contentType.join(",") : options.contentType}`);
|
|
59
|
+
console.log();
|
|
60
|
+
await ensureCollection();
|
|
61
|
+
const response = await searchContent(query, options);
|
|
62
|
+
console.log(`Found ${response.totalResults} results:\n`);
|
|
63
|
+
for (const [i, result] of response.results.entries()) {
|
|
64
|
+
console.log(`--- Result ${i + 1} (score: ${result.score.toFixed(4)}) ---`);
|
|
65
|
+
console.log(`Title: ${result.title}`);
|
|
66
|
+
if (result.url)
|
|
67
|
+
console.log(`URL: ${result.url}`);
|
|
68
|
+
console.log(`Type: ${result.type}`);
|
|
69
|
+
console.log(`Content-Type: ${result.content_type}`);
|
|
70
|
+
console.log(`Heading: ${result.heading_path.join(" > ")}`);
|
|
71
|
+
console.log(`\n${result.text}\n`);
|
|
84
72
|
}
|
|
85
73
|
}
|
|
86
74
|
async function main() {
|
|
@@ -91,7 +79,7 @@ async function main() {
|
|
|
91
79
|
throw new Error("QDRANT_URL environment variable is required.");
|
|
92
80
|
}
|
|
93
81
|
try {
|
|
94
|
-
await
|
|
82
|
+
await search();
|
|
95
83
|
}
|
|
96
84
|
catch (error) {
|
|
97
85
|
console.error("❌ エラーが発生しました:", error);
|