@s-hirano-ist/s-scripts 1.5.1 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,10 @@
1
+ import { type QdrantPayload } from "./config.js";
2
+ /**
3
+ * Parse the text of a JSON article file (content) into Qdrant payload chunks; filePath is used to build the doc and chunk IDs
4
+ */
5
+ export declare function parseJsonArticle(filePath: string, content: string): QdrantPayload[];
6
+ /**
7
+ * Parse the text of a Markdown file (content) into Qdrant payload chunks built from its ## / ### sections; draft files yield an empty array
8
+ */
9
+ export declare function parseMarkdown(filePath: string, content: string): QdrantPayload[];
10
+ //# sourceMappingURL=chunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.d.ts","sourceRoot":"","sources":["../../src/rag/chunker.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,KAAK,aAAa,EAAc,MAAM,aAAa,CAAC;AAsC7D;;GAEG;AACH,wBAAgB,gBAAgB,CAC/B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,GACb,aAAa,EAAE,CAsCjB;AAiID;;GAEG;AACH,wBAAgB,aAAa,CAC5B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,GACb,aAAa,EAAE,CA2CjB"}
@@ -0,0 +1,188 @@
1
+ import { createHash } from "crypto";
2
+ import { RAG_CONFIG } from "./config.js";
3
/**
 * Derive a short fingerprint of `content` for change detection.
 *
 * @param content - Text to fingerprint.
 * @returns The first 16 hex characters of the SHA-256 digest of `content`.
 */
function generateHash(content) {
    const hasher = createHash("sha256");
    hasher.update(content);
    const hexDigest = hasher.digest("hex");
    return hexDigest.slice(0, 16);
}
9
/**
 * Build the identifier of one chunk inside a document.
 *
 * @param docId - Identifier of the owning document.
 * @param index - Position of the chunk within that document.
 * @returns `docId` and `index` joined by a `#`.
 */
function generateChunkId(docId, index) {
    return "".concat(docId, "#", index);
}
15
/**
 * Turn a JSON article file into one chunk per non-empty entry of its `body`.
 *
 * The chunk text is the entry's title, ogTitle (only when it differs from the
 * title), ogDescription, quote and url, each on its own line. The chunk index
 * is the entry's position in `body`, so skipped (empty) entries leave gaps in
 * the chunk IDs.
 *
 * @param filePath - Path of the source file; used to build `file:<path>` IDs.
 * @param content - Raw JSON text of the file.
 * @returns The payloads built from the file, in `body` order.
 * @throws If `content` is not valid JSON or has no `body` to iterate.
 */
export function parseJsonArticle(filePath, content) {
    const article = JSON.parse(content);
    const docId = `file:${filePath}`;
    const payloads = [];
    for (let position = 0; position < article.body.length; position += 1) {
        const { title, ogTitle, ogDescription, quote, url } = article.body[position];
        // Keep only the fields that are present; ogTitle is dropped when it repeats the title.
        const parts = [
            title,
            ogTitle !== title ? ogTitle : "",
            ogDescription,
            quote,
            url,
        ].filter((part) => part);
        const text = parts.join("\n");
        if (text.trim()) {
            payloads.push({
                type: "bookmark_json",
                top_heading: article.heading,
                doc_id: docId,
                chunk_id: generateChunkId(docId, position),
                title: title || ogTitle || "Untitled",
                url,
                heading_path: [article.heading],
                text,
                content_hash: generateHash(text),
            });
        }
    }
    return payloads;
}
55
/**
 * Split a Markdown document into its frontmatter fields and its body.
 *
 * Only a small subset of YAML is understood: one `key: value` pair per line,
 * for the keys `heading`, `description` and `draft`. Other lines are ignored.
 * Both LF and CRLF line endings are accepted, and the closing `---` may be the
 * last line of the file.
 *
 * @param content - Full text of the Markdown file.
 * @returns `frontmatter` (with `heading` defaulting to "unknown") and `body`.
 *          When no frontmatter block is found, `body` is the whole `content`.
 */
function parseFrontmatter(content) {
    const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n([\s\S]*))?$/);
    if (!match) {
        return {
            frontmatter: { heading: "unknown" },
            body: content,
        };
    }
    // Remove one pair of matching quotes around a scalar ("x" or 'x').
    const unquote = (value) => {
        const quoted = value.match(/^(["'])(.*)\1$/);
        return quoted ? quoted[2] : value;
    };
    let heading = "unknown";
    let description;
    let draft;
    for (const line of match[1].split(/\r?\n/)) {
        // Split on the first colon only, so values may contain ':' themselves.
        const [key, ...valueParts] = line.split(":");
        const value = valueParts.join(":").trim();
        if (key === "heading") {
            heading = unquote(value);
        }
        else if (key === "description") {
            description = unquote(value);
        }
        else if (key === "draft") {
            // Accept true / True / TRUE; a quoted "true" is a string and stays false.
            draft = value.toLowerCase() === "true";
        }
    }
    // Only include the optional keys that were actually present.
    const frontmatter = Object.assign(
        { heading },
        description === undefined ? {} : { description },
        draft === undefined ? {} : { draft },
    );
    return { frontmatter, body: match[2] === undefined ? "" : match[2] };
}
85
/**
 * Split Markdown text into sections, one per `##` or `###` heading.
 *
 * Each section records its heading path (the enclosing `##` title followed by
 * its own title for a `###`), its title, its level (2 or 3) and the text that
 * follows the heading. Sections whose text is blank are dropped, and text
 * before the first heading is not part of any section. Lines of other heading
 * levels are treated as ordinary text.
 *
 * LF and CRLF line endings are both accepted, and heading-like lines inside
 * fenced code blocks (``` or ~~~) are treated as text, not as headings.
 *
 * @param content - Markdown body (frontmatter already removed).
 * @returns Sections in document order.
 */
function splitMarkdownByHeadings(content) {
    const sections = [];
    const headingStack = [];
    let currentSection = null;
    // Marker that opened the fenced code block we are inside, or null.
    let openFence = null;
    const saveCurrent = () => {
        if (currentSection && currentSection.content.trim()) {
            sections.push(currentSection);
        }
    };
    for (const line of content.split(/\r?\n/)) {
        const fence = line.match(/^ {0,3}(`{3,}|~{3,})/);
        if (fence) {
            const marker = fence[1];
            const rest = line.slice(fence[0].length);
            if (openFence === null) {
                // A backtick fence whose info string contains a backtick is inline code, not a fence.
                if (marker[0] !== "`" || !rest.includes("`")) {
                    openFence = marker;
                }
            }
            else if (marker[0] === openFence[0] &&
                marker.length >= openFence.length &&
                rest.trim() === "") {
                openFence = null;
            }
        }
        const headingMatch = openFence === null ? line.match(/^(#{2,3})\s+(.+)$/) : null;
        if (headingMatch) {
            saveCurrent();
            const level = headingMatch[1].length;
            const title = headingMatch[2];
            // Drop headings of the same or deeper level so the stack holds only ancestors.
            while (headingStack.length > 0 &&
                headingStack[headingStack.length - 1].level >= level) {
                headingStack.pop();
            }
            headingStack.push({ level, title });
            currentSection = {
                headingPath: headingStack.map((h) => h.title),
                title,
                content: "",
                level,
            };
        }
        else if (currentSection) {
            currentSection.content += line + "\n";
        }
    }
    saveCurrent();
    return sections;
}
128
/**
 * Break one paragraph that is longer than `maxLength` into pieces that fit.
 * Cuts at the last space or newline inside the limit when there is one,
 * otherwise at the limit itself (without splitting a surrogate pair).
 */
function splitOversizedParagraph(para, maxLength) {
    if (!(maxLength > 0) || para.length <= maxLength) {
        return [para];
    }
    const pieces = [];
    let rest = para;
    while (rest.length > maxLength) {
        const window = rest.slice(0, maxLength + 1);
        let cut = Math.max(window.lastIndexOf("\n"), window.lastIndexOf(" "));
        if (cut <= 0) {
            cut = maxLength;
            const code = rest.charCodeAt(cut - 1);
            if (cut > 1 && code >= 0xd800 && code <= 0xdbff) {
                cut -= 1;
            }
        }
        const piece = rest.slice(0, cut).trimEnd();
        if (piece) {
            pieces.push(piece);
        }
        rest = rest.slice(cut).trimStart();
    }
    if (rest) {
        pieces.push(rest);
    }
    return pieces;
}
/**
 * Split long text into chunks of at most `maxLength` characters.
 *
 * Text that already fits is returned unchanged as a single chunk. Otherwise
 * the text is split on blank lines and paragraphs are packed together, joined
 * by a blank line, for as long as the packed chunk (separator included) still
 * fits. A single paragraph longer than `maxLength` is broken up further.
 * Chunks produced by splitting are trimmed, and blank chunks are dropped.
 *
 * @param text - Text to split.
 * @param maxLength - Maximum chunk length in characters.
 * @returns The chunks, in order.
 */
function splitByParagraphs(text, maxLength) {
    if (text.length <= maxLength) {
        return [text];
    }
    const chunks = [];
    let current = "";
    const flush = () => {
        const trimmed = current.trim();
        if (trimmed) {
            chunks.push(trimmed);
        }
        current = "";
    };
    for (const para of text.split(/\n\n+/)) {
        for (const piece of splitOversizedParagraph(para, maxLength)) {
            // Count the "\n\n" separator so a packed chunk never exceeds the limit.
            if (current && current.length + 2 + piece.length > maxLength) {
                flush();
            }
            current += (current ? "\n\n" : "") + piece;
        }
    }
    flush();
    return chunks;
}
152
/**
 * Turn a Markdown file into chunks, one or more per `##` / `###` section.
 *
 * Files whose frontmatter marks them as drafts produce no chunks. Each
 * section's text is split so that long sections yield several chunks, and
 * chunks are numbered consecutively across the whole file.
 *
 * @param filePath - Path of the source file; used to build `file:<path>` IDs.
 * @param content - Full text of the file, frontmatter included.
 * @returns The payloads built from the file, in document order.
 */
export function parseMarkdown(filePath, content) {
    const parsed = parseFrontmatter(content);
    const docId = `file:${filePath}`;
    if (parsed.frontmatter.draft) {
        return [];
    }
    const topHeading = parsed.frontmatter.heading;
    const payloads = [];
    for (const section of splitMarkdownByHeadings(parsed.body)) {
        const pieces = splitByParagraphs(section.content, RAG_CONFIG.chunking.maxChunkLength);
        pieces
            .filter((piece) => piece.trim())
            .forEach((text) => {
                payloads.push({
                    type: "markdown_note",
                    top_heading: topHeading,
                    doc_id: docId,
                    // The next chunk index is the number of chunks emitted so far.
                    chunk_id: generateChunkId(docId, payloads.length),
                    title: section.title,
                    heading_path: [topHeading, ...section.headingPath],
                    text,
                    content_hash: generateHash(text),
                });
            });
    }
    return payloads;
}
188
+ //# sourceMappingURL=chunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/rag/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAsB,UAAU,EAAE,MAAM,aAAa,CAAC;AAwB7D;;GAEG;AACH,SAAS,YAAY,CAAC,OAAe;IACpC,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AACxE,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,KAAa,EAAE,KAAa;IACpD,OAAO,GAAG,KAAK,IAAI,KAAK,EAAE,CAAC;AAC5B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAC/B,QAAgB,EAChB,OAAe;IAEf,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAgB,CAAC;IAChD,MAAM,KAAK,GAAG,QAAQ,QAAQ,EAAE,CAAC;IACjC,MAAM,MAAM,GAAoB,EAAE,CAAC;IAEnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAE1B,mCAAmC;QACnC,MAAM,SAAS,GAAa,EAAE,CAAC;QAC/B,IAAI,IAAI,CAAC,KAAK;YAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC3C,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,CAAC,KAAK;YAC9C,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC9B,IAAI,IAAI,CAAC,aAAa;YAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC3D,IAAI,IAAI,CAAC,KAAK;YAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC3C,IAAI,IAAI,CAAC,GAAG;YAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEvC,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAElC,mBAAmB;QACnB,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YAAE,SAAS;QAE3B,MAAM,OAAO,GAAG,eAAe,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAE1C,MAAM,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,eAAe;YACrB,WAAW,EAAE,IAAI,CAAC,OAAO;YACzB,MAAM,EAAE,KAAK;YACb,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,OAAO,IAAI,UAAU;YAC/C,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,YAAY,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC;YAC5B,IAAI;YACJ,YAAY,EAAE,YAAY,CAAC,IAAI,CAAC;SAChC,CAAC,CAAC;IACJ,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,OAAe;IAIxC,MAAM,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,mCAAmC,CAAC,CAAC;IAE5E,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACvB,OAAO;YACN,WAAW,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE;YACnC,IAAI,EAAE,OAAO;SACb,CAAC
;IACH,CAAC;IAED,MAAM,cAAc,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC;IAC3C,MAAM,IAAI,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC;IAEjC,oCAAoC;IACpC,MAAM,WAAW,GAAwB,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;IAEhE,KAAK,MAAM,IAAI,IAAI,cAAc,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QAC/C,MAAM,CAAC,GAAG,EAAE,GAAG,UAAU,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC7C,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAE1C,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;YACvB,WAAW,CAAC,OAAO,GAAG,KAAK,CAAC;QAC7B,CAAC;aAAM,IAAI,GAAG,KAAK,aAAa,EAAE,CAAC;YAClC,WAAW,CAAC,WAAW,GAAG,KAAK,CAAC;QACjC,CAAC;aAAM,IAAI,GAAG,KAAK,OAAO,EAAE,CAAC;YAC5B,WAAW,CAAC,KAAK,GAAG,KAAK,KAAK,MAAM,CAAC;QACtC,CAAC;IACF,CAAC;IAED,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;AAC9B,CAAC;AASD;;GAEG;AACH,SAAS,uBAAuB,CAAC,OAAe;IAC/C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,QAAQ,GAAsB,EAAE,CAAC;IAEvC,IAAI,kBAAkB,GAAa,EAAE,CAAC;IACtC,IAAI,cAAc,GAA2B,IAAI,CAAC;IAClD,MAAM,YAAY,GAAuC,EAAE,CAAC;IAE5D,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;QAErD,IAAI,YAAY,EAAE,CAAC;YAClB,wBAAwB;YACxB,IAAI,cAAc,IAAI,cAAc,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;gBACrD,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YAC/B,CAAC;YAED,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YACrC,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;YAE9B,uBAAuB;YACvB,OACC,YAAY,CAAC,MAAM,GAAG,CAAC;gBACvB,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,KAAK,EACnD,CAAC;gBACF,YAAY,CAAC,GAAG,EAAE,CAAC;YACpB,CAAC;YACD,YAAY,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;YAEpC,sBAAsB;YACtB,kBAAkB,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YAEtD,cAAc,GAAG;gBAChB,WAAW,EAAE,CAAC,GAAG,kBAAkB,CAAC;gBACpC,KAAK;gBACL,OAAO,EAAE,EAAE;gBACX,KAAK;aACL,CAAC;QACH,CAAC;aAAM,IAAI,cAAc,EAAE,CAAC;YAC3B,cAAc,CAAC,OAAO,IAAI,IAAI,GAAG,IAAI,CAAC;QACvC,CAAC;IACF,CAAC;IAED,oBAAoB;IACpB,IAAI,cAAc,IAAI,cAAc,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;QACrD,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IAC/B,CAAC;IAED,OAAO,QAAQ,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,SAAS
,iBAAiB,CAAC,IAAY,EAAE,SAAiB;IACzD,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC9B,OAAO,CAAC,IAAI,CAAC,CAAC;IACf,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,YAAY,GAAG,EAAE,CAAC;IAEtB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC/B,IAAI,YAAY,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,SAAS,IAAI,YAAY,EAAE,CAAC;YACnE,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;YACjC,YAAY,GAAG,IAAI,CAAC;QACrB,CAAC;aAAM,CAAC;YACP,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC;QACrD,CAAC;IACF,CAAC;IAED,IAAI,YAAY,CAAC,IAAI,EAAE,EAAE,CAAC;QACzB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;IAClC,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAC5B,QAAgB,EAChB,OAAe;IAEf,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;IACxD,MAAM,KAAK,GAAG,QAAQ,QAAQ,EAAE,CAAC;IACjC,MAAM,MAAM,GAAoB,EAAE,CAAC;IAEnC,mBAAmB;IACnB,IAAI,WAAW,CAAC,KAAK,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACX,CAAC;IAED,MAAM,QAAQ,GAAG,uBAAuB,CAAC,IAAI,CAAC,CAAC;IAE/C,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAChC,sBAAsB;QACtB,MAAM,UAAU,GAAG,iBAAiB,CACnC,OAAO,CAAC,OAAO,EACf,UAAU,CAAC,QAAQ,CAAC,cAAc,CAClC,CAAC;QAEF,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;YAC/B,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;gBAAE,SAAS;YAE3B,MAAM,eAAe,GAAG,CAAC,WAAW,CAAC,OAAO,EAAE,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;YACtE,MAAM,OAAO,GAAG,eAAe,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;YAEnD,MAAM,CAAC,IAAI,CAAC;gBACX,IAAI,EAAE,eAAe;gBACrB,WAAW,EAAE,WAAW,CAAC,OAAO;gBAChC,MAAM,EAAE,KAAK;gBACb,QAAQ,EAAE,OAAO;gBACjB,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,YAAY,EAAE,eAAe;gBAC7B,IAAI;gBACJ,YAAY,EAAE,YAAY,CAAC,IAAI,CAAC;aAChC,CAAC,CAAC;YAEH,UAAU,EAAE,CAAC;QACd,CAAC;IACF,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC"}
@@ -0,0 +1,44 @@
1
+ export declare const RAG_CONFIG: { // Shared settings for the RAG modules: Qdrant, embedding, input globs and chunking
2
+ readonly qdrant: {
3
+ readonly collectionName: "knowledge_v1";
4
+ readonly vectorSize: 384; // embedBatch slices the flat embedding output into vectors of this length
5
+ readonly distance: "Cosine";
6
+ };
7
+ readonly embedding: {
8
+ readonly model: "intfloat/multilingual-e5-small";
9
+ readonly prefix: { // prepended to the text before embedding, depending on isQuery
10
+ readonly query: "query: ";
11
+ readonly passage: "passage: ";
12
+ };
13
+ };
14
+ readonly paths: { // glob patterns expanded by ingest.js
15
+ readonly markdown: readonly ["markdown/note/**/*.md", "markdown/book/**/*.md", "raw/article/**/*.md"];
16
+ readonly json: "json/article/**/*.json";
17
+ };
18
+ readonly chunking: {
19
+ readonly maxChunkLength: 2000; // length limit parseMarkdown passes to the paragraph splitter
20
+ readonly headingLevels: readonly [2, 3]; // NOTE(review): not read by chunker.js, whose heading regex hard-codes ## and ### — confirm
21
+ };
22
+ readonly hashCachePath: ".rag-hash-cache.json"; // NOTE(review): not referenced by the code visible in this diff — confirm it is still used
23
+ };
24
+ export type QdrantPayload = { // One chunk produced by chunker.js; ingest.js sends it as the point payload
25
+ type: "markdown_note" | "bookmark_json"; // "markdown_note" from parseMarkdown, "bookmark_json" from parseJsonArticle
26
+ top_heading: string; // frontmatter heading (Markdown) or the JSON file's heading
27
+ doc_id: string; // "file:" followed by the source file path
28
+ chunk_id: string; // doc_id, "#", then the chunk index
29
+ title: string;
30
+ url?: string; // only set by parseJsonArticle
31
+ heading_path: string[];
32
+ text: string; // the text ingest.js embeds
33
+ content_hash: string; // first 16 hex chars of the SHA-256 of text; compared by ingest.js to detect changes
34
+ };
35
+ export type SearchResult = { // One hit returned by search(); NOTE(review): fields other than score share names with QdrantPayload, presumably copied from the stored payload — confirm
36
+ score: number;
37
+ text: string;
38
+ title: string;
39
+ url?: string;
40
+ heading_path: string[];
41
+ type: "markdown_note" | "bookmark_json";
42
+ doc_id: string;
43
+ };
44
+ //# sourceMappingURL=config.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../src/rag/config.ts"],"names":[],"mappings":"AACA,eAAO,MAAM,UAAU;;;;;;;;;;;;;;;;;;;;;;CAmCb,CAAC;AAGX,MAAM,MAAM,aAAa,GAAG;IAC3B,IAAI,EAAE,eAAe,GAAG,eAAe,CAAC;IACxC,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,CAAC;CACrB,CAAC;AAGF,MAAM,MAAM,YAAY,GAAG;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,eAAe,GAAG,eAAe,CAAC;IACxC,MAAM,EAAE,MAAM,CAAC;CACf,CAAC"}
@@ -0,0 +1,34 @@
1
// Qdrant collection settings; vectorSize is the output size of multilingual-e5-small.
const QDRANT_SETTINGS = {
    collectionName: "knowledge_v1",
    vectorSize: 384,
    distance: "Cosine",
};
// Embedding model and the prefixes prepended to query and passage text.
const EMBEDDING_SETTINGS = {
    model: "intfloat/multilingual-e5-small",
    prefix: { query: "query: ", passage: "passage: " },
};
// Glob patterns of the files to ingest.
const INPUT_PATHS = {
    markdown: ["markdown/note/**/*.md", "markdown/book/**/*.md", "raw/article/**/*.md"],
    json: "json/article/**/*.json",
};
// Chunking limits; heading levels 2 and 3 correspond to ## and ###.
const CHUNKING_SETTINGS = {
    maxChunkLength: 2000,
    headingLevels: [2, 3],
};
/** Shared configuration of the RAG modules. */
export const RAG_CONFIG = {
    qdrant: QDRANT_SETTINGS,
    embedding: EMBEDDING_SETTINGS,
    paths: INPUT_PATHS,
    chunking: CHUNKING_SETTINGS,
    // Cache file for hash comparison
    hashCachePath: ".rag-hash-cache.json",
};
34
+ //# sourceMappingURL=config.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/rag/config.ts"],"names":[],"mappings":"AAAA,oBAAoB;AACpB,MAAM,CAAC,MAAM,UAAU,GAAG;IACzB,kBAAkB;IAClB,MAAM,EAAE;QACP,cAAc,EAAE,cAAc;QAC9B,UAAU,EAAE,GAAG,EAAE,wBAAwB;QACzC,QAAQ,EAAE,QAAiB;KAC3B;IAED,qBAAqB;IACrB,SAAS,EAAE;QACV,KAAK,EAAE,gCAAgC;QACvC,MAAM,EAAE;YACP,KAAK,EAAE,SAAS;YAChB,OAAO,EAAE,WAAW;SACpB;KACD;IAED,aAAa;IACb,KAAK,EAAE;QACN,QAAQ,EAAE;YACT,uBAAuB;YACvB,uBAAuB;YACvB,qBAAqB;SACrB;QACD,IAAI,EAAE,wBAAwB;KAC9B;IAED,oBAAoB;IACpB,QAAQ,EAAE;QACT,cAAc,EAAE,IAAI;QACpB,aAAa,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,aAAa;KACpC;IAED,iCAAiC;IACjC,aAAa,EAAE,sBAAsB;CAC5B,CAAC"}
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Generate embedding for a single text (mean-pooled and normalized)
3
+ * @param text - Input text to embed; the configured query or passage prefix is prepended to it
4
+ * @param isQuery - Whether this is a query (vs passage); defaults to false (passage)
5
+ * @returns Embedding vector
6
+ */
7
+ export declare function embed(text: string, isQuery?: boolean): Promise<number[]>;
8
+ /**
9
+ * Generate embeddings for multiple texts in batch (one pipeline call for the whole array)
10
+ * @param texts - Array of input texts; the configured query or passage prefix is prepended to each
11
+ * @param isQuery - Whether these are queries (vs passages); defaults to false (passages)
12
+ * @returns Array of embedding vectors, one per input text in the same order
13
+ */
14
+ export declare function embedBatch(texts: string[], isQuery?: boolean): Promise<number[][]>;
15
+ //# sourceMappingURL=embedding.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedding.d.ts","sourceRoot":"","sources":["../../src/rag/embedding.ts"],"names":[],"mappings":"AAwBA;;;;;GAKG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,UAAQ,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAiB5E;AAED;;;;;GAKG;AACH,wBAAsB,UAAU,CAC/B,KAAK,EAAE,MAAM,EAAE,EACf,OAAO,UAAQ,GACb,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CA2BrB"}
@@ -0,0 +1,61 @@
1
+ import { pipeline, } from "@huggingface/transformers";
2
+ import { RAG_CONFIG } from "./config.js";
3
// Promise of the lazily created pipeline; null until first use and after a failed load.
let embeddingPipeline = null;
/**
 * Return the shared feature-extraction pipeline, creating it on first use.
 *
 * The in-flight promise itself is cached, so callers that arrive while the
 * model is still loading share that load instead of each starting their own.
 * If loading fails the cache is cleared so that a later call can try again.
 *
 * @returns The pipeline for the configured embedding model.
 */
async function getEmbeddingPipeline() {
    if (!embeddingPipeline) {
        console.log(`Loading embedding model: ${RAG_CONFIG.embedding.model}...`);
        embeddingPipeline = pipeline("feature-extraction", RAG_CONFIG.embedding.model, { dtype: "fp32" }).then((loaded) => {
            console.log("Embedding model loaded successfully.");
            return loaded;
        }, (error) => {
            embeddingPipeline = null;
            throw error;
        });
    }
    return embeddingPipeline;
}
15
/**
 * Embed one piece of text.
 *
 * @param text - Text to embed.
 * @param isQuery - True to use the query prefix, false (default) for the passage prefix.
 * @returns The mean-pooled, normalized embedding as a plain array.
 */
export async function embed(text, isQuery = false) {
    const extractor = await getEmbeddingPipeline();
    // E5 models expect a "query: " or "passage: " prefix on their input.
    const { query, passage } = RAG_CONFIG.embedding.prefix;
    const input = (isQuery ? query : passage) + text;
    const options = { pooling: "mean", normalize: true };
    const tensor = await extractor(input, options);
    return Array.from(tensor.data);
}
35
/**
 * Generate embeddings for multiple texts with a single pipeline call.
 *
 * @param texts - Array of input texts.
 * @param isQuery - Whether these are queries (vs passages); defaults to false.
 * @returns One embedding vector per input text, in input order. An empty
 *          input yields an empty array without loading the model.
 * @throws If the pipeline output does not hold exactly one vector of the
 *         configured size (`RAG_CONFIG.qdrant.vectorSize`) per input text.
 */
export async function embedBatch(texts, isQuery = false) {
    if (texts.length === 0) {
        return [];
    }
    const pipe = await getEmbeddingPipeline();
    const prefix = isQuery
        ? RAG_CONFIG.embedding.prefix.query
        : RAG_CONFIG.embedding.prefix.passage;
    const prefixedTexts = texts.map((t) => prefix + t);
    const outputs = await pipe(prefixedTexts, {
        pooling: "mean",
        normalize: true,
    });
    // outputs.data is a flat array holding all vectors back to back; reshape it.
    const dim = RAG_CONFIG.qdrant.vectorSize;
    const flat = outputs.data;
    // Slicing with the wrong dimension would silently produce garbage vectors, so fail loudly.
    if (flat.length !== texts.length * dim) {
        throw new Error(`Unexpected embedding output size: got ${flat.length} values for ${texts.length} texts of dimension ${dim}.`);
    }
    const embeddings = [];
    for (let i = 0; i < texts.length; i++) {
        const start = i * dim;
        embeddings.push(Array.from(flat.slice(start, start + dim)));
    }
    return embeddings;
}
61
+ //# sourceMappingURL=embedding.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"embedding.js","sourceRoot":"","sources":["../../src/rag/embedding.ts"],"names":[],"mappings":"AAAA,OAAO,EAEN,QAAQ,GACR,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,IAAI,iBAAiB,GAAqC,IAAI,CAAC;AAE/D;;GAEG;AACH,KAAK,UAAU,oBAAoB;IAClC,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACxB,OAAO,CAAC,GAAG,CAAC,4BAA4B,UAAU,CAAC,SAAS,CAAC,KAAK,KAAK,CAAC,CAAC;QACzE,iBAAiB,GAAG,CAAC,MAAM,QAAQ,CAClC,oBAAoB,EACpB,UAAU,CAAC,SAAS,CAAC,KAAK,EAC1B,EAAE,KAAK,EAAE,MAAM,EAAE,CACjB,CAAyC,CAAC;QAC3C,OAAO,CAAC,GAAG,CAAC,sCAAsC,CAAC,CAAC;IACrD,CAAC;IACD,OAAO,iBAAiB,CAAC;AAC1B,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,IAAY,EAAE,OAAO,GAAG,KAAK;IACxD,MAAM,IAAI,GAAG,MAAM,oBAAoB,EAAE,CAAC;IAE1C,6BAA6B;IAC7B,MAAM,MAAM,GAAG,OAAO;QACrB,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK;QACnC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,CAAC;IAEvC,MAAM,YAAY,GAAG,MAAM,GAAG,IAAI,CAAC;IAEnC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,YAAY,EAAE;QACvC,OAAO,EAAE,MAAM;QACf,SAAS,EAAE,IAAI;KACf,CAAC,CAAC;IAEH,mBAAmB;IACnB,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAoB,CAAC,CAAC;AAChD,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAC/B,KAAe,EACf,OAAO,GAAG,KAAK;IAEf,MAAM,IAAI,GAAG,MAAM,oBAAoB,EAAE,CAAC;IAE1C,MAAM,MAAM,GAAG,OAAO;QACrB,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK;QACnC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,CAAC;IAEvC,MAAM,aAAa,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAEnD,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,aAAa,EAAE;QACzC,OAAO,EAAE,MAAM;QACf,SAAS,EAAE,IAAI;KACf,CAAC,CAAC;IAEH,uDAAuD;IACvD,MAAM,UAAU,GAAe,EAAE,CAAC;IAClC,MAAM,GAAG,GAAG,UAAU,CAAC,MAAM,CAAC,UAAU,CAAC;IAEzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,CAAC,GAAG,GAAG,CAAC;QACtB,MAAM,GAAG,GAAG,KAAK,GAAG,GAAG,CAAC;QACxB,UAAU,CAAC,IAAI,CACd,KAAK,CAAC,IAAI,CAAE,OAAO,CAAC,IAAqB,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAC5D,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACnB,CAAC"}
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=ingest.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ingest.d.ts","sourceRoot":"","sources":["../../src/rag/ingest.ts"],"names":[],"mappings":""}
@@ -0,0 +1,148 @@
1
+ #!/usr/bin/env node
2
+ import { readFileSync } from "fs";
3
+ import { glob } from "glob";
4
+ import { parseJsonArticle, parseMarkdown } from "./chunker.js";
5
+ import { RAG_CONFIG } from "./config.js";
6
+ import { embedBatch } from "./embedding.js";
7
+ import { ensureCollection, getCollectionStats, getExistingHashes, upsertPoints, } from "./qdrant-client.js";
8
+ const BATCH_SIZE = 20; // number of changed chunks embedded and upserted per batch
9
+ const MAX_RETRIES = 3; // default value of withRetry's retries parameter (total attempts, not extra ones)
10
+ const RETRY_DELAY_MS = 2000; // pause between withRetry attempts, in milliseconds
11
/** Pause for `ms` milliseconds using a timer. */
async function sleep(ms) {
    await new Promise((done) => {
        setTimeout(done, ms);
    });
}
14
/**
 * Run `fn`, trying again after a fixed delay when it fails.
 *
 * @param fn - Async operation to run; called with no arguments.
 * @param retries - Total number of attempts (default MAX_RETRIES). Values
 *                  below 1 or not a number are treated as a single attempt.
 * @returns Whatever `fn` resolves to on the first successful attempt.
 * @throws The error of the last attempt when every attempt fails.
 */
async function withRetry(fn, retries = MAX_RETRIES) {
    const attempts = retries >= 1 ? Math.floor(retries) : 1;
    for (let attempt = 1;; attempt++) {
        try {
            return await fn();
        }
        catch (error) {
            if (attempt >= attempts)
                throw error;
            // Include the failure reason so repeated retries can be diagnosed from the log.
            const reason = error instanceof Error ? error.message : String(error);
            console.log(` Retry ${attempt}/${attempts} after error: ${reason}`);
            await sleep(RETRY_DELAY_MS);
        }
    }
}
28
/**
 * Collect every file to ingest, tagged with how it has to be parsed.
 *
 * JSON matches come first, followed by the Markdown matches of each pattern
 * in configuration order.
 *
 * @returns Entries of the form `{ path, type }`, where type is "json" or "markdown".
 */
async function listFiles() {
    const jsonMatches = await glob(RAG_CONFIG.paths.json);
    let found = jsonMatches.map((path) => ({ path, type: "json" }));
    // The Markdown setting may be a single pattern or a list of patterns.
    const configured = RAG_CONFIG.paths.markdown;
    const patterns = Array.isArray(configured) ? configured : [configured];
    for (const pattern of patterns) {
        const markdownMatches = await glob(pattern);
        found = found.concat(markdownMatches.map((path) => ({ path, type: "markdown" })));
    }
    return found;
}
50
/**
 * Read one listed file as UTF-8 and turn it into chunks with the parser
 * that matches its type.
 *
 * @param file - Entry of the form `{ path, type }`.
 * @returns The chunks parsed from the file.
 */
function parseFile(file) {
    const { path, type } = file;
    const content = readFileSync(path, "utf-8");
    const parse = type === "json" ? parseJsonArticle : parseMarkdown;
    return parse(path, content);
}
60
/**
 * Ingest all documents: parse every listed file into chunks, keep the chunks
 * whose content hash differs from the one already stored, then embed and
 * upsert those in batches. Progress is reported on the console.
 *
 * A file that fails to parse is logged and skipped; the run continues.
 */
async function ingest() {
    console.log("Starting ingest...\n");
    await ensureCollection();
    const { pointsCount: initialCount } = await getCollectionStats();
    console.log(`Initial points count: ${initialCount}\n`);
    // Discover the input files and report how many of each type were found.
    const files = await listFiles();
    const countOfType = (wanted) => files.reduce((total, f) => (f.type === wanted ? total + 1 : total), 0);
    console.log(`Found ${files.length} files to process`);
    console.log(` - JSON: ${countOfType("json")}`);
    console.log(` - Markdown: ${countOfType("markdown")}\n`);
    // Parse everything up front.
    console.log("Parsing files...");
    const allChunks = [];
    for (const file of files) {
        try {
            for (const chunk of parseFile(file)) {
                allChunks.push(chunk);
            }
        }
        catch (error) {
            console.error(`Error parsing ${file.path}:`, error);
        }
    }
    console.log(`Total chunks: ${allChunks.length}\n`);
    // A chunk counts as changed when its stored hash is missing or different.
    console.log("Checking for changes...");
    const existingHashes = await getExistingHashes(allChunks.map((chunk) => chunk.chunk_id));
    const hasChanged = (chunk) => existingHashes.get(chunk.chunk_id) !== chunk.content_hash;
    const changedChunks = allChunks.filter(hasChanged);
    const totalChanged = changedChunks.length;
    console.log(`Changed chunks: ${totalChanged}`);
    console.log(`Skipped (unchanged): ${allChunks.length - totalChanged}\n`);
    if (totalChanged === 0) {
        console.log("No changes detected. Done!");
        return;
    }
    console.log("Generating embeddings and upserting...");
    let processed = 0;
    let offset = 0;
    while (offset < totalChanged) {
        const batch = changedChunks.slice(offset, offset + BATCH_SIZE);
        const embeddings = await embedBatch(batch.map((chunk) => chunk.text), false);
        const points = batch.map((chunk, position) => ({
            id: chunk.chunk_id,
            vector: embeddings[position],
            payload: chunk,
        }));
        // Only the upsert is retried; the embeddings are reused across attempts.
        await withRetry(() => upsertPoints(points));
        processed += batch.length;
        console.log(` Progress: ${processed}/${totalChanged}`);
        // Small delay between batches to avoid overwhelming Qdrant
        await sleep(100);
        offset += BATCH_SIZE;
    }
    const { pointsCount: finalCount } = await getCollectionStats();
    console.log(`\nFinal points count: ${finalCount}`);
    console.log("Ingest completed successfully!");
}
129
/**
 * Command-line entry point: require QDRANT_URL to be set, then run the ingest.
 * A failed ingest is reported on stderr (the message is Japanese for
 * "An error occurred") and the process exits with status 1.
 *
 * @throws If the QDRANT_URL environment variable is missing or empty.
 */
async function main() {
    const { QDRANT_URL } = process.env;
    if (!QDRANT_URL) {
        throw new Error("QDRANT_URL environment variable is required.");
    }
    try {
        await ingest();
    }
    catch (error) {
        console.error("❌ エラーが発生しました:", error);
        process.exit(1);
    }
}
// Anything that escapes main (such as the missing-variable error) also ends the process with status 1.
main().then(undefined, (failure) => {
    console.error(failure);
    process.exit(1);
});
148
+ //# sourceMappingURL=ingest.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ingest.js","sourceRoot":"","sources":["../../src/rag/ingest.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,YAAY,EAAE,MAAM,IAAI,CAAC;AAClC,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,gBAAgB,EAAE,aAAa,EAAE,MAAM,cAAc,CAAC;AAC/D,OAAO,EAAsB,UAAU,EAAE,MAAM,aAAa,CAAC;AAC7D,OAAO,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAC5C,OAAO,EACN,gBAAgB,EAChB,kBAAkB,EAClB,iBAAiB,EACjB,YAAY,GACZ,MAAM,oBAAoB,CAAC;AAE5B,MAAM,UAAU,GAAG,EAAE,CAAC;AACtB,MAAM,WAAW,GAAG,CAAC,CAAC;AACtB,MAAM,cAAc,GAAG,IAAI,CAAC;AAE5B,KAAK,UAAU,KAAK,CAAC,EAAU;IAC9B,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,CAAC;AAC1D,CAAC;AAED,KAAK,UAAU,SAAS,CACvB,EAAoB,EACpB,OAAO,GAAG,WAAW;IAErB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,IAAI,CAAC;YACJ,OAAO,MAAM,EAAE,EAAE,CAAC;QACnB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,IAAI,CAAC,KAAK,OAAO,GAAG,CAAC;gBAAE,MAAM,KAAK,CAAC;YACnC,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,IAAI,OAAO,iBAAiB,CAAC,CAAC;YAC1D,MAAM,KAAK,CAAC,cAAc,CAAC,CAAC;QAC7B,CAAC;IACF,CAAC;IACD,MAAM,IAAI,KAAK,CAAC,aAAa,CAAC,CAAC;AAChC,CAAC;AAOD;;GAEG;AACH,KAAK,UAAU,SAAS;IACvB,MAAM,KAAK,GAAe,EAAE,CAAC;IAE7B,aAAa;IACb,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACpD,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;QAC9B,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;IACpC,CAAC;IAED,8CAA8C;IAC9C,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,QAAQ,CAAC;QAC1D,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,QAAQ;QAC3B,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;IAE/B,KAAK,MAAM,OAAO,IAAI,UAAU,EAAE,CAAC;QAClC,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,CAAC;QACpC,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE,CAAC;YAC5B,KAAK,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;QACxC,CAAC;IACF,CAAC;IAED,OAAO,KAAK,CAAC;AACd,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,IAAc;IAChC,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAEjD,IAAI,IAAI,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;QAC1B,OAAO,gBAAgB,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;IAC7C
,CAAC;IACD,OAAO,aAAa,CAAC,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;AAC1C,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,MAAM;IACpB,OAAO,CAAC,GAAG,CAAC,sBAAsB,CAAC,CAAC;IAEpC,2BAA2B;IAC3B,MAAM,gBAAgB,EAAE,CAAC;IAEzB,oBAAoB;IACpB,MAAM,YAAY,GAAG,MAAM,kBAAkB,EAAE,CAAC;IAChD,OAAO,CAAC,GAAG,CAAC,yBAAyB,YAAY,CAAC,WAAW,IAAI,CAAC,CAAC;IAEnE,iBAAiB;IACjB,MAAM,KAAK,GAAG,MAAM,SAAS,EAAE,CAAC;IAChC,OAAO,CAAC,GAAG,CAAC,SAAS,KAAK,CAAC,MAAM,mBAAmB,CAAC,CAAC;IACtD,OAAO,CAAC,GAAG,CAAC,aAAa,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;IAC1E,OAAO,CAAC,GAAG,CACV,iBAAiB,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,UAAU,CAAC,CAAC,MAAM,IAAI,CACtE,CAAC;IAEF,8BAA8B;IAC9B,OAAO,CAAC,GAAG,CAAC,kBAAkB,CAAC,CAAC;IAChC,MAAM,SAAS,GAAoB,EAAE,CAAC;IAEtC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,IAAI,CAAC;YACJ,MAAM,MAAM,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;YAC/B,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;QAC3B,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,OAAO,CAAC,KAAK,CAAC,iBAAiB,IAAI,CAAC,IAAI,GAAG,EAAE,KAAK,CAAC,CAAC;QACrD,CAAC;IACF,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,iBAAiB,SAAS,CAAC,MAAM,IAAI,CAAC,CAAC;IAEnD,2CAA2C;IAC3C,OAAO,CAAC,GAAG,CAAC,yBAAyB,CAAC,CAAC;IACvC,MAAM,QAAQ,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IAClD,MAAM,cAAc,GAAG,MAAM,iBAAiB,CAAC,QAAQ,CAAC,CAAC;IAEzD,gCAAgC;IAChC,MAAM,aAAa,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE;QAChD,MAAM,YAAY,GAAG,cAAc,CAAC,GAAG,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACxD,OAAO,YAAY,KAAK,KAAK,CAAC,YAAY,CAAC;IAC5C,CAAC,CAAC,CAAC;IAEH,OAAO,CAAC,GAAG,CAAC,mBAAmB,aAAa,CAAC,MAAM,EAAE,CAAC,CAAC;IACvD,OAAO,CAAC,GAAG,CACV,wBAAwB,SAAS,CAAC,MAAM,GAAG,aAAa,CAAC,MAAM,IAAI,CACnE,CAAC;IAEF,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChC,OAAO,CAAC,GAAG,CAAC,4BAA4B,CAAC,CAAC;QAC1C,OAAO;IACR,CAAC;IAED,4CAA4C;IAC5C,OAAO,CAAC,GAAG,CAAC,wCAAwC,CAAC,CAAC;IACtD,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,CAAC,MAAM,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC;QAC3D,MAAM,KAAK,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC,EAAE,
CAAC,GAAG,UAAU,CAAC,CAAC;QACrD,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAEvC,sBAAsB;QACtB,MAAM,UAAU,GAAG,MAAM,UAAU,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;QAElD,iBAAiB;QACjB,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC;YACzC,EAAE,EAAE,KAAK,CAAC,QAAQ;YAClB,MAAM,EAAE,UAAU,CAAC,GAAG,CAAC;YACvB,OAAO,EAAE,KAAK;SACd,CAAC,CAAC,CAAC;QAEJ,8BAA8B;QAC9B,MAAM,SAAS,CAAC,GAAG,EAAE,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC,CAAC;QAE5C,SAAS,IAAI,KAAK,CAAC,MAAM,CAAC;QAC1B,OAAO,CAAC,GAAG,CAAC,eAAe,SAAS,IAAI,aAAa,CAAC,MAAM,EAAE,CAAC,CAAC;QAEhE,2DAA2D;QAC3D,MAAM,KAAK,CAAC,GAAG,CAAC,CAAC;IAClB,CAAC;IAED,kBAAkB;IAClB,MAAM,UAAU,GAAG,MAAM,kBAAkB,EAAE,CAAC;IAC9C,OAAO,CAAC,GAAG,CAAC,yBAAyB,UAAU,CAAC,WAAW,EAAE,CAAC,CAAC;IAC/D,OAAO,CAAC,GAAG,CAAC,gCAAgC,CAAC,CAAC;AAC/C,CAAC;AAED,KAAK,UAAU,IAAI;IAClB,MAAM,GAAG,GAAG;QACX,UAAU,EAAE,OAAO,CAAC,GAAG,CAAC,UAAU;KACzB,CAAC;IAEX,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC;QACrB,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;IACjE,CAAC;IAED,IAAI,CAAC;QACJ,MAAM,MAAM,EAAE,CAAC;IAChB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,eAAe,EAAE,KAAK,CAAC,CAAC;QACtC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC;AACF,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACtB,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACrB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACjB,CAAC,CAAC,CAAC"}
@@ -0,0 +1,40 @@
1
+ import { QdrantClient } from "@qdrant/js-client-rest";
2
+ import { type QdrantPayload, type SearchResult } from "./config.js";
3
+ /**
4
+ * Get or create Qdrant client
5
+ */
6
+ export declare function getQdrantClient(): QdrantClient;
7
+ /**
8
+ * Create collection if not exists
9
+ */
10
+ export declare function ensureCollection(): Promise<void>;
11
+ /**
12
+ * Upsert points to Qdrant
13
+ */
14
+ export declare function upsertPoints(points: {
15
+ id: string;
16
+ vector: number[];
17
+ payload: QdrantPayload;
18
+ }[]): Promise<void>;
19
+ /**
20
+ * Get existing content hashes for a set of chunk IDs
21
+ */
22
+ export declare function getExistingHashes(chunkIds: string[]): Promise<Map<string, string>>;
23
+ /**
24
+ * Search for similar documents
25
+ */
26
+ export declare function search(queryVector: number[], options?: {
27
+ topK?: number;
28
+ filter?: {
29
+ type?: "markdown_note" | "bookmark_json";
30
+ top_heading?: string;
31
+ };
32
+ }): Promise<SearchResult[]>;
33
+ /**
34
+ * Get collection stats
35
+ */
36
+ export declare function getCollectionStats(): Promise<{
37
+ pointsCount: number;
38
+ status: string;
39
+ }>;
40
+ //# sourceMappingURL=qdrant-client.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"qdrant-client.d.ts","sourceRoot":"","sources":["../../src/rag/qdrant-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAE,KAAK,aAAa,EAAc,KAAK,YAAY,EAAE,MAAM,aAAa,CAAC;AAIhF;;GAEG;AACH,wBAAgB,eAAe,IAAI,YAAY,CAgB9C;AAED;;GAEG;AACH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC,CAmBtD;AAED;;GAEG;AACH,wBAAsB,YAAY,CACjC,MAAM,EAAE;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAC;IAAC,OAAO,EAAE,aAAa,CAAA;CAAE,EAAE,GAChE,OAAO,CAAC,IAAI,CAAC,CAef;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CACtC,QAAQ,EAAE,MAAM,EAAE,GAChB,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CA4B9B;AAED;;GAEG;AACH,wBAAsB,MAAM,CAC3B,WAAW,EAAE,MAAM,EAAE,EACrB,OAAO,GAAE;IACR,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE;QACR,IAAI,CAAC,EAAE,eAAe,GAAG,eAAe,CAAC;QACzC,WAAW,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;CACG,GACJ,OAAO,CAAC,YAAY,EAAE,CAAC,CA6CzB;AAED;;GAEG;AACH,wBAAsB,kBAAkB,IAAI,OAAO,CAAC;IACnD,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;CACf,CAAC,CAgBD"}
@@ -0,0 +1,160 @@
1
+ import { QdrantClient } from "@qdrant/js-client-rest";
2
+ import { RAG_CONFIG } from "./config.js";
3
+ let client = null;
/**
 * Return the module-wide Qdrant client, constructing it on first use.
 *
 * The client is configured from the QDRANT_URL and QDRANT_API_KEY environment
 * variables and stored in the module-level `client` binding, so every later
 * call hands back the same instance.
 *
 * @returns The cached QdrantClient instance.
 * @throws {Error} If QDRANT_URL is unset or empty when the client is first built.
 */
export function getQdrantClient() {
    // Fast path: a client was already created by an earlier call.
    if (client) {
        return client;
    }
    const { QDRANT_URL: url, QDRANT_API_KEY: apiKey } = process.env;
    if (!url) {
        throw new Error("QDRANT_URL environment variable is required");
    }
    client = new QdrantClient({ url, apiKey });
    return client;
}
/**
 * Make sure the configured collection exists, creating it when it is missing.
 *
 * Lists the collections known to the server and compares names against
 * `RAG_CONFIG.qdrant.collectionName`. When no match is found, the collection
 * is created with the configured vector size and distance metric. Progress is
 * reported on stdout in both cases.
 *
 * @returns A promise that resolves once the collection is known to exist.
 */
export async function ensureCollection() {
    const qdrant = getQdrantClient();
    const { collectionName: name, vectorSize: size, distance } = RAG_CONFIG.qdrant;
    const listing = await qdrant.getCollections();
    const alreadyPresent = listing.collections.some((entry) => entry.name === name);
    if (alreadyPresent) {
        console.log(`Collection ${name} already exists.`);
        return;
    }
    console.log(`Creating collection: ${name}`);
    await qdrant.createCollection(name, {
        vectors: { size, distance },
    });
    console.log(`Collection ${name} created successfully.`);
}
43
+ /**
44
+ * Upsert points to Qdrant
45
+ */
46
+ export async function upsertPoints(points) {
47
+ const qdrant = getQdrantClient();
48
+ const { collectionName } = RAG_CONFIG.qdrant;
49
+ // Qdrant requires numeric or UUID IDs, so we hash the chunk_id
50
+ const qdrantPoints = points.map((p) => ({
51
+ id: hashToUint(p.id),
52
+ vector: p.vector,
53
+ payload: p.payload,
54
+ }));
55
+ await qdrant.upsert(collectionName, {
56
+ wait: true,
57
+ points: qdrantPoints,
58
+ });
59
+ }
60
+ /**
61
+ * Get existing content hashes for a set of chunk IDs
62
+ */
63
+ export async function getExistingHashes(chunkIds) {
64
+ const qdrant = getQdrantClient();
65
+ const { collectionName } = RAG_CONFIG.qdrant;
66
+ const hashMap = new Map();
67
+ if (chunkIds.length === 0)
68
+ return hashMap;
69
+ // Convert chunk IDs to numeric IDs
70
+ const numericIds = chunkIds.map((id) => hashToUint(id));
71
+ try {
72
+ const result = await qdrant.retrieve(collectionName, {
73
+ ids: numericIds,
74
+ with_payload: ["chunk_id", "content_hash"],
75
+ });
76
+ for (const point of result) {
77
+ const payload = point.payload;
78
+ if (payload?.chunk_id && payload?.content_hash) {
79
+ hashMap.set(payload.chunk_id, payload.content_hash);
80
+ }
81
+ }
82
+ }
83
+ catch {
84
+ // Collection might not exist or be empty
85
+ }
86
+ return hashMap;
87
+ }
/**
 * Run a vector similarity search against the configured collection.
 *
 * @param queryVector Embedding of the query text.
 * @param options Optional settings: `topK` (result limit, default 10) and
 *                `filter` with optional `type` and `top_heading` values.
 *                Each filter value that is truthy becomes an exact-match
 *                `must` condition on the payload field of the same name.
 * @returns One entry per hit carrying the score plus the `text`, `title`,
 *          `url`, `heading_path`, `type` and `doc_id` payload fields.
 */
export async function search(queryVector, options = {}) {
    const qdrant = getQdrantClient();
    const { collectionName } = RAG_CONFIG.qdrant;
    const limit = options.topK === undefined ? 10 : options.topK;
    const requested = options.filter;
    // Payload key / requested value pairs; unset or empty values are dropped.
    const must = [
        ["type", requested?.type],
        ["top_heading", requested?.top_heading],
    ]
        .filter(([, value]) => value)
        .map(([key, value]) => ({ key, match: { value } }));
    const hits = await qdrant.search(collectionName, {
        vector: queryVector,
        limit,
        with_payload: true,
        filter: must.length > 0 ? { must } : undefined,
    });
    return hits.map(({ score, payload }) => ({
        score,
        text: payload.text,
        title: payload.title,
        url: payload.url,
        heading_path: payload.heading_path,
        type: payload.type,
        doc_id: payload.doc_id,
    }));
}
/**
 * Report the point count and status of the configured collection.
 *
 * Any failure while fetching or reading the collection info, not only a
 * missing collection, is reported as `{ pointsCount: 0, status: "not_found" }`
 * rather than thrown.
 *
 * @returns `pointsCount` (0 when the server reports none) and `status`.
 */
export async function getCollectionStats() {
    const qdrant = getQdrantClient();
    const { collectionName } = RAG_CONFIG.qdrant;
    try {
        const { points_count: pointsCount, status } = await qdrant.getCollection(collectionName);
        return { pointsCount: pointsCount ?? 0, status };
    }
    catch {
        return { pointsCount: 0, status: "not_found" };
    }
}
/**
 * Hash a string to a non-negative integer usable as a Qdrant point ID.
 *
 * Computes the classic 31-multiplier rolling hash over the string's UTF-16
 * code units, wrapped to a signed 32-bit integer after every step, and
 * returns its absolute value. The result is therefore in the range
 * 0..2147483648.
 *
 * NOTE: with only 32 bits of state, and the sign folded away by `Math.abs`,
 * distinct strings can map to the same ID (for example "Aa" and "BB").
 * Callers that use the result as a storage key should be aware that two
 * colliding chunk IDs address the same point. The produced values must stay
 * stable, since they identify points that are already stored.
 *
 * @param str String to hash (a chunk ID).
 * @returns The non-negative integer hash of `str`; 0 for the empty string.
 */
function hashToUint(str) {
    let hash = 0;
    for (let i = 0; i < str.length; i++) {
        // Math.imul performs the multiply in 32-bit space and `| 0` wraps the
        // sum back to int32; this equals `(hash << 5) - hash + char` wrapped.
        hash = (Math.imul(31, hash) + str.charCodeAt(i)) | 0;
    }
    return Math.abs(hash);
}
160
+ //# sourceMappingURL=qdrant-client.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"qdrant-client.js","sourceRoot":"","sources":["../../src/rag/qdrant-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAsB,UAAU,EAAqB,MAAM,aAAa,CAAC;AAEhF,IAAI,MAAM,GAAwB,IAAI,CAAC;AAEvC;;GAEG;AACH,MAAM,UAAU,eAAe;IAC9B,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC;QACnC,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;QAE1C,IAAI,CAAC,GAAG,EAAE,CAAC;YACV,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;QAChE,CAAC;QAED,MAAM,GAAG,IAAI,YAAY,CAAC;YACzB,GAAG;YACH,MAAM;SACN,CAAC,CAAC;IACJ,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB;IACrC,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,EAAE,cAAc,EAAE,UAAU,EAAE,QAAQ,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC;IAEnE,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,cAAc,EAAE,CAAC;IAClD,MAAM,MAAM,GAAG,WAAW,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,cAAc,CAAC,CAAC;IAE9E,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,OAAO,CAAC,GAAG,CAAC,wBAAwB,cAAc,EAAE,CAAC,CAAC;QACtD,MAAM,MAAM,CAAC,gBAAgB,CAAC,cAAc,EAAE;YAC7C,OAAO,EAAE;gBACR,IAAI,EAAE,UAAU;gBAChB,QAAQ;aACR;SACD,CAAC,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,cAAc,cAAc,wBAAwB,CAAC,CAAC;IACnE,CAAC;SAAM,CAAC;QACP,OAAO,CAAC,GAAG,CAAC,cAAc,cAAc,kBAAkB,CAAC,CAAC;IAC7D,CAAC;AACF,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CACjC,MAAkE;IAElE,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,EAAE,cAAc,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC;IAE7C,+DAA+D;IAC/D,MAAM,YAAY,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACvC,EAAE,EAAE,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;QACpB,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,OAAO,EAAE,CAAC,CAAC,OAAO;KAClB,CAAC,CAAC,CAAC;IAEJ,MAAM,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE;QACnC,IAAI,EAAE,IAAI;QACV,MAAM,EAAE,YAAY;KACpB,CAAC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACtC,QAAkB;IAElB,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,EAAE,cAAc,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC;IAE7C,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE1C,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC;IAE1C,mCAAmC;IACnC,MAAM,UAAU,GAAG,QAAQ,CAAC,GAAG,CA
AC,CAAC,EAAE,EAAE,EAAE,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC;IAExD,IAAI,CAAC;QACJ,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,cAAc,EAAE;YACpD,GAAG,EAAE,UAAU;YACf,YAAY,EAAE,CAAC,UAAU,EAAE,cAAc,CAAC;SAC1C,CAAC,CAAC;QAEH,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAG,KAAK,CAAC,OAAwB,CAAC;YAC/C,IAAI,OAAO,EAAE,QAAQ,IAAI,OAAO,EAAE,YAAY,EAAE,CAAC;gBAChD,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC;YACrD,CAAC;QACF,CAAC;IACF,CAAC;IAAC,MAAM,CAAC;QACR,yCAAyC;IAC1C,CAAC;IAED,OAAO,OAAO,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,MAAM,CAC3B,WAAqB,EACrB,UAMI,EAAE;IAEN,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,EAAE,cAAc,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC;IAC7C,MAAM,EAAE,IAAI,GAAG,EAAE,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;IAEtC,0BAA0B;IAC1B,MAAM,gBAAgB,GAGjB,EAAE,CAAC;IAER,IAAI,MAAM,EAAE,IAAI,EAAE,CAAC;QAClB,gBAAgB,CAAC,IAAI,CAAC;YACrB,GAAG,EAAE,MAAM;YACX,KAAK,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,IAAI,EAAE;SAC7B,CAAC,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,EAAE,WAAW,EAAE,CAAC;QACzB,gBAAgB,CAAC,IAAI,CAAC;YACrB,GAAG,EAAE,aAAa;YAClB,KAAK,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,WAAW,EAAE;SACpC,CAAC,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE;QAClD,MAAM,EAAE,WAAW;QACnB,KAAK,EAAE,IAAI;QACX,YAAY,EAAE,IAAI;QAClB,MAAM,EACL,gBAAgB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,CAAC,CAAC,CAAC,SAAS;KACrE,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACvB,MAAM,OAAO,GAAG,CAAC,CAAC,OAAwB,CAAC;QAC3C,OAAO;YACN,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,MAAM,EAAE,OAAO,CAAC,MAAM;SACtB,CAAC;IACH,CAAC,CAAC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB;IAIvC,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,EAAE,cAAc,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC;IAE7C,IAAI,CAAC;QACJ,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,cAAc,CAAC,CAAC;QACxD,OAAO;YACN,WAAW,EAAE,IAAI,CAAC,YAAY,IAAI,CAAC;YACnC,MAAM,EAAE,IAAI,CAAC,MAAM;SACnB,CAAC;IACH
,CAAC;IAAC,MAAM,CAAC;QACR,OAAO;YACN,WAAW,EAAE,CAAC;YACd,MAAM,EAAE,WAAW;SACnB,CAAC;IACH,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,UAAU,CAAC,GAAW;IAC9B,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC;QACjC,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,2BAA2B;IAChD,CAAC;IACD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;AACvB,CAAC"}
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=search.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search.d.ts","sourceRoot":"","sources":["../../src/rag/search.ts"],"names":[],"mappings":""}
@@ -0,0 +1,105 @@
1
+ #!/usr/bin/env node
2
+ import { embed } from "./embedding.js";
3
+ import { getCollectionStats, search } from "./qdrant-client.js";
4
+ /**
5
+ * Search for documents matching a query
6
+ */
7
+ async function runSearch() {
8
+ // Parse command line arguments
9
+ const args = process.argv.slice(2);
10
+ if (args.length === 0) {
11
+ console.log("Usage: rag-search <query> [options]");
12
+ console.log("");
13
+ console.log("Options:");
14
+ console.log(" --top-k <number> Number of results (default: 5)");
15
+ console.log(" --type <type> Filter by type: markdown_note | bookmark_json");
16
+ console.log(" --heading <heading> Filter by top_heading");
17
+ console.log("");
18
+ console.log("Examples:");
19
+ console.log(' rag-search "ルネサンス 遠近法"');
20
+ console.log(' rag-search "AI 脆弱性" --type bookmark_json');
21
+ console.log(' rag-search "React" --heading javascript --top-k 10');
22
+ process.exit(1);
23
+ }
24
+ // Parse options
25
+ let query = "";
26
+ let topK = 5;
27
+ let filterType;
28
+ let filterHeading;
29
+ for (let i = 0; i < args.length; i++) {
30
+ if (args[i] === "--top-k" && args[i + 1]) {
31
+ topK = Number.parseInt(args[i + 1], 10);
32
+ i++;
33
+ }
34
+ else if (args[i] === "--type" && args[i + 1]) {
35
+ filterType = args[i + 1];
36
+ i++;
37
+ }
38
+ else if (args[i] === "--heading" && args[i + 1]) {
39
+ filterHeading = args[i + 1];
40
+ i++;
41
+ }
42
+ else if (!args[i].startsWith("--")) {
43
+ query = args[i];
44
+ }
45
+ }
46
+ if (!query) {
47
+ console.error("Error: Query is required");
48
+ process.exit(1);
49
+ }
50
+ // Check collection status
51
+ const stats = await getCollectionStats();
52
+ if (stats.status === "not_found") {
53
+ console.error("Error: Collection not found. Run ingest first.");
54
+ process.exit(1);
55
+ }
56
+ console.log(`Searching for: "${query}"`);
57
+ console.log(`Collection has ${stats.pointsCount} points\n`);
58
+ // Generate query embedding
59
+ console.log("Generating query embedding...");
60
+ const queryVector = await embed(query, true);
61
+ // Search
62
+ console.log("Searching...\n");
63
+ const results = await search(queryVector, {
64
+ topK,
65
+ filter: {
66
+ type: filterType,
67
+ top_heading: filterHeading,
68
+ },
69
+ });
70
+ // Display results
71
+ console.log(`Found ${results.length} results:\n`);
72
+ console.log("=".repeat(80));
73
+ for (let i = 0; i < results.length; i++) {
74
+ const r = results[i];
75
+ console.log(`\n[${i + 1}] Score: ${r.score.toFixed(4)}`);
76
+ console.log(` Title: ${r.title}`);
77
+ console.log(` Type: ${r.type}`);
78
+ console.log(` Path: ${r.heading_path.join(" > ")}`);
79
+ if (r.url) {
80
+ console.log(` URL: ${r.url}`);
81
+ }
82
+ console.log(` Text: ${r.text.slice(0, 200)}${r.text.length > 200 ? "..." : ""}`);
83
+ console.log("-".repeat(80));
84
+ }
85
+ }
/**
 * CLI entry point: require QDRANT_URL, then run the search.
 *
 * Errors thrown by `runSearch` are logged and turn into exit code 1.
 *
 * @throws {Error} If QDRANT_URL is unset or empty; this is raised before the
 *                 try block, so it propagates to the caller.
 */
async function main() {
    const { QDRANT_URL: qdrantUrl } = process.env;
    if (!qdrantUrl) {
        throw new Error("QDRANT_URL environment variable is required.");
    }
    try {
        await runSearch();
    }
    catch (error) {
        console.error("❌ エラーが発生しました:", error);
        process.exit(1);
    }
}

// Launch the CLI. Rejections that escape main() (such as the missing
// QDRANT_URL error) are printed here and end the process with exit code 1.
const reportFatal = (error) => {
    console.error(error);
    process.exit(1);
};
main().catch(reportFatal);
105
+ //# sourceMappingURL=search.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"search.js","sourceRoot":"","sources":["../../src/rag/search.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,KAAK,EAAE,MAAM,gBAAgB,CAAC;AACvC,OAAO,EAAE,kBAAkB,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEhE;;GAEG;AACH,KAAK,UAAU,SAAS;IACvB,+BAA+B;IAC/B,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAEnC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,GAAG,CAAC,qCAAqC,CAAC,CAAC;QACnD,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChB,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;QACxB,OAAO,CAAC,GAAG,CAAC,uDAAuD,CAAC,CAAC;QACrE,OAAO,CAAC,GAAG,CACV,sEAAsE,CACtE,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,8CAA8C,CAAC,CAAC;QAC5D,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChB,OAAO,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QACzB,OAAO,CAAC,GAAG,CAAC,0BAA0B,CAAC,CAAC;QACxC,OAAO,CAAC,GAAG,CAAC,4CAA4C,CAAC,CAAC;QAC1D,OAAO,CAAC,GAAG,CAAC,sDAAsD,CAAC,CAAC;QACpE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC;IAED,gBAAgB;IAChB,IAAI,KAAK,GAAG,EAAE,CAAC;IACf,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,IAAI,UAAyD,CAAC;IAC9D,IAAI,aAAiC,CAAC;IAEtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,SAAS,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAC1C,IAAI,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACxC,CAAC,EAAE,CAAC;QACL,CAAC;aAAM,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,QAAQ,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAChD,UAAU,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,CAAsC,CAAC;YAC9D,CAAC,EAAE,CAAC;QACL,CAAC;aAAM,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,WAAW,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YACnD,aAAa,GAAG,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAC5B,CAAC,EAAE,CAAC;QACL,CAAC;aAAM,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YACtC,KAAK,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACjB,CAAC;IACF,CAAC;IAED,IAAI,CAAC,KAAK,EAAE,CAAC;QACZ,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QAC1C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC;IAED,0BAA0B;IAC1B,MAAM,KAAK,GAAG,MAAM,kBAAkB,EAAE,CAAC;IACzC,IAAI,KAAK,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;QAClC,OAAO,CAAC,KAAK
,CAAC,gDAAgD,CAAC,CAAC;QAChE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,mBAAmB,KAAK,GAAG,CAAC,CAAC;IACzC,OAAO,CAAC,GAAG,CAAC,kBAAkB,KAAK,CAAC,WAAW,WAAW,CAAC,CAAC;IAE5D,2BAA2B;IAC3B,OAAO,CAAC,GAAG,CAAC,+BAA+B,CAAC,CAAC;IAC7C,MAAM,WAAW,GAAG,MAAM,KAAK,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC;IAE7C,SAAS;IACT,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;IAC9B,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,WAAW,EAAE;QACzC,IAAI;QACJ,MAAM,EAAE;YACP,IAAI,EAAE,UAAU;YAChB,WAAW,EAAE,aAAa;SAC1B;KACD,CAAC,CAAC;IAEH,kBAAkB;IAClB,OAAO,CAAC,GAAG,CAAC,SAAS,OAAO,CAAC,MAAM,aAAa,CAAC,CAAC;IAClD,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAE5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QACrB,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACzD,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;QACrC,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QACnC,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;QACvD,IAAI,CAAC,CAAC,GAAG,EAAE,CAAC;YACX,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC;QAClC,CAAC;QACD,OAAO,CAAC,GAAG,CACV,aAAa,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CACtE,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC;IAC7B,CAAC;AACF,CAAC;AAED,KAAK,UAAU,IAAI;IAClB,MAAM,GAAG,GAAG;QACX,UAAU,EAAE,OAAO,CAAC,GAAG,CAAC,UAAU;KACzB,CAAC;IAEX,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,CAAC;QACrB,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;IACjE,CAAC;IAED,IAAI,CAAC;QACJ,MAAM,SAAS,EAAE,CAAC;IACnB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,eAAe,EAAE,KAAK,CAAC,CAAC;QACtC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC;AACF,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACtB,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACrB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACjB,CAAC,CAAC,CAAC"}
@@ -4,7 +4,7 @@ import { mkdir, readdir, readFile, writeFile } from "node:fs/promises";
4
4
  import path from "node:path";
5
5
  import { createPushoverService } from "@s-hirano-ist/s-notification";
6
6
  import TurndownService from "turndown";
7
- const FETCHED_URLS_FILE = "script/fetched_urls.txt";
7
+ const FETCHED_URLS_FILE = "fetched_urls.txt";
8
8
  const JSON_DIR = "json/article";
9
9
  const OUTPUT_DIR = "raw/article";
10
10
  async function loadFetchedUrls() {
@@ -1 +1 @@
1
- {"version":3,"file":"update-raw-articles.js","sourceRoot":"","sources":["../src/update-raw-articles.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACvE,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,eAAe,MAAM,UAAU,CAAC;AAEvC,MAAM,iBAAiB,GAAG,yBAAyB,CAAC;AACpD,MAAM,QAAQ,GAAG,cAAc,CAAC;AAChC,MAAM,UAAU,GAAG,aAAa,CAAC;AAEjC,KAAK,UAAU,eAAe;IAC7B,IAAI,CAAC;QACJ,IAAI,UAAU,CAAC,iBAAiB,CAAC,EAAE,CAAC;YACnC,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,iBAAiB,EAAE,OAAO,CAAC,CAAC;YAC3D,OAAO,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QACjE,CAAC;IACF,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,6BAA6B,EAAE,KAAK,CAAC,CAAC;IACrD,CAAC;IACD,OAAO,IAAI,GAAG,EAAE,CAAC;AAClB,CAAC;AAED,KAAK,UAAU,eAAe,CAAC,IAAiB;IAC/C,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;IAC3C,MAAM,SAAS,CAAC,iBAAiB,EAAE,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,OAAO,CAAC,CAAC;AACpE,CAAC;AAED,KAAK,UAAU,oBAAoB,CAAC,GAAW;IAC9C,IAAI,CAAC;QACJ,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YACjC,OAAO,EAAE;gBACR,YAAY,EACX,2HAA2H;aAC5H;SACD,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAEnC,MAAM,eAAe,GAAG,IAAI,eAAe,CAAC;YAC3C,YAAY,EAAE,KAAK;YACnB,cAAc,EAAE,QAAQ;YACxB,gBAAgB,EAAE,GAAG;YACrB,eAAe,EAAE,IAAI;YACrB,WAAW,EAAE,GAAG;SAChB,CAAC,CAAC;QAEH,eAAe,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;QAEtE,eAAe,CAAC,OAAO,CAAC,eAAe,EAAE;YACxC,MAAM,EAAE,CAAC,GAAG,CAAC;YACb,WAAW,EAAE,CAAC,OAAO,EAAE,IAAI,EAAE,EAAE;gBAC9B,MAAM,OAAO,GAAG,IAAyB,CAAC;gBAC1C,MAAM,IAAI,GAAG,OAAO,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;gBAC1C,IAAI,CAAC,IAAI;oBAAE,OAAO,OAAO,CAAC;gBAE1B,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;oBACzD,OAAO,IAAI,OAAO,KAAK,IAAI,GA
AG,CAAC;gBAChC,CAAC;gBACD,IAAI,CAAC;oBACJ,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC;oBAC5C,OAAO,IAAI,OAAO,KAAK,WAAW,GAAG,CAAC;gBACvC,CAAC;gBAAC,MAAM,CAAC;oBACR,OAAO,OAAO,CAAC;gBAChB,CAAC;YACF,CAAC;SACD,CAAC,CAAC;QAEH,eAAe,CAAC,OAAO,CAAC,gBAAgB,EAAE;YACzC,MAAM,EAAE,CAAC,KAAK,CAAC;YACf,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;gBAC/B,MAAM,OAAO,GAAG,IAAwB,CAAC;gBACzC,MAAM,GAAG,GAAG,OAAO,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;gBACxC,MAAM,GAAG,GAAG,OAAO,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;gBAC9C,IAAI,CAAC,GAAG;oBAAE,OAAO,EAAE,CAAC;gBAEpB,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;oBACvD,OAAO,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC;gBAC5B,CAAC;gBACD,IAAI,CAAC;oBACJ,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC;oBAC3C,OAAO,KAAK,GAAG,KAAK,WAAW,GAAG,CAAC;gBACpC,CAAC;gBAAC,MAAM,CAAC;oBACR,OAAO,EAAE,CAAC;gBACX,CAAC;YACF,CAAC;SACD,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,eAAe,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAChD,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;IACxB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,mBAAmB,GAAG,GAAG,EAAE,KAAK,CAAC,CAAC;QAChD,OAAO,EAAE,CAAC;IACX,CAAC;AACF,CAAC;AAeD,KAAK,UAAU,cAAc,CAC5B,QAAgB,EAChB,WAAwB;IAExB,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAClD,MAAM,IAAI,GAAiB,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAE/C,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;IAEjC,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC7B,MAAM,KAAK,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC7B,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,IAAI,CAAC;QACzC,IAAI,CAAC;YACJ,IAAI,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC;gBAClC,SAAS;YACV,CAAC;YAED,MAAM,WAAW,GAAG,MAAM,oBAAoB,CAAC,GAAG,CAAC,CAAC;YAEpD,IAAI,CAAC,WAAW,EAAE,CAAC;gBAClB,SAAS;YACV,CAAC;YAED,MAAM,WAAW,GAAG,GAAG,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;YACpD,MAAM,OAAO,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAC;YAChD,MAAM,cAAc,GAAG,GAAG,OAAO,KAAK,CAAC;YACvC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,cAAc,CAAC,C
AAC;YAEzD,MAAM,eAAe,GAAG,MAAM,KAAK,KAAK,GAAG;;;;;;EAM5C,KAAK;;;;EAIL,WAAW;CACZ,CAAC;YAEC,MAAM,SAAS,CAAC,UAAU,EAAE,eAAe,EAAE,OAAO,CAAC,CAAC;YACtD,OAAO,CAAC,GAAG,CAAC,aAAa,UAAU,EAAE,CAAC,CAAC;YAEvC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC;QAAC,OAAO,MAAM,EAAE,CAAC;YACjB,OAAO,CAAC,KAAK,CAAC,gBAAgB,EAAE,GAAG,CAAC,CAAC;QACtC,CAAC;IACF,CAAC;AACF,CAAC;AAED,KAAK,UAAU,IAAI;IAClB,MAAM,GAAG,GAAG;QACX,YAAY,EAAE,OAAO,CAAC,GAAG,CAAC,YAAY;QACtC,iBAAiB,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB;QAChD,kBAAkB,EAAE,OAAO,CAAC,GAAG,CAAC,kBAAkB;KACzC,CAAC;IAEX,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACxC,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;IAChE,CAAC;IAED,MAAM,mBAAmB,GAAG,qBAAqB,CAAC;QACjD,GAAG,EAAE,GAAG,CAAC,YAAY,IAAI,EAAE;QAC3B,OAAO,EAAE,GAAG,CAAC,iBAAiB,IAAI,EAAE;QACpC,QAAQ,EAAE,GAAG,CAAC,kBAAkB,IAAI,EAAE;KACtC,CAAC,CAAC;IAEH,IAAI,CAAC;QACJ,MAAM,WAAW,GAAG,MAAM,eAAe,EAAE,CAAC;QAE5C,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,QAAQ,CAAC,CAAC;QAC1C,MAAM,aAAa,GAAG,SAAS;aAC7B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;aACxC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC;QAE3C,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;YACtC,OAAO,CAAC,GAAG,CAAC,eAAe,QAAQ,EAAE,CAAC,CAAC;YACvC,MAAM,cAAc,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;QAC7C,CAAC;QAED,MAAM,eAAe,CAAC,WAAW,CAAC,CAAC;QACnC,MAAM,mBAAmB,CAAC,UAAU,CAAC,+BAA+B,EAAE;YACrE,MAAM,EAAE,qBAAqB;SAC7B,CAAC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC,CAAC;QAC/C,MAAM,mBAAmB,CAAC,WAAW,CACpC,+BAA+B,KAAK,EAAE,EACtC;YACC,MAAM,EAAE,qBAAqB;SAC7B,CACD,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC;AACF,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACtB,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACrB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACjB,CAAC,CAAC,CAAC"}
1
+ {"version":3,"file":"update-raw-articles.js","sourceRoot":"","sources":["../src/update-raw-articles.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAC;AACrC,OAAO,EAAE,KAAK,EAAE,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACvE,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,eAAe,MAAM,UAAU,CAAC;AAEvC,MAAM,iBAAiB,GAAG,kBAAkB,CAAC;AAC7C,MAAM,QAAQ,GAAG,cAAc,CAAC;AAChC,MAAM,UAAU,GAAG,aAAa,CAAC;AAEjC,KAAK,UAAU,eAAe;IAC7B,IAAI,CAAC;QACJ,IAAI,UAAU,CAAC,iBAAiB,CAAC,EAAE,CAAC;YACnC,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,iBAAiB,EAAE,OAAO,CAAC,CAAC;YAC3D,OAAO,IAAI,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;QACjE,CAAC;IACF,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,6BAA6B,EAAE,KAAK,CAAC,CAAC;IACrD,CAAC;IACD,OAAO,IAAI,GAAG,EAAE,CAAC;AAClB,CAAC;AAED,KAAK,UAAU,eAAe,CAAC,IAAiB;IAC/C,MAAM,UAAU,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;IAC3C,MAAM,SAAS,CAAC,iBAAiB,EAAE,UAAU,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,OAAO,CAAC,CAAC;AACpE,CAAC;AAED,KAAK,UAAU,oBAAoB,CAAC,GAAW;IAC9C,IAAI,CAAC;QACJ,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YACjC,OAAO,EAAE;gBACR,YAAY,EACX,2HAA2H;aAC5H;SACD,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YAClB,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;QAC3D,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;QAEnC,MAAM,eAAe,GAAG,IAAI,eAAe,CAAC;YAC3C,YAAY,EAAE,KAAK;YACnB,cAAc,EAAE,QAAQ;YACxB,gBAAgB,EAAE,GAAG;YACrB,eAAe,EAAE,IAAI;YACrB,WAAW,EAAE,GAAG;SAChB,CAAC,CAAC;QAEH,eAAe,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;QAEtE,eAAe,CAAC,OAAO,CAAC,eAAe,EAAE;YACxC,MAAM,EAAE,CAAC,GAAG,CAAC;YACb,WAAW,EAAE,CAAC,OAAO,EAAE,IAAI,EAAE,EAAE;gBAC9B,MAAM,OAAO,GAAG,IAAyB,CAAC;gBAC1C,MAAM,IAAI,GAAG,OAAO,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;gBAC1C,IAAI,CAAC,IAAI;oBAAE,OAAO,OAAO,CAAC;gBAE1B,IAAI,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;oBACzD,OAAO,IAAI,OAAO,KAAK,IAAI,GA
AG,CAAC;gBAChC,CAAC;gBACD,IAAI,CAAC;oBACJ,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC;oBAC5C,OAAO,IAAI,OAAO,KAAK,WAAW,GAAG,CAAC;gBACvC,CAAC;gBAAC,MAAM,CAAC;oBACR,OAAO,OAAO,CAAC;gBAChB,CAAC;YACF,CAAC;SACD,CAAC,CAAC;QAEH,eAAe,CAAC,OAAO,CAAC,gBAAgB,EAAE;YACzC,MAAM,EAAE,CAAC,KAAK,CAAC;YACf,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;gBAC/B,MAAM,OAAO,GAAG,IAAwB,CAAC;gBACzC,MAAM,GAAG,GAAG,OAAO,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC;gBACxC,MAAM,GAAG,GAAG,OAAO,CAAC,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC;gBAC9C,IAAI,CAAC,GAAG;oBAAE,OAAO,EAAE,CAAC;gBAEpB,IAAI,GAAG,CAAC,UAAU,CAAC,MAAM,CAAC,IAAI,GAAG,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;oBACvD,OAAO,KAAK,GAAG,KAAK,GAAG,GAAG,CAAC;gBAC5B,CAAC;gBACD,IAAI,CAAC;oBACJ,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC;oBAC3C,OAAO,KAAK,GAAG,KAAK,WAAW,GAAG,CAAC;gBACpC,CAAC;gBAAC,MAAM,CAAC;oBACR,OAAO,EAAE,CAAC;gBACX,CAAC;YACF,CAAC;SACD,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,eAAe,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAChD,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;IACxB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,mBAAmB,GAAG,GAAG,EAAE,KAAK,CAAC,CAAC;QAChD,OAAO,EAAE,CAAC;IACX,CAAC;AACF,CAAC;AAeD,KAAK,UAAU,cAAc,CAC5B,QAAgB,EAChB,WAAwB;IAExB,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAClD,MAAM,IAAI,GAAiB,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAE/C,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;IAEjC,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;QAC7B,MAAM,KAAK,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,KAAK,MAAM,IAAI,IAAI,QAAQ,EAAE,CAAC;QAC7B,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,IAAI,CAAC;QACzC,IAAI,CAAC;YACJ,IAAI,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC;gBAClC,SAAS;YACV,CAAC;YAED,MAAM,WAAW,GAAG,MAAM,oBAAoB,CAAC,GAAG,CAAC,CAAC;YAEpD,IAAI,CAAC,WAAW,EAAE,CAAC;gBAClB,SAAS;YACV,CAAC;YAED,MAAM,WAAW,GAAG,GAAG,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;YACpD,MAAM,OAAO,GAAG,kBAAkB,CAAC,WAAW,CAAC,CAAC;YAChD,MAAM,cAAc,GAAG,GAAG,OAAO,KAAK,CAAC;YACvC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,cAAc,CAAC,C
AAC;YAEzD,MAAM,eAAe,GAAG,MAAM,KAAK,KAAK,GAAG;;;;;;EAM5C,KAAK;;;;EAIL,WAAW;CACZ,CAAC;YAEC,MAAM,SAAS,CAAC,UAAU,EAAE,eAAe,EAAE,OAAO,CAAC,CAAC;YACtD,OAAO,CAAC,GAAG,CAAC,aAAa,UAAU,EAAE,CAAC,CAAC;YAEvC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACtB,CAAC;QAAC,OAAO,MAAM,EAAE,CAAC;YACjB,OAAO,CAAC,KAAK,CAAC,gBAAgB,EAAE,GAAG,CAAC,CAAC;QACtC,CAAC;IACF,CAAC;AACF,CAAC;AAED,KAAK,UAAU,IAAI;IAClB,MAAM,GAAG,GAAG;QACX,YAAY,EAAE,OAAO,CAAC,GAAG,CAAC,YAAY;QACtC,iBAAiB,EAAE,OAAO,CAAC,GAAG,CAAC,iBAAiB;QAChD,kBAAkB,EAAE,OAAO,CAAC,GAAG,CAAC,kBAAkB;KACzC,CAAC;IAEX,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACxC,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;IAChE,CAAC;IAED,MAAM,mBAAmB,GAAG,qBAAqB,CAAC;QACjD,GAAG,EAAE,GAAG,CAAC,YAAY,IAAI,EAAE;QAC3B,OAAO,EAAE,GAAG,CAAC,iBAAiB,IAAI,EAAE;QACpC,QAAQ,EAAE,GAAG,CAAC,kBAAkB,IAAI,EAAE;KACtC,CAAC,CAAC;IAEH,IAAI,CAAC;QACJ,MAAM,WAAW,GAAG,MAAM,eAAe,EAAE,CAAC;QAE5C,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,QAAQ,CAAC,CAAC;QAC1C,MAAM,aAAa,GAAG,SAAS;aAC7B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;aACxC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC;QAE3C,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;YACtC,OAAO,CAAC,GAAG,CAAC,eAAe,QAAQ,EAAE,CAAC,CAAC;YACvC,MAAM,cAAc,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;QAC7C,CAAC;QAED,MAAM,eAAe,CAAC,WAAW,CAAC,CAAC;QACnC,MAAM,mBAAmB,CAAC,UAAU,CAAC,+BAA+B,EAAE;YACrE,MAAM,EAAE,qBAAqB;SAC7B,CAAC,CAAC;IACJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QAChB,OAAO,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC,CAAC;QAC/C,MAAM,mBAAmB,CAAC,WAAW,CACpC,+BAA+B,KAAK,EAAE,EACtC;YACC,MAAM,EAAE,qBAAqB;SAC7B,CACD,CAAC;QACF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACjB,CAAC;AACF,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACtB,OAAO,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IACrB,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACjB,CAAC,CAAC,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@s-hirano-ist/s-scripts",
3
- "version": "1.5.1",
3
+ "version": "1.6.0",
4
4
  "description": "CLI scripts for s-private data operations",
5
5
  "type": "module",
6
6
  "bin": {
@@ -9,6 +9,8 @@
9
9
  "fetch-images": "./dist/fetch-images.js",
10
10
  "fetch-notes": "./dist/fetch-notes.js",
11
11
  "find-duplicate-json-articles": "./dist/find-duplicate-json-articles.js",
12
+ "rag-ingest": "./dist/rag/ingest.js",
13
+ "rag-search": "./dist/rag/search.js",
12
14
  "reset-articles": "./dist/reset-articles.js",
13
15
  "reset-books": "./dist/reset-books.js",
14
16
  "reset-images": "./dist/reset-images.js",
@@ -35,12 +37,15 @@
35
37
  "access": "public"
36
38
  },
37
39
  "dependencies": {
40
+ "@huggingface/transformers": "3.5.1",
41
+ "@qdrant/js-client-rest": "1.13.0",
42
+ "glob": "11.0.2",
38
43
  "jsdom": "26.0.0",
39
44
  "minio": "8.0.5",
40
45
  "turndown": "7.2.0",
41
- "@s-hirano-ist/s-core": "1.5.1",
42
- "@s-hirano-ist/s-database": "1.5.1",
43
- "@s-hirano-ist/s-notification": "1.5.1"
46
+ "@s-hirano-ist/s-core": "1.6.0",
47
+ "@s-hirano-ist/s-database": "1.6.0",
48
+ "@s-hirano-ist/s-notification": "1.6.0"
44
49
  },
45
50
  "devDependencies": {
46
51
  "@types/jsdom": "21.1.7",
@@ -58,6 +63,8 @@
58
63
  "fetch-images": "tsx src/fetch-images.ts",
59
64
  "fetch-notes": "tsx src/fetch-notes.ts",
60
65
  "find-duplicate-json-articles": "tsx src/find-duplicate-json-articles.ts",
66
+ "rag-ingest": "tsx src/rag/ingest.ts",
67
+ "rag-search": "tsx src/rag/search.ts",
61
68
  "reset-articles": "tsx src/reset-articles.ts",
62
69
  "reset-books": "tsx src/reset-books.ts",
63
70
  "reset-images": "tsx src/reset-images.ts",