@s-hirano-ist/s-scripts 1.12.2 → 1.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cleanup-minio-images.d.ts +3 -0
- package/dist/cleanup-minio-images.d.ts.map +1 -0
- package/dist/cleanup-minio-images.js +111 -0
- package/dist/cleanup-minio-images.js.map +1 -0
- package/dist/fetch-articles.js +1 -3
- package/dist/fetch-articles.js.map +1 -1
- package/dist/fetch-books.js +1 -3
- package/dist/fetch-books.js.map +1 -1
- package/dist/fetch-images.js +1 -3
- package/dist/fetch-images.js.map +1 -1
- package/dist/fetch-notes.js +1 -3
- package/dist/fetch-notes.js.map +1 -1
- package/dist/infrastructures/articles-command-repository.d.ts +11 -1
- package/dist/infrastructures/articles-command-repository.d.ts.map +1 -1
- package/dist/infrastructures/articles-command-repository.js.map +1 -1
- package/dist/infrastructures/books-command-repository.d.ts +11 -1
- package/dist/infrastructures/books-command-repository.d.ts.map +1 -1
- package/dist/infrastructures/books-command-repository.js.map +1 -1
- package/dist/infrastructures/images-command-repository.d.ts +11 -1
- package/dist/infrastructures/images-command-repository.d.ts.map +1 -1
- package/dist/infrastructures/images-command-repository.js.map +1 -1
- package/dist/infrastructures/notes-command-repository.d.ts +11 -1
- package/dist/infrastructures/notes-command-repository.d.ts.map +1 -1
- package/dist/infrastructures/notes-command-repository.js.map +1 -1
- package/dist/ingest-articles.d.ts +3 -0
- package/dist/ingest-articles.d.ts.map +1 -0
- package/dist/ingest-articles.js +230 -0
- package/dist/ingest-articles.js.map +1 -0
- package/dist/ingest-books.d.ts +3 -0
- package/dist/ingest-books.d.ts.map +1 -0
- package/dist/ingest-books.js +167 -0
- package/dist/ingest-books.js.map +1 -0
- package/dist/ingest-images.d.ts +3 -0
- package/dist/ingest-images.d.ts.map +1 -0
- package/dist/ingest-images.js +196 -0
- package/dist/ingest-images.js.map +1 -0
- package/dist/ingest-notes.d.ts +3 -0
- package/dist/ingest-notes.d.ts.map +1 -0
- package/dist/ingest-notes.js +187 -0
- package/dist/ingest-notes.js.map +1 -0
- package/dist/rag/ingest-config.d.ts +8 -0
- package/dist/rag/ingest-config.d.ts.map +1 -0
- package/dist/rag/ingest-config.js +8 -0
- package/dist/rag/ingest-config.js.map +1 -0
- package/dist/rag/ingest.d.ts +1 -1
- package/dist/rag/ingest.d.ts.map +1 -1
- package/dist/rag/ingest.js +51 -82
- package/dist/rag/ingest.js.map +1 -1
- package/dist/rag/search.d.ts +1 -1
- package/dist/rag/search.d.ts.map +1 -1
- package/dist/rag/search.js +57 -69
- package/dist/rag/search.js.map +1 -1
- package/dist/reset-articles.js +1 -3
- package/dist/reset-articles.js.map +1 -1
- package/dist/reset-books.js +1 -3
- package/dist/reset-books.js.map +1 -1
- package/dist/reset-images.js +1 -3
- package/dist/reset-images.js.map +1 -1
- package/dist/reset-notes.js +1 -3
- package/dist/reset-notes.js.map +1 -1
- package/dist/revert-articles.js +1 -3
- package/dist/revert-articles.js.map +1 -1
- package/dist/revert-books.js +1 -3
- package/dist/revert-books.js.map +1 -1
- package/dist/revert-images.js +1 -3
- package/dist/revert-images.js.map +1 -1
- package/dist/revert-notes.js +1 -3
- package/dist/revert-notes.js.map +1 -1
- package/dist/update-raw-articles.js +40 -26
- package/dist/update-raw-articles.js.map +1 -1
- package/package.json +20 -8
- package/dist/rag/chunker.d.ts +0 -10
- package/dist/rag/chunker.d.ts.map +0 -1
- package/dist/rag/chunker.js +0 -188
- package/dist/rag/chunker.js.map +0 -1
- package/dist/rag/config.d.ts +0 -44
- package/dist/rag/config.d.ts.map +0 -1
- package/dist/rag/config.js +0 -34
- package/dist/rag/config.js.map +0 -1
- package/dist/rag/embedding.d.ts +0 -15
- package/dist/rag/embedding.d.ts.map +0 -1
- package/dist/rag/embedding.js +0 -61
- package/dist/rag/embedding.js.map +0 -1
- package/dist/rag/qdrant-client.d.ts +0 -40
- package/dist/rag/qdrant-client.d.ts.map +0 -1
- package/dist/rag/qdrant-client.js +0 -160
- package/dist/rag/qdrant-client.js.map +0 -1
package/dist/rag/chunker.js
DELETED
|
@@ -1,188 +0,0 @@
|
|
|
1
|
-
import { createHash } from "node:crypto";
|
|
2
|
-
import { RAG_CONFIG } from "./config.js";
|
|
3
|
-
/**
 * Compute a short fingerprint of a chunk's text for change detection.
 *
 * @param {string} content - Text to fingerprint.
 * @returns {string} The first 16 hex characters of the SHA-256 digest.
 */
function generateHash(content) {
    const hasher = createHash("sha256");
    hasher.update(content);
    const hexDigest = hasher.digest("hex");
    return hexDigest.substring(0, 16);
}
|
|
9
|
-
/**
 * Build a chunk identifier by joining the document id and the chunk's
 * position with a `#`.
 *
 * @param {string} docId - Identifier of the owning document.
 * @param {number} index - Position of the chunk within the document.
 * @returns {string} `<docId>#<index>`.
 */
function generateChunkId(docId, index) {
    return "".concat(docId, "#", index);
}
|
|
15
|
-
/**
 * Turn a JSON article file into one chunk per entry of its `body` array.
 *
 * The chunk text is the newline-joined list of the entry's non-empty
 * `title`, `ogTitle` (only when it differs from `title`), `ogDescription`,
 * `quote` and `url`. Entries that produce no text are skipped. Chunk ids use
 * the entry's position in `body`, so skipped entries leave gaps.
 *
 * @param {string} filePath - Path used to build the `file:<path>` document id.
 * @param {string} content - Raw JSON text of the file.
 * @returns {object[]} Chunk payloads with `type: "bookmark_json"`.
 * @throws {SyntaxError} If `content` is not valid JSON.
 */
export function parseJsonArticle(filePath, content) {
    const article = JSON.parse(content);
    const documentId = `file:${filePath}`;
    const result = [];
    for (let position = 0; position < article.body.length; position += 1) {
        const entry = article.body[position];
        // Collect the candidate fields in output order and drop the empty ones.
        const hasDistinctOgTitle = entry.ogTitle && entry.ogTitle !== entry.title;
        const fragments = [
            entry.title,
            hasDistinctOgTitle ? entry.ogTitle : null,
            entry.ogDescription,
            entry.quote,
            entry.url,
        ].filter(Boolean);
        const text = fragments.join("\n");
        if (text.trim() === "") {
            continue;
        }
        result.push({
            type: "bookmark_json",
            top_heading: article.heading,
            doc_id: documentId,
            chunk_id: generateChunkId(documentId, position),
            title: entry.title || entry.ogTitle || "Untitled",
            url: entry.url,
            heading_path: [article.heading],
            text,
            content_hash: generateHash(text),
        });
    }
    return result;
}
|
|
55
|
-
/**
 * Split a Markdown document into its frontmatter and body.
 *
 * Only a leading `---` ... `---` block counts as frontmatter. Both LF and
 * CRLF line endings are accepted, and the closing `---` may be the last line
 * of the file (the body is then the empty string).
 *
 * This is a deliberately minimal `key: value` reader, not a YAML parser:
 * only `heading`, `description` and `draft` are recognised, keys must start
 * at column 0, and values are taken verbatim (trimmed, quotes kept).
 *
 * @param {string} content - Full text of the Markdown file.
 * @returns {{frontmatter: {heading: string, description?: string, draft?: boolean}, body: string}}
 *   Parsed fields (`heading` defaults to "unknown") and the remaining text.
 *   When no frontmatter block is found, `body` is the whole input.
 */
function parseFrontmatter(content) {
    // `\r?\n` tolerates Windows line endings; the optional tail lets a file
    // end right after the closing delimiter.
    const match = content.match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n([\s\S]*))?$/);
    if (!match) {
        return {
            frontmatter: { heading: "unknown" },
            body: content,
        };
    }
    const [, rawFrontmatter, body = ""] = match;
    const frontmatter = { heading: "unknown" };
    for (const line of rawFrontmatter.split(/\r?\n/)) {
        // Split on the first colon only, so values may contain colons.
        const [key, ...valueParts] = line.split(":");
        const value = valueParts.join(":").trim();
        if (key === "heading") {
            frontmatter.heading = value;
        }
        else if (key === "description") {
            frontmatter.description = value;
        }
        else if (key === "draft") {
            frontmatter.draft = value === "true";
        }
    }
    return { frontmatter, body };
}
|
|
85
|
-
/**
 * Split a Markdown body into sections that start at `##` or `###` headings.
 *
 * - Text before the first heading is not part of any section and is dropped.
 * - A section whose content is blank is dropped.
 * - `headingPath` lists the enclosing headings, outermost first, ending with
 *   the section's own title.
 * - LF and CRLF input are both accepted; content lines are re-joined with
 *   `\n`.
 * - Lines inside fenced code blocks (``` or ~~~) are never treated as
 *   headings, so a `## comment` in a code sample stays in its section.
 *
 * @param {string} content - Markdown text without frontmatter.
 * @returns {{headingPath: string[], title: string, content: string, level: number}[]}
 *   Sections in document order.
 */
function splitMarkdownByHeadings(content) {
    const sections = [];
    const headingStack = [];
    let currentSection = null;
    // Marker of the fence currently open (e.g. "```"), or null when outside.
    let openFence = null;
    const saveCurrentSection = () => {
        if (currentSection?.content.trim()) {
            sections.push(currentSection);
        }
    };
    for (const line of content.split(/\r?\n/)) {
        const fenceMatch = line.match(/^ {0,3}(`{3,}|~{3,})(.*)$/);
        if (fenceMatch) {
            const [, marker, rest] = fenceMatch;
            if (openFence === null) {
                openFence = marker;
            }
            else if (marker[0] === openFence[0] &&
                marker.length >= openFence.length &&
                rest.trim() === "") {
                // A closing fence uses the same character, is at least as
                // long as the opener, and carries no info string.
                openFence = null;
            }
        }
        const headingMatch = openFence === null ? line.match(/^(#{2,3})\s+(.+)$/) : null;
        if (headingMatch) {
            saveCurrentSection();
            const level = headingMatch[1].length;
            const title = headingMatch[2];
            // Drop headings at the same or a deeper level before nesting this one.
            while (headingStack.length > 0 &&
                headingStack[headingStack.length - 1].level >= level) {
                headingStack.pop();
            }
            headingStack.push({ level, title });
            currentSection = {
                headingPath: headingStack.map((h) => h.title),
                title,
                content: "",
                level,
            };
        }
        else if (currentSection) {
            currentSection.content += `${line}\n`;
        }
    }
    saveCurrentSection();
    return sections;
}
|
|
128
|
-
/**
 * Split long text into chunks of at most `maxLength` characters, breaking
 * only at blank-line paragraph boundaries.
 *
 * Text that already fits is returned unchanged as a single element.
 * Otherwise paragraphs are packed greedily; the `"\n\n"` separator is counted
 * when deciding whether the next paragraph still fits, so a merged chunk
 * never exceeds `maxLength`.
 *
 * Known limitation: a single paragraph longer than `maxLength` is not split
 * further and is emitted as one oversized chunk.
 *
 * @param {string} text - Text to split.
 * @param {number} maxLength - Maximum chunk length (`String.prototype.length` units).
 * @returns {string[]} Trimmed, non-empty chunks (or `[text]` when it fits).
 */
function splitByParagraphs(text, maxLength) {
    if (text.length <= maxLength) {
        return [text];
    }
    const separator = "\n\n";
    const chunks = [];
    const pushIfNotBlank = (chunk) => {
        const trimmed = chunk.trim();
        if (trimmed) {
            chunks.push(trimmed);
        }
    };
    let currentChunk = "";
    for (const para of text.split(/\n\n+/)) {
        if (!currentChunk) {
            currentChunk = para;
        }
        else if (currentChunk.length + separator.length + para.length > maxLength) {
            pushIfNotBlank(currentChunk);
            currentChunk = para;
        }
        else {
            currentChunk += separator + para;
        }
    }
    pushIfNotBlank(currentChunk);
    return chunks;
}
|
|
152
|
-
/**
 * Convert a Markdown file into chunk payloads, one per heading section or
 * per paragraph group of a long section.
 *
 * Files whose frontmatter sets `draft: true` produce no chunks. Sections
 * longer than `RAG_CONFIG.chunking.maxChunkLength` are split by paragraphs.
 * Chunk ids are numbered consecutively across the whole file.
 *
 * @param {string} filePath - Path used to build the `file:<path>` document id.
 * @param {string} content - Full Markdown text, including any frontmatter.
 * @returns {object[]} Chunk payloads with `type: "markdown_note"`.
 */
export function parseMarkdown(filePath, content) {
    const parsed = parseFrontmatter(content);
    const documentId = `file:${filePath}`;
    if (parsed.frontmatter.draft) {
        return [];
    }
    const topHeading = parsed.frontmatter.heading;
    const result = [];
    splitMarkdownByHeadings(parsed.body).forEach((section) => {
        const pieces = splitByParagraphs(section.content, RAG_CONFIG.chunking.maxChunkLength)
            .filter((piece) => piece.trim() !== "");
        for (const piece of pieces) {
            // The next chunk index always equals the number of chunks emitted so far.
            result.push({
                type: "markdown_note",
                top_heading: topHeading,
                doc_id: documentId,
                chunk_id: generateChunkId(documentId, result.length),
                title: section.title,
                heading_path: [topHeading, ...section.headingPath],
                text: piece,
                content_hash: generateHash(piece),
            });
        }
    });
    return result;
}
|
|
188
|
-
//# sourceMappingURL=chunker.js.map
|
package/dist/rag/chunker.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/rag/chunker.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAsB,UAAU,EAAE,MAAM,UAAU,CAAC;AAwB1D;;GAEG;AACH,SAAS,YAAY,CAAC,OAAe;IACpC,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AACxE,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,KAAa,EAAE,KAAa;IACpD,OAAO,GAAG,KAAK,IAAI,KAAK,EAAE,CAAC;AAC5B,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAC/B,QAAgB,EAChB,OAAe;IAEf,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAgB,CAAC;IAChD,MAAM,KAAK,GAAG,QAAQ,QAAQ,EAAE,CAAC;IACjC,MAAM,MAAM,GAAoB,EAAE,CAAC;IAEnC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAE1B,mCAAmC;QACnC,MAAM,SAAS,GAAa,EAAE,CAAC;QAC/B,IAAI,IAAI,CAAC,KAAK;YAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC3C,IAAI,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO,KAAK,IAAI,CAAC,KAAK;YAC9C,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC9B,IAAI,IAAI,CAAC,aAAa;YAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;QAC3D,IAAI,IAAI,CAAC,KAAK;YAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC3C,IAAI,IAAI,CAAC,GAAG;YAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAEvC,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAElC,mBAAmB;QACnB,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;YAAE,SAAS;QAE3B,MAAM,OAAO,GAAG,eAAe,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAE1C,MAAM,CAAC,IAAI,CAAC;YACX,IAAI,EAAE,eAAe;YACrB,WAAW,EAAE,IAAI,CAAC,OAAO;YACzB,MAAM,EAAE,KAAK;YACb,QAAQ,EAAE,OAAO;YACjB,KAAK,EAAE,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,OAAO,IAAI,UAAU;YAC/C,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,YAAY,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC;YAC5B,IAAI;YACJ,YAAY,EAAE,YAAY,CAAC,IAAI,CAAC;SAChC,CAAC,CAAC;IACJ,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,OAAe;IAIxC,MAAM,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,mCAAmC,CAAC,CAAC;IAE5E,IAAI,CAAC,gBAAgB,EAAE,CAAC;QACvB,OAAO;YACN,WAAW,EAAE,EAAE,OAAO,EAAE,SAAS,EAAE;YACnC,IAAI,EAAE,OAAO;SACb,CAAC;I
ACH,CAAC;IAED,MAAM,cAAc,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC;IAC3C,MAAM,IAAI,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC;IAEjC,oCAAoC;IACpC,MAAM,WAAW,GAAwB,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;IAEhE,KAAK,MAAM,IAAI,IAAI,cAAc,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QAC/C,MAAM,CAAC,GAAG,EAAE,GAAG,UAAU,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC7C,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAE1C,IAAI,GAAG,KAAK,SAAS,EAAE,CAAC;YACvB,WAAW,CAAC,OAAO,GAAG,KAAK,CAAC;QAC7B,CAAC;aAAM,IAAI,GAAG,KAAK,aAAa,EAAE,CAAC;YAClC,WAAW,CAAC,WAAW,GAAG,KAAK,CAAC;QACjC,CAAC;aAAM,IAAI,GAAG,KAAK,OAAO,EAAE,CAAC;YAC5B,WAAW,CAAC,KAAK,GAAG,KAAK,KAAK,MAAM,CAAC;QACtC,CAAC;IACF,CAAC;IAED,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;AAC9B,CAAC;AASD;;GAEG;AACH,SAAS,uBAAuB,CAAC,OAAe;IAC/C,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAClC,MAAM,QAAQ,GAAsB,EAAE,CAAC;IAEvC,IAAI,kBAAkB,GAAa,EAAE,CAAC;IACtC,IAAI,cAAc,GAA2B,IAAI,CAAC;IAClD,MAAM,YAAY,GAAuC,EAAE,CAAC;IAE5D,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QAC1B,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;QAErD,IAAI,YAAY,EAAE,CAAC;YAClB,wBAAwB;YACxB,IAAI,cAAc,EAAE,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;gBACpC,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YAC/B,CAAC;YAED,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YACrC,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;YAE9B,uBAAuB;YACvB,OACC,YAAY,CAAC,MAAM,GAAG,CAAC;gBACvB,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,KAAK,EACnD,CAAC;gBACF,YAAY,CAAC,GAAG,EAAE,CAAC;YACpB,CAAC;YACD,YAAY,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC,CAAC;YAEpC,sBAAsB;YACtB,kBAAkB,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YAEtD,cAAc,GAAG;gBAChB,WAAW,EAAE,CAAC,GAAG,kBAAkB,CAAC;gBACpC,KAAK;gBACL,OAAO,EAAE,EAAE;gBACX,KAAK;aACL,CAAC;QACH,CAAC;aAAM,IAAI,cAAc,EAAE,CAAC;YAC3B,cAAc,CAAC,OAAO,IAAI,GAAG,IAAI,IAAI,CAAC;QACvC,CAAC;IACF,CAAC;IAED,oBAAoB;IACpB,IAAI,cAAc,EAAE,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;QACpC,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;IAC/B,CAAC;IAED,OAAO,QAAQ,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,IAAY,EAAE
,SAAiB;IACzD,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC9B,OAAO,CAAC,IAAI,CAAC,CAAC;IACf,CAAC;IAED,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,YAAY,GAAG,EAAE,CAAC;IAEtB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;QAC/B,IAAI,YAAY,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,GAAG,SAAS,IAAI,YAAY,EAAE,CAAC;YACnE,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;YACjC,YAAY,GAAG,IAAI,CAAC;QACrB,CAAC;aAAM,CAAC;YACP,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC;QACrD,CAAC;IACF,CAAC;IAED,IAAI,YAAY,CAAC,IAAI,EAAE,EAAE,CAAC;QACzB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;IAClC,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,aAAa,CAC5B,QAAgB,EAChB,OAAe;IAEf,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC;IACxD,MAAM,KAAK,GAAG,QAAQ,QAAQ,EAAE,CAAC;IACjC,MAAM,MAAM,GAAoB,EAAE,CAAC;IAEnC,mBAAmB;IACnB,IAAI,WAAW,CAAC,KAAK,EAAE,CAAC;QACvB,OAAO,EAAE,CAAC;IACX,CAAC;IAED,MAAM,QAAQ,GAAG,uBAAuB,CAAC,IAAI,CAAC,CAAC;IAE/C,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAChC,sBAAsB;QACtB,MAAM,UAAU,GAAG,iBAAiB,CACnC,OAAO,CAAC,OAAO,EACf,UAAU,CAAC,QAAQ,CAAC,cAAc,CAClC,CAAC;QAEF,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;YAC/B,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;gBAAE,SAAS;YAE3B,MAAM,eAAe,GAAG,CAAC,WAAW,CAAC,OAAO,EAAE,GAAG,OAAO,CAAC,WAAW,CAAC,CAAC;YACtE,MAAM,OAAO,GAAG,eAAe,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;YAEnD,MAAM,CAAC,IAAI,CAAC;gBACX,IAAI,EAAE,eAAe;gBACrB,WAAW,EAAE,WAAW,CAAC,OAAO;gBAChC,MAAM,EAAE,KAAK;gBACb,QAAQ,EAAE,OAAO;gBACjB,KAAK,EAAE,OAAO,CAAC,KAAK;gBACpB,YAAY,EAAE,eAAe;gBAC7B,IAAI;gBACJ,YAAY,EAAE,YAAY,CAAC,IAAI,CAAC;aAChC,CAAC,CAAC;YAEH,UAAU,EAAE,CAAC;QACd,CAAC;IACF,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC"}
|
package/dist/rag/config.d.ts
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
/**
 * Static settings for the RAG modules (chunking, embedding, Qdrant access).
 */
export declare const RAG_CONFIG: {
    /** Qdrant collection settings. */
    readonly qdrant: {
        readonly collectionName: "knowledge_v1";
        /** Vector dimension; the implementation annotates this as the size for multilingual-e5-small. */
        readonly vectorSize: 384;
        readonly distance: "Cosine";
    };
    /** Embedding model settings. */
    readonly embedding: {
        readonly model: "intfloat/multilingual-e5-small";
        /** Strings prepended to the input text before embedding (queries vs. passages). */
        readonly prefix: {
            readonly query: "query: ";
            readonly passage: "passage: ";
        };
    };
    /** Glob patterns for source files. NOTE(review): the consumer of these patterns is not visible here. */
    readonly paths: {
        readonly markdown: readonly ["markdown/note/**/*.md", "markdown/book/**/*.md", "raw/article/**/*.md"];
        readonly json: "json/article/**/*.json";
    };
    /** Chunking settings. */
    readonly chunking: {
        /** Maximum length of a Markdown chunk before it is split by paragraphs. */
        readonly maxChunkLength: 2000;
        /** Heading depths that start a section (`##` and `###`). */
        readonly headingLevels: readonly [2, 3];
    };
    /** Cache file for hash comparison. */
    readonly hashCachePath: ".rag-hash-cache.json";
};
/**
 * Payload stored with every vector.
 */
export type QdrantPayload = {
    /** Source kind: a Markdown section or a JSON article entry. */
    type: "markdown_note" | "bookmark_json";
    /** Heading of the source file (frontmatter `heading` or JSON `heading`). */
    top_heading: string;
    /** Document id of the form `file:<path>`. */
    doc_id: string;
    /** Chunk id of the form `<doc_id>#<index>`. */
    chunk_id: string;
    title: string;
    /** Only set for JSON article entries. */
    url?: string;
    /** Headings enclosing the chunk, outermost first. */
    heading_path: string[];
    text: string;
    /** First 16 hex characters of the SHA-256 of `text`. */
    content_hash: string;
};
/**
 * One search hit: the similarity score plus a subset of the stored payload.
 */
export type SearchResult = {
    score: number;
    text: string;
    title: string;
    url?: string;
    heading_path: string[];
    type: "markdown_note" | "bookmark_json";
    doc_id: string;
};
|
|
44
|
-
//# sourceMappingURL=config.d.ts.map
|
package/dist/rag/config.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../src/rag/config.ts"],"names":[],"mappings":"AACA,eAAO,MAAM,UAAU;;;;;;;;;;;;;;;;;;;;;;CAmCb,CAAC;AAGX,MAAM,MAAM,aAAa,GAAG;IAC3B,IAAI,EAAE,eAAe,GAAG,eAAe,CAAC;IACxC,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,CAAC;CACrB,CAAC;AAGF,MAAM,MAAM,YAAY,GAAG;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,eAAe,GAAG,eAAe,CAAC;IACxC,MAAM,EAAE,MAAM,CAAC;CACf,CAAC"}
|
package/dist/rag/config.js
DELETED
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
// RAG configuration, assembled from one constant per concern.

// Qdrant settings
const qdrantSettings = {
    collectionName: "knowledge_v1",
    vectorSize: 384, // multilingual-e5-small
    distance: "Cosine",
};

// Embedding settings
const embeddingSettings = {
    model: "intfloat/multilingual-e5-small",
    prefix: { query: "query: ", passage: "passage: " },
};

// File paths
const sourcePaths = {
    markdown: ["markdown/note/**/*.md", "markdown/book/**/*.md", "raw/article/**/*.md"],
    json: "json/article/**/*.json",
};

// Chunking settings
const chunkingSettings = {
    maxChunkLength: 2000,
    headingLevels: [2, 3], // ## and ###
};

export const RAG_CONFIG = {
    qdrant: qdrantSettings,
    embedding: embeddingSettings,
    paths: sourcePaths,
    chunking: chunkingSettings,
    // Cache file for hash comparison
    hashCachePath: ".rag-hash-cache.json",
};
|
|
34
|
-
//# sourceMappingURL=config.js.map
|
package/dist/rag/config.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/rag/config.ts"],"names":[],"mappings":"AAAA,oBAAoB;AACpB,MAAM,CAAC,MAAM,UAAU,GAAG;IACzB,kBAAkB;IAClB,MAAM,EAAE;QACP,cAAc,EAAE,cAAc;QAC9B,UAAU,EAAE,GAAG,EAAE,wBAAwB;QACzC,QAAQ,EAAE,QAAiB;KAC3B;IAED,qBAAqB;IACrB,SAAS,EAAE;QACV,KAAK,EAAE,gCAAgC;QACvC,MAAM,EAAE;YACP,KAAK,EAAE,SAAS;YAChB,OAAO,EAAE,WAAW;SACpB;KACD;IAED,aAAa;IACb,KAAK,EAAE;QACN,QAAQ,EAAE;YACT,uBAAuB;YACvB,uBAAuB;YACvB,qBAAqB;SACrB;QACD,IAAI,EAAE,wBAAwB;KAC9B;IAED,oBAAoB;IACpB,QAAQ,EAAE;QACT,cAAc,EAAE,IAAI;QACpB,aAAa,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,aAAa;KACpC;IAED,iCAAiC;IACjC,aAAa,EAAE,sBAAsB;CAC5B,CAAC"}
|
package/dist/rag/embedding.d.ts
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
/**
 * Generate embedding for a single text.
 *
 * The text is prefixed with the configured query or passage prefix before it
 * is embedded; the result is mean-pooled and normalized.
 *
 * @param text - Input text to embed
 * @param isQuery - Whether this is a query (vs passage); defaults to false
 * @returns Embedding vector
 */
export declare function embed(text: string, isQuery?: boolean): Promise<number[]>;
/**
 * Generate embeddings for multiple texts in batch.
 *
 * Every text gets the same query or passage prefix; the result holds one
 * vector per input, in input order.
 *
 * @param texts - Array of input texts
 * @param isQuery - Whether these are queries (vs passages); defaults to false
 * @returns Array of embedding vectors
 */
export declare function embedBatch(texts: string[], isQuery?: boolean): Promise<number[][]>;
|
|
15
|
-
//# sourceMappingURL=embedding.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"embedding.d.ts","sourceRoot":"","sources":["../../src/rag/embedding.ts"],"names":[],"mappings":"AAwBA;;;;;GAKG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,UAAQ,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAiB5E;AAED;;;;;GAKG;AACH,wBAAsB,UAAU,CAC/B,KAAK,EAAE,MAAM,EAAE,EACf,OAAO,UAAQ,GACb,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CA2BrB"}
|
package/dist/rag/embedding.js
DELETED
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
import { pipeline, } from "@huggingface/transformers";
|
|
2
|
-
import { RAG_CONFIG } from "./config.js";
|
|
3
|
-
// Promise for the embedding pipeline: null until the first request, then the
// in-flight or completed load shared by every caller.
let embeddingPipeline = null;
/**
 * Return the feature-extraction pipeline, loading the model on first use.
 *
 * The promise itself is cached (not the resolved value), so callers that
 * arrive while the model is still loading share one load instead of each
 * starting their own. A failed load clears the cache so that a later call
 * can retry.
 *
 * @returns {Promise<Function>} The loaded pipeline.
 */
async function getEmbeddingPipeline() {
    if (!embeddingPipeline) {
        console.log(`Loading embedding model: ${RAG_CONFIG.embedding.model}...`);
        embeddingPipeline = (async () => {
            try {
                const loaded = await pipeline("feature-extraction", RAG_CONFIG.embedding.model, { dtype: "fp32" });
                console.log("Embedding model loaded successfully.");
                return loaded;
            }
            catch (error) {
                // Do not keep a rejected promise around; allow a retry.
                embeddingPipeline = null;
                throw error;
            }
        })();
    }
    return embeddingPipeline;
}
|
|
15
|
-
/**
 * Embed one piece of text.
 *
 * E5 models require a prefix on the input, so the configured query or
 * passage prefix is prepended first. The pipeline is asked for mean pooling
 * and normalization.
 *
 * @param {string} text - Input text to embed.
 * @param {boolean} [isQuery=false] - True for a search query, false for a passage.
 * @returns {Promise<number[]>} The embedding as a plain array.
 */
export async function embed(text, isQuery = false) {
    const extractor = await getEmbeddingPipeline();
    const { query, passage } = RAG_CONFIG.embedding.prefix;
    const input = `${isQuery ? query : passage}${text}`;
    const pooling = { pooling: "mean", normalize: true };
    const { data } = await extractor(input, pooling);
    return Array.from(data);
}
|
|
35
|
-
/**
 * Generate embeddings for multiple texts in one pipeline call.
 *
 * An empty input returns `[]` immediately, without loading the model. The
 * width used to cut the flat output into vectors is the last entry of the
 * output tensor's `dims`; the configured Qdrant vector size is only a
 * fallback when `dims` is unavailable.
 *
 * @param {string[]} texts - Array of input texts.
 * @param {boolean} [isQuery=false] - Whether these are queries (vs passages).
 * @returns {Promise<number[][]>} One embedding per input text, in input order.
 */
export async function embedBatch(texts, isQuery = false) {
    if (texts.length === 0) {
        return [];
    }
    const pipe = await getEmbeddingPipeline();
    // E5 models require prefixes
    const prefix = isQuery
        ? RAG_CONFIG.embedding.prefix.query
        : RAG_CONFIG.embedding.prefix.passage;
    const prefixedTexts = texts.map((t) => prefix + t);
    const outputs = await pipe(prefixedTexts, {
        pooling: "mean",
        normalize: true,
    });
    // outputs.data is flat, so it is cut into rows of `dim` values.
    const { dims } = outputs;
    const dim = Array.isArray(dims) && dims.length > 0
        ? dims[dims.length - 1]
        : RAG_CONFIG.qdrant.vectorSize;
    return texts.map((_, i) => Array.from(outputs.data.slice(i * dim, (i + 1) * dim)));
}
|
|
61
|
-
//# sourceMappingURL=embedding.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"embedding.js","sourceRoot":"","sources":["../../src/rag/embedding.ts"],"names":[],"mappings":"AAAA,OAAO,EAEN,QAAQ,GACR,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAEtC,IAAI,iBAAiB,GAAqC,IAAI,CAAC;AAE/D;;GAEG;AACH,KAAK,UAAU,oBAAoB;IAClC,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACxB,OAAO,CAAC,GAAG,CAAC,4BAA4B,UAAU,CAAC,SAAS,CAAC,KAAK,KAAK,CAAC,CAAC;QACzE,iBAAiB,GAAG,CAAC,MAAM,QAAQ,CAClC,oBAAoB,EACpB,UAAU,CAAC,SAAS,CAAC,KAAK,EAC1B,EAAE,KAAK,EAAE,MAAM,EAAE,CACjB,CAAyC,CAAC;QAC3C,OAAO,CAAC,GAAG,CAAC,sCAAsC,CAAC,CAAC;IACrD,CAAC;IACD,OAAO,iBAAiB,CAAC;AAC1B,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,IAAY,EAAE,OAAO,GAAG,KAAK;IACxD,MAAM,IAAI,GAAG,MAAM,oBAAoB,EAAE,CAAC;IAE1C,6BAA6B;IAC7B,MAAM,MAAM,GAAG,OAAO;QACrB,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK;QACnC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,CAAC;IAEvC,MAAM,YAAY,GAAG,MAAM,GAAG,IAAI,CAAC;IAEnC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,YAAY,EAAE;QACvC,OAAO,EAAE,MAAM;QACf,SAAS,EAAE,IAAI;KACf,CAAC,CAAC;IAEH,mBAAmB;IACnB,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAoB,CAAC,CAAC;AAChD,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAC/B,KAAe,EACf,OAAO,GAAG,KAAK;IAEf,MAAM,IAAI,GAAG,MAAM,oBAAoB,EAAE,CAAC;IAE1C,MAAM,MAAM,GAAG,OAAO;QACrB,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK;QACnC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,CAAC;IAEvC,MAAM,aAAa,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAEnD,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,aAAa,EAAE;QACzC,OAAO,EAAE,MAAM;QACf,SAAS,EAAE,IAAI;KACf,CAAC,CAAC;IAEH,uDAAuD;IACvD,MAAM,UAAU,GAAe,EAAE,CAAC;IAClC,MAAM,GAAG,GAAG,UAAU,CAAC,MAAM,CAAC,UAAU,CAAC;IAEzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,CAAC,GAAG,GAAG,CAAC;QACtB,MAAM,GAAG,GAAG,KAAK,GAAG,GAAG,CAAC;QACxB,UAAU,CAAC,IAAI,CACd,KAAK,CAAC,IAAI,CAAE,OAAO,CAAC,IAAqB,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAC5D,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACnB,CAAC"}
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
import { QdrantClient } from "@qdrant/js-client-rest";
|
|
2
|
-
import { type QdrantPayload, type SearchResult } from "./config.js";
|
|
3
|
-
/**
 * Get or create Qdrant client.
 *
 * The client is built from the `QDRANT_URL` and `QDRANT_API_KEY` environment
 * variables on first use and reused afterwards.
 *
 * @throws Error when `QDRANT_URL` is not set
 */
export declare function getQdrantClient(): QdrantClient;
/**
 * Create collection if not exists, using the configured name, vector size
 * and distance.
 */
export declare function ensureCollection(): Promise<void>;
/**
 * Upsert points to Qdrant.
 *
 * @param points - Points to write; `id` is the chunk id string, which the
 *   implementation converts to a numeric id before sending
 */
export declare function upsertPoints(points: {
    id: string;
    vector: number[];
    payload: QdrantPayload;
}[]): Promise<void>;
/**
 * Get existing content hashes for a set of chunk IDs.
 *
 * @param chunkIds - Chunk ids to look up
 * @returns Map from `chunk_id` to `content_hash` for the points that were
 *   found; retrieval errors do not propagate and leave the map empty
 */
export declare function getExistingHashes(chunkIds: string[]): Promise<Map<string, string>>;
/**
 * Search for similar documents.
 *
 * @param queryVector - Embedding of the query
 * @param options - `topK` limits the number of hits (defaults to 10);
 *   `filter` restricts hits to payloads whose `type` and/or `top_heading`
 *   match
 */
export declare function search(queryVector: number[], options?: {
    topK?: number;
    filter?: {
        type?: "markdown_note" | "bookmark_json";
        top_heading?: string;
    };
}): Promise<SearchResult[]>;
/**
 * Get collection stats
 */
export declare function getCollectionStats(): Promise<{
    pointsCount: number;
    status: string;
}>;
|
|
40
|
-
//# sourceMappingURL=qdrant-client.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"qdrant-client.d.ts","sourceRoot":"","sources":["../../src/rag/qdrant-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAE,KAAK,aAAa,EAAc,KAAK,YAAY,EAAE,MAAM,UAAU,CAAC;AAI7E;;GAEG;AACH,wBAAgB,eAAe,IAAI,YAAY,CAgB9C;AAED;;GAEG;AACH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC,CAmBtD;AAED;;GAEG;AACH,wBAAsB,YAAY,CACjC,MAAM,EAAE;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAC;IAAC,OAAO,EAAE,aAAa,CAAA;CAAE,EAAE,GAChE,OAAO,CAAC,IAAI,CAAC,CAef;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CACtC,QAAQ,EAAE,MAAM,EAAE,GAChB,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CA4B9B;AAED;;GAEG;AACH,wBAAsB,MAAM,CAC3B,WAAW,EAAE,MAAM,EAAE,EACrB,OAAO,GAAE;IACR,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE;QACR,IAAI,CAAC,EAAE,eAAe,GAAG,eAAe,CAAC;QACzC,WAAW,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;CACG,GACJ,OAAO,CAAC,YAAY,EAAE,CAAC,CA6CzB;AAED;;GAEG;AACH,wBAAsB,kBAAkB,IAAI,OAAO,CAAC;IACnD,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;CACf,CAAC,CAgBD"}
|
|
@@ -1,160 +0,0 @@
|
|
|
1
|
-
import { QdrantClient } from "@qdrant/js-client-rest";
|
|
2
|
-
import { RAG_CONFIG } from "./config.js";
|
|
3
|
-
// Shared client instance, created on first use.
let client = null;
/**
 * Return the shared Qdrant client, creating it on the first call.
 *
 * Connection settings come from the `QDRANT_URL` and `QDRANT_API_KEY`
 * environment variables.
 *
 * @returns {QdrantClient} The shared client.
 * @throws {Error} If `QDRANT_URL` is not set when the client is created.
 */
export function getQdrantClient() {
    if (client) {
        return client;
    }
    const { QDRANT_URL: url, QDRANT_API_KEY: apiKey } = process.env;
    if (!url) {
        throw new Error("QDRANT_URL environment variable is required");
    }
    client = new QdrantClient({ url, apiKey });
    return client;
}
|
|
21
|
-
/**
 * Make sure the configured collection exists, creating it when its name is
 * not among the collections the server reports.
 *
 * @returns {Promise<void>}
 */
export async function ensureCollection() {
    const qdrant = getQdrantClient();
    const { collectionName, vectorSize, distance } = RAG_CONFIG.qdrant;
    const { collections } = await qdrant.getCollections();
    const alreadyPresent = collections.some(({ name }) => name === collectionName);
    if (alreadyPresent) {
        console.log(`Collection ${collectionName} already exists.`);
        return;
    }
    console.log(`Creating collection: ${collectionName}`);
    const vectors = { size: vectorSize, distance };
    await qdrant.createCollection(collectionName, { vectors });
    console.log(`Collection ${collectionName} created successfully.`);
}
|
|
43
|
-
/**
 * Upsert points to Qdrant and wait for the write to be applied.
 *
 * An empty `points` array is a no-op: no client is created and no request is
 * sent.
 *
 * @param {{id: string, vector: number[], payload: object}[]} points -
 *   Points to write; `id` is the chunk id string.
 * @returns {Promise<void>}
 */
export async function upsertPoints(points) {
    if (points.length === 0) {
        return;
    }
    const qdrant = getQdrantClient();
    const { collectionName } = RAG_CONFIG.qdrant;
    // Qdrant requires numeric or UUID IDs, so we hash the chunk_id
    const qdrantPoints = points.map(({ id, vector, payload }) => ({
        id: hashToUint(id),
        vector,
        payload,
    }));
    await qdrant.upsert(collectionName, {
        wait: true,
        points: qdrantPoints,
    });
}
|
|
60
|
-
/**
 * Get existing content hashes for a set of chunk IDs.
 *
 * This lookup is best effort. If retrieval fails (for example because the
 * collection does not exist yet), the failure is logged as a warning and the
 * hashes collected so far are returned, so callers treat the missing chunks
 * as new. An empty `chunkIds` returns an empty map without creating a client.
 *
 * @param {string[]} chunkIds - Chunk ids to look up.
 * @returns {Promise<Map<string, string>>} Map from `chunk_id` to
 *   `content_hash` for every point found.
 */
export async function getExistingHashes(chunkIds) {
    const hashMap = new Map();
    if (chunkIds.length === 0) {
        return hashMap;
    }
    const qdrant = getQdrantClient();
    const { collectionName } = RAG_CONFIG.qdrant;
    // Convert chunk IDs to numeric IDs
    const numericIds = chunkIds.map((id) => hashToUint(id));
    try {
        const result = await qdrant.retrieve(collectionName, {
            ids: numericIds,
            with_payload: ["chunk_id", "content_hash"],
        });
        for (const { payload } of result) {
            if (payload?.chunk_id && payload?.content_hash) {
                hashMap.set(payload.chunk_id, payload.content_hash);
            }
        }
    }
    catch (error) {
        // Collection might not exist or be empty. Stay best effort, but make
        // other failures (network, auth) visible instead of hiding them.
        console.warn(`Could not read existing hashes from ${collectionName}; treating chunks as new:`, error);
    }
    return hashMap;
}
|
|
88
|
-
/**
 * Run a vector similarity search against the configured collection.
 *
 * @param {*} queryVector - Query embedding, forwarded to Qdrant as `vector`.
 * @param {{topK?: number, filter?: {type?: *, top_heading?: *}}} [options]
 *   `topK` caps the number of hits (default 10). Each truthy `filter` field
 *   becomes an exact-match `must` condition on the payload key of the same name.
 * @returns {Promise<Array<object>>} One entry per hit with `score` plus the
 *   payload fields `text`, `title`, `url`, `heading_path`, `type` and `doc_id`.
 */
export async function search(queryVector, options = {}) {
    const qdrant = getQdrantClient();
    const { collectionName } = RAG_CONFIG.qdrant;
    const { topK = 10, filter } = options;

    // Payload keys that may be constrained, in the order the conditions are emitted.
    const must = [];
    for (const key of ["type", "top_heading"]) {
        const value = filter?.[key];
        if (value) {
            must.push({ key, match: { value } });
        }
    }

    const hits = await qdrant.search(collectionName, {
        vector: queryVector,
        limit: topK,
        with_payload: true,
        filter: must.length > 0 ? { must } : undefined,
    });

    return hits.map(({ score, payload }) => {
        const { text, title, url, heading_path, type, doc_id } = payload;
        return { score, text, title, url, heading_path, type, doc_id };
    });
}
|
|
128
|
-
/**
 * Report basic statistics for the configured collection.
 *
 * @returns {Promise<{pointsCount: number, status: *}>} The collection's point
 *   count (0 when the server does not report one) and status. If the lookup
 *   fails for any reason, `{ pointsCount: 0, status: "not_found" }` is returned.
 */
export async function getCollectionStats() {
    const qdrant = getQdrantClient();
    const { collectionName } = RAG_CONFIG.qdrant;
    try {
        // Destructure inside the try so a malformed response also falls back.
        const { points_count: reportedCount, status } = await qdrant.getCollection(collectionName);
        return { pointsCount: reportedCount ?? 0, status };
    }
    catch {
        return { pointsCount: 0, status: "not_found" };
    }
}
|
|
148
|
-
/**
 * Derive a non-negative integer from a string (used as a Qdrant point ID).
 *
 * Folds the UTF-16 code units into a 32-bit signed accumulator using the
 * `acc * 31 + codeUnit` recurrence, then returns its absolute value.
 *
 * @param {string} str - Text to hash.
 * @returns {number} Integer in the range 0..2^31.
 */
function hashToUint(str) {
    let acc = 0;
    for (let idx = 0; idx < str.length; idx += 1) {
        // Math.imul and `| 0` keep the accumulator wrapped to 32 bits.
        acc = (Math.imul(acc, 31) + str.charCodeAt(idx)) | 0;
    }
    return Math.abs(acc);
}
|
|
160
|
-
//# sourceMappingURL=qdrant-client.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"qdrant-client.js","sourceRoot":"","sources":["../../src/rag/qdrant-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAsB,UAAU,EAAqB,MAAM,UAAU,CAAC;AAE7E,IAAI,MAAM,GAAwB,IAAI,CAAC;AAEvC;;GAEG;AACH,MAAM,UAAU,eAAe;IAC9B,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC;QACnC,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;QAE1C,IAAI,CAAC,GAAG,EAAE,CAAC;YACV,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;QAChE,CAAC;QAED,MAAM,GAAG,IAAI,YAAY,CAAC;YACzB,GAAG;YACH,MAAM;SACN,CAAC,CAAC;IACJ,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB;IACrC,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,EAAE,cAAc,EAAE,UAAU,EAAE,QAAQ,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC;IAEnE,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,cAAc,EAAE,CAAC;IAClD,MAAM,MAAM,GAAG,WAAW,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,cAAc,CAAC,CAAC;IAE9E,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,OAAO,CAAC,GAAG,CAAC,wBAAwB,cAAc,EAAE,CAAC,CAAC;QACtD,MAAM,MAAM,CAAC,gBAAgB,CAAC,cAAc,EAAE;YAC7C,OAAO,EAAE;gBACR,IAAI,EAAE,UAAU;gBAChB,QAAQ;aACR;SACD,CAAC,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,cAAc,cAAc,wBAAwB,CAAC,CAAC;IACnE,CAAC;SAAM,CAAC;QACP,OAAO,CAAC,GAAG,CAAC,cAAc,cAAc,kBAAkB,CAAC,CAAC;IAC7D,CAAC;AACF,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CACjC,MAAkE;IAElE,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,EAAE,cAAc,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC;IAE7C,+DAA+D;IAC/D,MAAM,YAAY,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACvC,EAAE,EAAE,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;QACpB,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,OAAO,EAAE,CAAC,CAAC,OAAO;KAClB,CAAC,CAAC,CAAC;IAEJ,MAAM,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE;QACnC,IAAI,EAAE,IAAI;QACV,MAAM,EAAE,YAAY;KACpB,CAAC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACtC,QAAkB;IAElB,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,EAAE,cAAc,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC;IAE7C,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE1C,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC;IAE1C,mCAAmC;IACnC,MAAM,UAAU,GAAG,QAAQ,CAAC,GAAG,CAAC
,CAAC,EAAE,EAAE,EAAE,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC;IAExD,IAAI,CAAC;QACJ,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,cAAc,EAAE;YACpD,GAAG,EAAE,UAAU;YACf,YAAY,EAAE,CAAC,UAAU,EAAE,cAAc,CAAC;SAC1C,CAAC,CAAC;QAEH,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAG,KAAK,CAAC,OAAwB,CAAC;YAC/C,IAAI,OAAO,EAAE,QAAQ,IAAI,OAAO,EAAE,YAAY,EAAE,CAAC;gBAChD,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC;YACrD,CAAC;QACF,CAAC;IACF,CAAC;IAAC,MAAM,CAAC;QACR,yCAAyC;IAC1C,CAAC;IAED,OAAO,OAAO,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,MAAM,CAC3B,WAAqB,EACrB,UAMI,EAAE;IAEN,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,EAAE,cAAc,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC;IAC7C,MAAM,EAAE,IAAI,GAAG,EAAE,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;IAEtC,0BAA0B;IAC1B,MAAM,gBAAgB,GAGjB,EAAE,CAAC;IAER,IAAI,MAAM,EAAE,IAAI,EAAE,CAAC;QAClB,gBAAgB,CAAC,IAAI,CAAC;YACrB,GAAG,EAAE,MAAM;YACX,KAAK,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,IAAI,EAAE;SAC7B,CAAC,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,EAAE,WAAW,EAAE,CAAC;QACzB,gBAAgB,CAAC,IAAI,CAAC;YACrB,GAAG,EAAE,aAAa;YAClB,KAAK,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,WAAW,EAAE;SACpC,CAAC,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE;QAClD,MAAM,EAAE,WAAW;QACnB,KAAK,EAAE,IAAI;QACX,YAAY,EAAE,IAAI;QAClB,MAAM,EACL,gBAAgB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,CAAC,CAAC,CAAC,SAAS;KACrE,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACvB,MAAM,OAAO,GAAG,CAAC,CAAC,OAAwB,CAAC;QAC3C,OAAO;YACN,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,MAAM,EAAE,OAAO,CAAC,MAAM;SACtB,CAAC;IACH,CAAC,CAAC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB;IAIvC,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,EAAE,cAAc,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC;IAE7C,IAAI,CAAC;QACJ,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,cAAc,CAAC,CAAC;QACxD,OAAO;YACN,WAAW,EAAE,IAAI,CAAC,YAAY,IAAI,CAAC;YACnC,MAAM,EAAE,IAAI,CAAC,MAAM;SACnB,CAAC;IACH,C
AAC;IAAC,MAAM,CAAC;QACR,OAAO;YACN,WAAW,EAAE,CAAC;YACd,MAAM,EAAE,WAAW;SACnB,CAAC;IACH,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,UAAU,CAAC,GAAW;IAC9B,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC;QACjC,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,2BAA2B;IAChD,CAAC;IACD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;AACvB,CAAC"}
|