@s-hirano-ist/s-scripts 1.12.2 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cleanup-minio-images.d.ts +3 -0
- package/dist/cleanup-minio-images.d.ts.map +1 -0
- package/dist/cleanup-minio-images.js +111 -0
- package/dist/cleanup-minio-images.js.map +1 -0
- package/dist/fetch-articles.js +1 -3
- package/dist/fetch-articles.js.map +1 -1
- package/dist/fetch-books.js +1 -3
- package/dist/fetch-books.js.map +1 -1
- package/dist/fetch-images.js +1 -3
- package/dist/fetch-images.js.map +1 -1
- package/dist/fetch-notes.js +1 -3
- package/dist/fetch-notes.js.map +1 -1
- package/dist/infrastructures/articles-command-repository.d.ts +11 -1
- package/dist/infrastructures/articles-command-repository.d.ts.map +1 -1
- package/dist/infrastructures/articles-command-repository.js.map +1 -1
- package/dist/infrastructures/books-command-repository.d.ts +11 -1
- package/dist/infrastructures/books-command-repository.d.ts.map +1 -1
- package/dist/infrastructures/books-command-repository.js.map +1 -1
- package/dist/infrastructures/images-command-repository.d.ts +11 -1
- package/dist/infrastructures/images-command-repository.d.ts.map +1 -1
- package/dist/infrastructures/images-command-repository.js.map +1 -1
- package/dist/infrastructures/notes-command-repository.d.ts +11 -1
- package/dist/infrastructures/notes-command-repository.d.ts.map +1 -1
- package/dist/infrastructures/notes-command-repository.js.map +1 -1
- package/dist/ingest-articles.d.ts +3 -0
- package/dist/ingest-articles.d.ts.map +1 -0
- package/dist/ingest-articles.js +230 -0
- package/dist/ingest-articles.js.map +1 -0
- package/dist/ingest-books.d.ts +3 -0
- package/dist/ingest-books.d.ts.map +1 -0
- package/dist/ingest-books.js +167 -0
- package/dist/ingest-books.js.map +1 -0
- package/dist/ingest-images.d.ts +3 -0
- package/dist/ingest-images.d.ts.map +1 -0
- package/dist/ingest-images.js +196 -0
- package/dist/ingest-images.js.map +1 -0
- package/dist/ingest-notes.d.ts +3 -0
- package/dist/ingest-notes.d.ts.map +1 -0
- package/dist/ingest-notes.js +187 -0
- package/dist/ingest-notes.js.map +1 -0
- package/dist/rag/ingest-config.d.ts +7 -0
- package/dist/rag/ingest-config.d.ts.map +1 -0
- package/dist/rag/ingest-config.js +7 -0
- package/dist/rag/ingest-config.js.map +1 -0
- package/dist/rag/ingest.d.ts +1 -1
- package/dist/rag/ingest.d.ts.map +1 -1
- package/dist/rag/ingest.js +28 -67
- package/dist/rag/ingest.js.map +1 -1
- package/dist/rag/search.d.ts +1 -1
- package/dist/rag/search.d.ts.map +1 -1
- package/dist/rag/search.js +47 -71
- package/dist/rag/search.js.map +1 -1
- package/dist/reset-articles.js +1 -3
- package/dist/reset-articles.js.map +1 -1
- package/dist/reset-books.js +1 -3
- package/dist/reset-books.js.map +1 -1
- package/dist/reset-images.js +1 -3
- package/dist/reset-images.js.map +1 -1
- package/dist/reset-notes.js +1 -3
- package/dist/reset-notes.js.map +1 -1
- package/dist/revert-articles.js +1 -3
- package/dist/revert-articles.js.map +1 -1
- package/dist/revert-books.js +1 -3
- package/dist/revert-books.js.map +1 -1
- package/dist/revert-images.js +1 -3
- package/dist/revert-images.js.map +1 -1
- package/dist/revert-notes.js +1 -3
- package/dist/revert-notes.js.map +1 -1
- package/dist/update-raw-articles.js +40 -26
- package/dist/update-raw-articles.js.map +1 -1
- package/package.json +20 -8
- package/dist/rag/chunker.d.ts +0 -10
- package/dist/rag/chunker.d.ts.map +0 -1
- package/dist/rag/chunker.js +0 -188
- package/dist/rag/chunker.js.map +0 -1
- package/dist/rag/config.d.ts +0 -44
- package/dist/rag/config.d.ts.map +0 -1
- package/dist/rag/config.js +0 -34
- package/dist/rag/config.js.map +0 -1
- package/dist/rag/embedding.d.ts +0 -15
- package/dist/rag/embedding.d.ts.map +0 -1
- package/dist/rag/embedding.js +0 -61
- package/dist/rag/embedding.js.map +0 -1
- package/dist/rag/qdrant-client.d.ts +0 -40
- package/dist/rag/qdrant-client.d.ts.map +0 -1
- package/dist/rag/qdrant-client.js +0 -160
- package/dist/rag/qdrant-client.js.map +0 -1
package/dist/rag/config.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../../src/rag/config.ts"],"names":[],"mappings":"AACA,eAAO,MAAM,UAAU;;;;;;;;;;;;;;;;;;;;;;CAmCb,CAAC;AAGX,MAAM,MAAM,aAAa,GAAG;IAC3B,IAAI,EAAE,eAAe,GAAG,eAAe,CAAC;IACxC,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,CAAC;CACrB,CAAC;AAGF,MAAM,MAAM,YAAY,GAAG;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,YAAY,EAAE,MAAM,EAAE,CAAC;IACvB,IAAI,EAAE,eAAe,GAAG,eAAe,CAAC;IACxC,MAAM,EAAE,MAAM,CAAC;CACf,CAAC"}
|
package/dist/rag/config.js
DELETED
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
/**
 * Central settings object for the RAG tooling: vector store, embedding
 * model, input file globs, chunking limits and the hash cache location.
 */
export const RAG_CONFIG = {
    // Vector store (Qdrant) collection parameters.
    qdrant: {
        collectionName: "knowledge_v1",
        // Must equal the embedding width of the model below (multilingual-e5-small).
        vectorSize: 384,
        distance: "Cosine",
    },

    // Embedding model and the text prefixes prepended before encoding.
    embedding: {
        model: "intfloat/multilingual-e5-small",
        prefix: {
            query: "query: ",
            passage: "passage: ",
        },
    },

    // Glob patterns of the files to read.
    paths: {
        markdown: [
            "markdown/note/**/*.md",
            "markdown/book/**/*.md",
            "raw/article/**/*.md",
        ],
        json: "json/article/**/*.json",
    },

    // Chunk splitting parameters.
    chunking: {
        maxChunkLength: 2000,
        // Heading depths used as split points: 2 = "##", 3 = "###".
        headingLevels: [2, 3],
    },

    // File used to cache hashes for comparison between runs.
    hashCachePath: ".rag-hash-cache.json",
};
|
|
34
|
-
//# sourceMappingURL=config.js.map
|
package/dist/rag/config.js.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/rag/config.ts"],"names":[],"mappings":"AAAA,oBAAoB;AACpB,MAAM,CAAC,MAAM,UAAU,GAAG;IACzB,kBAAkB;IAClB,MAAM,EAAE;QACP,cAAc,EAAE,cAAc;QAC9B,UAAU,EAAE,GAAG,EAAE,wBAAwB;QACzC,QAAQ,EAAE,QAAiB;KAC3B;IAED,qBAAqB;IACrB,SAAS,EAAE;QACV,KAAK,EAAE,gCAAgC;QACvC,MAAM,EAAE;YACP,KAAK,EAAE,SAAS;YAChB,OAAO,EAAE,WAAW;SACpB;KACD;IAED,aAAa;IACb,KAAK,EAAE;QACN,QAAQ,EAAE;YACT,uBAAuB;YACvB,uBAAuB;YACvB,qBAAqB;SACrB;QACD,IAAI,EAAE,wBAAwB;KAC9B;IAED,oBAAoB;IACpB,QAAQ,EAAE;QACT,cAAc,EAAE,IAAI;QACpB,aAAa,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,aAAa;KACpC;IAED,iCAAiC;IACjC,aAAa,EAAE,sBAAsB;CAC5B,CAAC"}
|
package/dist/rag/embedding.d.ts
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Generate embedding for a single text
|
|
3
|
-
* @param text - Input text to embed
|
|
4
|
-
* @param isQuery - Whether this is a query (vs passage)
|
|
5
|
-
* @returns Embedding vector
|
|
6
|
-
*/
|
|
7
|
-
export declare function embed(text: string, isQuery?: boolean): Promise<number[]>;
|
|
8
|
-
/**
|
|
9
|
-
* Generate embeddings for multiple texts in batch
|
|
10
|
-
* @param texts - Array of input texts
|
|
11
|
-
* @param isQuery - Whether these are queries (vs passages)
|
|
12
|
-
* @returns Array of embedding vectors
|
|
13
|
-
*/
|
|
14
|
-
export declare function embedBatch(texts: string[], isQuery?: boolean): Promise<number[][]>;
|
|
15
|
-
//# sourceMappingURL=embedding.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"embedding.d.ts","sourceRoot":"","sources":["../../src/rag/embedding.ts"],"names":[],"mappings":"AAwBA;;;;;GAKG;AACH,wBAAsB,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,UAAQ,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAiB5E;AAED;;;;;GAKG;AACH,wBAAsB,UAAU,CAC/B,KAAK,EAAE,MAAM,EAAE,EACf,OAAO,UAAQ,GACb,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CA2BrB"}
|
package/dist/rag/embedding.js
DELETED
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
import { pipeline, } from "@huggingface/transformers";
|
|
2
|
-
import { RAG_CONFIG } from "./config.js";
|
|
3
|
-
// Module-level cache so the model is only loaded once per process.
let embeddingPipeline = null;

/**
 * Return the feature-extraction pipeline, loading the configured model on
 * the first call and reusing the cached instance afterwards.
 *
 * @returns The cached (or freshly created) embedding pipeline.
 */
async function getEmbeddingPipeline() {
    if (embeddingPipeline) {
        return embeddingPipeline;
    }
    const modelName = RAG_CONFIG.embedding.model;
    console.log(`Loading embedding model: ${modelName}...`);
    embeddingPipeline = await pipeline("feature-extraction", modelName, {
        dtype: "fp32",
    });
    console.log("Embedding model loaded successfully.");
    return embeddingPipeline;
}
|
|
15
|
-
/**
 * Embed one piece of text.
 *
 * The text is prefixed with the configured query or passage marker before
 * it is handed to the model (the original author notes that E5 models
 * require these prefixes). The pipeline is called with mean pooling and
 * normalization enabled.
 *
 * @param text - Input text to embed
 * @param isQuery - true to use the query prefix, false (default) for the passage prefix
 * @returns The embedding as a plain array of numbers
 */
export async function embed(text, isQuery = false) {
    const extractor = await getEmbeddingPipeline();
    const { query, passage } = RAG_CONFIG.embedding.prefix;
    const marker = isQuery ? query : passage;
    const result = await extractor(marker + text, {
        pooling: "mean",
        normalize: true,
    });
    // result.data is array-like; copy it into a regular array.
    return Array.from(result.data);
}
|
|
35
|
-
/**
 * Embed several texts with a single pipeline call.
 *
 * Each text receives the configured query or passage prefix. The pipeline
 * result is treated as one flat buffer that is cut into consecutive slices
 * of `RAG_CONFIG.qdrant.vectorSize` values, one slice per input text.
 *
 * @param texts - Array of input texts
 * @param isQuery - true to use the query prefix, false (default) for the passage prefix
 * @returns One embedding (array of numbers) per input text, in input order
 */
export async function embedBatch(texts, isQuery = false) {
    const extractor = await getEmbeddingPipeline();
    const { query, passage } = RAG_CONFIG.embedding.prefix;
    const marker = isQuery ? query : passage;
    const result = await extractor(
        texts.map((entry) => marker + entry),
        { pooling: "mean", normalize: true },
    );
    // The flat output is reshaped using the configured vector width.
    const width = RAG_CONFIG.qdrant.vectorSize;
    return Array.from({ length: texts.length }, (_, row) =>
        Array.from(result.data.slice(row * width, row * width + width)),
    );
}
|
|
61
|
-
//# sourceMappingURL=embedding.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"embedding.js","sourceRoot":"","sources":["../../src/rag/embedding.ts"],"names":[],"mappings":"AAAA,OAAO,EAEN,QAAQ,GACR,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAEtC,IAAI,iBAAiB,GAAqC,IAAI,CAAC;AAE/D;;GAEG;AACH,KAAK,UAAU,oBAAoB;IAClC,IAAI,CAAC,iBAAiB,EAAE,CAAC;QACxB,OAAO,CAAC,GAAG,CAAC,4BAA4B,UAAU,CAAC,SAAS,CAAC,KAAK,KAAK,CAAC,CAAC;QACzE,iBAAiB,GAAG,CAAC,MAAM,QAAQ,CAClC,oBAAoB,EACpB,UAAU,CAAC,SAAS,CAAC,KAAK,EAC1B,EAAE,KAAK,EAAE,MAAM,EAAE,CACjB,CAAyC,CAAC;QAC3C,OAAO,CAAC,GAAG,CAAC,sCAAsC,CAAC,CAAC;IACrD,CAAC;IACD,OAAO,iBAAiB,CAAC;AAC1B,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,KAAK,CAAC,IAAY,EAAE,OAAO,GAAG,KAAK;IACxD,MAAM,IAAI,GAAG,MAAM,oBAAoB,EAAE,CAAC;IAE1C,6BAA6B;IAC7B,MAAM,MAAM,GAAG,OAAO;QACrB,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK;QACnC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,CAAC;IAEvC,MAAM,YAAY,GAAG,MAAM,GAAG,IAAI,CAAC;IAEnC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,YAAY,EAAE;QACvC,OAAO,EAAE,MAAM;QACf,SAAS,EAAE,IAAI;KACf,CAAC,CAAC;IAEH,mBAAmB;IACnB,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAoB,CAAC,CAAC;AAChD,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,UAAU,CAC/B,KAAe,EACf,OAAO,GAAG,KAAK;IAEf,MAAM,IAAI,GAAG,MAAM,oBAAoB,EAAE,CAAC;IAE1C,MAAM,MAAM,GAAG,OAAO;QACrB,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK;QACnC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,CAAC;IAEvC,MAAM,aAAa,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAEnD,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,aAAa,EAAE;QACzC,OAAO,EAAE,MAAM;QACf,SAAS,EAAE,IAAI;KACf,CAAC,CAAC;IAEH,uDAAuD;IACvD,MAAM,UAAU,GAAe,EAAE,CAAC;IAClC,MAAM,GAAG,GAAG,UAAU,CAAC,MAAM,CAAC,UAAU,CAAC;IAEzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,MAAM,KAAK,GAAG,CAAC,GAAG,GAAG,CAAC;QACtB,MAAM,GAAG,GAAG,KAAK,GAAG,GAAG,CAAC;QACxB,UAAU,CAAC,IAAI,CACd,KAAK,CAAC,IAAI,CAAE,OAAO,CAAC,IAAqB,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAC5D,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACnB,CAAC"}
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
import { QdrantClient } from "@qdrant/js-client-rest";
|
|
2
|
-
import { type QdrantPayload, type SearchResult } from "./config.js";
|
|
3
|
-
/**
|
|
4
|
-
* Get or create Qdrant client
|
|
5
|
-
*/
|
|
6
|
-
export declare function getQdrantClient(): QdrantClient;
|
|
7
|
-
/**
|
|
8
|
-
* Create collection if not exists
|
|
9
|
-
*/
|
|
10
|
-
export declare function ensureCollection(): Promise<void>;
|
|
11
|
-
/**
|
|
12
|
-
* Upsert points to Qdrant
|
|
13
|
-
*/
|
|
14
|
-
export declare function upsertPoints(points: {
|
|
15
|
-
id: string;
|
|
16
|
-
vector: number[];
|
|
17
|
-
payload: QdrantPayload;
|
|
18
|
-
}[]): Promise<void>;
|
|
19
|
-
/**
|
|
20
|
-
* Get existing content hashes for a set of chunk IDs
|
|
21
|
-
*/
|
|
22
|
-
export declare function getExistingHashes(chunkIds: string[]): Promise<Map<string, string>>;
|
|
23
|
-
/**
|
|
24
|
-
* Search for similar documents
|
|
25
|
-
*/
|
|
26
|
-
export declare function search(queryVector: number[], options?: {
|
|
27
|
-
topK?: number;
|
|
28
|
-
filter?: {
|
|
29
|
-
type?: "markdown_note" | "bookmark_json";
|
|
30
|
-
top_heading?: string;
|
|
31
|
-
};
|
|
32
|
-
}): Promise<SearchResult[]>;
|
|
33
|
-
/**
|
|
34
|
-
* Get collection stats
|
|
35
|
-
*/
|
|
36
|
-
export declare function getCollectionStats(): Promise<{
|
|
37
|
-
pointsCount: number;
|
|
38
|
-
status: string;
|
|
39
|
-
}>;
|
|
40
|
-
//# sourceMappingURL=qdrant-client.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"qdrant-client.d.ts","sourceRoot":"","sources":["../../src/rag/qdrant-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAE,KAAK,aAAa,EAAc,KAAK,YAAY,EAAE,MAAM,UAAU,CAAC;AAI7E;;GAEG;AACH,wBAAgB,eAAe,IAAI,YAAY,CAgB9C;AAED;;GAEG;AACH,wBAAsB,gBAAgB,IAAI,OAAO,CAAC,IAAI,CAAC,CAmBtD;AAED;;GAEG;AACH,wBAAsB,YAAY,CACjC,MAAM,EAAE;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAC;IAAC,OAAO,EAAE,aAAa,CAAA;CAAE,EAAE,GAChE,OAAO,CAAC,IAAI,CAAC,CAef;AAED;;GAEG;AACH,wBAAsB,iBAAiB,CACtC,QAAQ,EAAE,MAAM,EAAE,GAChB,OAAO,CAAC,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CA4B9B;AAED;;GAEG;AACH,wBAAsB,MAAM,CAC3B,WAAW,EAAE,MAAM,EAAE,EACrB,OAAO,GAAE;IACR,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,MAAM,CAAC,EAAE;QACR,IAAI,CAAC,EAAE,eAAe,GAAG,eAAe,CAAC;QACzC,WAAW,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;CACG,GACJ,OAAO,CAAC,YAAY,EAAE,CAAC,CA6CzB;AAED;;GAEG;AACH,wBAAsB,kBAAkB,IAAI,OAAO,CAAC;IACnD,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,EAAE,MAAM,CAAC;CACf,CAAC,CAgBD"}
|
|
@@ -1,160 +0,0 @@
|
|
|
1
|
-
import { QdrantClient } from "@qdrant/js-client-rest";
|
|
2
|
-
import { RAG_CONFIG } from "./config.js";
|
|
3
|
-
// Lazily created client shared by every function in this module.
let client = null;

/**
 * Return the shared Qdrant client, creating it on first use.
 *
 * Connection settings are read from the QDRANT_URL and QDRANT_API_KEY
 * environment variables.
 *
 * @returns The cached (or newly constructed) QdrantClient
 * @throws Error when QDRANT_URL is not set and no client exists yet
 */
export function getQdrantClient() {
    if (client) {
        return client;
    }
    const { QDRANT_URL: url, QDRANT_API_KEY: apiKey } = process.env;
    if (!url) {
        throw new Error("QDRANT_URL environment variable is required");
    }
    client = new QdrantClient({ url, apiKey });
    return client;
}
|
|
21
|
-
/**
 * Make sure the configured collection exists, creating it with the
 * configured vector size and distance metric when it is missing.
 * Progress is reported on the console in both cases.
 */
export async function ensureCollection() {
    const qdrant = getQdrantClient();
    const { collectionName, vectorSize, distance } = RAG_CONFIG.qdrant;
    const listing = await qdrant.getCollections();
    const alreadyThere = listing.collections.some(
        (entry) => entry.name === collectionName,
    );
    if (alreadyThere) {
        console.log(`Collection ${collectionName} already exists.`);
        return;
    }
    console.log(`Creating collection: ${collectionName}`);
    await qdrant.createCollection(collectionName, {
        vectors: { size: vectorSize, distance },
    });
    console.log(`Collection ${collectionName} created successfully.`);
}
|
|
43
|
-
/**
 * Write points into the configured collection and wait for completion.
 *
 * String chunk IDs are converted with hashToUint because, per the original
 * author's note, Qdrant requires numeric or UUID point IDs.
 *
 * @param points - Items with a string `id`, a `vector` and a `payload`
 */
export async function upsertPoints(points) {
    const qdrant = getQdrantClient();
    const { collectionName } = RAG_CONFIG.qdrant;
    const converted = points.map(({ id, vector, payload }) => ({
        id: hashToUint(id),
        vector,
        payload,
    }));
    await qdrant.upsert(collectionName, { wait: true, points: converted });
}
|
|
60
|
-
/**
 * Look up the stored content hashes for a set of chunk IDs.
 *
 * Chunk IDs are converted to numeric point IDs with hashToUint and fetched
 * from the configured collection. Only points whose payload carries both
 * `chunk_id` and `content_hash` contribute an entry.
 *
 * Retrieval stays best-effort: a failure (for example a collection that
 * does not exist yet) does not reject. Unlike before, the failure is now
 * reported with console.warn instead of being discarded silently, so
 * connectivity or authentication problems are visible.
 *
 * @param chunkIds - Chunk IDs to look up; an empty array short-circuits
 *   before any client is created
 * @returns Map from chunk_id to content_hash for the points that were found
 */
export async function getExistingHashes(chunkIds) {
    const hashMap = new Map();
    // Nothing to look up: return early without needing a configured client.
    if (chunkIds.length === 0) {
        return hashMap;
    }
    const qdrant = getQdrantClient();
    const { collectionName } = RAG_CONFIG.qdrant;
    // Convert chunk IDs to the numeric IDs used when the points were stored.
    const numericIds = chunkIds.map((id) => hashToUint(id));
    try {
        const result = await qdrant.retrieve(collectionName, {
            ids: numericIds,
            with_payload: ["chunk_id", "content_hash"],
        });
        for (const point of result) {
            const payload = point.payload;
            if (payload?.chunk_id && payload?.content_hash) {
                hashMap.set(payload.chunk_id, payload.content_hash);
            }
        }
    } catch (err) {
        // Collection might not exist or be empty; keep going, but say so.
        console.warn(
            `Could not retrieve existing hashes from "${collectionName}":`,
            err,
        );
    }
    return hashMap;
}
|
|
88
|
-
/**
 * Run a vector similarity search against the configured collection.
 *
 * @param queryVector - Vector to search with
 * @param options - Optional settings: `topK` (result limit, default 10) and
 *   `filter`, whose truthy `type` / `top_heading` values are sent as
 *   `must` conditions of the form `{ key, match: { value } }`
 * @returns One object per hit with the score and the payload fields
 *   text, title, url, heading_path, type and doc_id
 */
export async function search(queryVector, options = {}) {
    const qdrant = getQdrantClient();
    const { collectionName } = RAG_CONFIG.qdrant;
    const { topK = 10, filter } = options;
    // Only the filter keys that were actually supplied become conditions.
    const must = ["type", "top_heading"]
        .filter((key) => filter?.[key])
        .map((key) => ({ key, match: { value: filter[key] } }));
    const hits = await qdrant.search(collectionName, {
        vector: queryVector,
        limit: topK,
        with_payload: true,
        filter: must.length > 0 ? { must } : undefined,
    });
    return hits.map(({ score, payload }) => {
        const { text, title, url, heading_path, type, doc_id } = payload;
        return { score, text, title, url, heading_path, type, doc_id };
    });
}
|
|
128
|
-
/**
 * Report the point count and status of the configured collection.
 *
 * Any error raised while fetching the collection info is turned into a
 * placeholder result with zero points and the status "not_found".
 *
 * @returns `{ pointsCount, status }`
 */
export async function getCollectionStats() {
    const qdrant = getQdrantClient();
    const { collectionName } = RAG_CONFIG.qdrant;
    try {
        const info = await qdrant.getCollection(collectionName);
        // points_count may be absent; report 0 in that case.
        const pointsCount = info.points_count ?? 0;
        return { pointsCount, status: info.status };
    } catch {
        const placeholder = { pointsCount: 0, status: "not_found" };
        return placeholder;
    }
}
|
|
148
|
-
/**
 * Derive a non-negative integer from a string (used as Qdrant point ID).
 *
 * Folds the UTF-16 code units with `h = h * 31 + unit`, truncated to a
 * signed 32-bit integer after every step, and returns the absolute value.
 * The result therefore lies in the range 0..2^31, so distinct strings can
 * map to the same number.
 *
 * @param str - String to hash
 * @returns Non-negative integer hash of `str` (0 for the empty string)
 */
function hashToUint(str) {
    let acc = 0;
    for (let pos = 0; pos < str.length; pos += 1) {
        // (acc << 5) - acc equals acc * 31; `| 0` wraps to signed 32 bits.
        acc = ((acc << 5) - acc + str.charCodeAt(pos)) | 0;
    }
    return Math.abs(acc);
}
|
|
160
|
-
//# sourceMappingURL=qdrant-client.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"qdrant-client.js","sourceRoot":"","sources":["../../src/rag/qdrant-client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAsB,UAAU,EAAqB,MAAM,UAAU,CAAC;AAE7E,IAAI,MAAM,GAAwB,IAAI,CAAC;AAEvC;;GAEG;AACH,MAAM,UAAU,eAAe;IAC9B,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,MAAM,GAAG,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC;QACnC,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC;QAE1C,IAAI,CAAC,GAAG,EAAE,CAAC;YACV,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;QAChE,CAAC;QAED,MAAM,GAAG,IAAI,YAAY,CAAC;YACzB,GAAG;YACH,MAAM;SACN,CAAC,CAAC;IACJ,CAAC;IAED,OAAO,MAAM,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,gBAAgB;IACrC,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,EAAE,cAAc,EAAE,UAAU,EAAE,QAAQ,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC;IAEnE,MAAM,WAAW,GAAG,MAAM,MAAM,CAAC,cAAc,EAAE,CAAC;IAClD,MAAM,MAAM,GAAG,WAAW,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,cAAc,CAAC,CAAC;IAE9E,IAAI,CAAC,MAAM,EAAE,CAAC;QACb,OAAO,CAAC,GAAG,CAAC,wBAAwB,cAAc,EAAE,CAAC,CAAC;QACtD,MAAM,MAAM,CAAC,gBAAgB,CAAC,cAAc,EAAE;YAC7C,OAAO,EAAE;gBACR,IAAI,EAAE,UAAU;gBAChB,QAAQ;aACR;SACD,CAAC,CAAC;QACH,OAAO,CAAC,GAAG,CAAC,cAAc,cAAc,wBAAwB,CAAC,CAAC;IACnE,CAAC;SAAM,CAAC;QACP,OAAO,CAAC,GAAG,CAAC,cAAc,cAAc,kBAAkB,CAAC,CAAC;IAC7D,CAAC;AACF,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,YAAY,CACjC,MAAkE;IAElE,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,EAAE,cAAc,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC;IAE7C,+DAA+D;IAC/D,MAAM,YAAY,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACvC,EAAE,EAAE,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;QACpB,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,OAAO,EAAE,CAAC,CAAC,OAAO;KAClB,CAAC,CAAC,CAAC;IAEJ,MAAM,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE;QACnC,IAAI,EAAE,IAAI;QACV,MAAM,EAAE,YAAY;KACpB,CAAC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACtC,QAAkB;IAElB,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,EAAE,cAAc,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC;IAE7C,MAAM,OAAO,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE1C,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,OAAO,CAAC;IAE1C,mCAAmC;IACnC,MAAM,UAAU,GAAG,QAAQ,CAAC,GAAG,CAAC
,CAAC,EAAE,EAAE,EAAE,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,CAAC;IAExD,IAAI,CAAC;QACJ,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,cAAc,EAAE;YACpD,GAAG,EAAE,UAAU;YACf,YAAY,EAAE,CAAC,UAAU,EAAE,cAAc,CAAC;SAC1C,CAAC,CAAC;QAEH,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAG,KAAK,CAAC,OAAwB,CAAC;YAC/C,IAAI,OAAO,EAAE,QAAQ,IAAI,OAAO,EAAE,YAAY,EAAE,CAAC;gBAChD,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,EAAE,OAAO,CAAC,YAAY,CAAC,CAAC;YACrD,CAAC;QACF,CAAC;IACF,CAAC;IAAC,MAAM,CAAC;QACR,yCAAyC;IAC1C,CAAC;IAED,OAAO,OAAO,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,MAAM,CAC3B,WAAqB,EACrB,UAMI,EAAE;IAEN,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,EAAE,cAAc,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC;IAC7C,MAAM,EAAE,IAAI,GAAG,EAAE,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC;IAEtC,0BAA0B;IAC1B,MAAM,gBAAgB,GAGjB,EAAE,CAAC;IAER,IAAI,MAAM,EAAE,IAAI,EAAE,CAAC;QAClB,gBAAgB,CAAC,IAAI,CAAC;YACrB,GAAG,EAAE,MAAM;YACX,KAAK,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,IAAI,EAAE;SAC7B,CAAC,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,EAAE,WAAW,EAAE,CAAC;QACzB,gBAAgB,CAAC,IAAI,CAAC;YACrB,GAAG,EAAE,aAAa;YAClB,KAAK,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,WAAW,EAAE;SACpC,CAAC,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,MAAM,CAAC,cAAc,EAAE;QAClD,MAAM,EAAE,WAAW;QACnB,KAAK,EAAE,IAAI;QACX,YAAY,EAAE,IAAI;QAClB,MAAM,EACL,gBAAgB,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,gBAAgB,EAAE,CAAC,CAAC,CAAC,SAAS;KACrE,CAAC,CAAC;IAEH,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;QACvB,MAAM,OAAO,GAAG,CAAC,CAAC,OAAwB,CAAC;QAC3C,OAAO;YACN,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,YAAY,EAAE,OAAO,CAAC,YAAY;YAClC,IAAI,EAAE,OAAO,CAAC,IAAI;YAClB,MAAM,EAAE,OAAO,CAAC,MAAM;SACtB,CAAC;IACH,CAAC,CAAC,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,kBAAkB;IAIvC,MAAM,MAAM,GAAG,eAAe,EAAE,CAAC;IACjC,MAAM,EAAE,cAAc,EAAE,GAAG,UAAU,CAAC,MAAM,CAAC;IAE7C,IAAI,CAAC;QACJ,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,cAAc,CAAC,CAAC;QACxD,OAAO;YACN,WAAW,EAAE,IAAI,CAAC,YAAY,IAAI,CAAC;YACnC,MAAM,EAAE,IAAI,CAAC,MAAM;SACnB,CAAC;IACH,C
AAC;IAAC,MAAM,CAAC;QACR,OAAO;YACN,WAAW,EAAE,CAAC;YACd,MAAM,EAAE,WAAW;SACnB,CAAC;IACH,CAAC;AACF,CAAC;AAED;;GAEG;AACH,SAAS,UAAU,CAAC,GAAW;IAC9B,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACrC,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAC/B,IAAI,GAAG,CAAC,IAAI,IAAI,CAAC,CAAC,GAAG,IAAI,GAAG,IAAI,CAAC;QACjC,IAAI,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,2BAA2B;IAChD,CAAC;IACD,OAAO,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;AACvB,CAAC"}
|