@nahisaho/katashiro-rag 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/RAGEngine.d.ts +58 -0
- package/dist/RAGEngine.d.ts.map +1 -0
- package/dist/RAGEngine.js +97 -0
- package/dist/RAGEngine.js.map +1 -0
- package/dist/RAGPipeline.d.ts +162 -0
- package/dist/RAGPipeline.d.ts.map +1 -0
- package/dist/RAGPipeline.js +222 -0
- package/dist/RAGPipeline.js.map +1 -0
- package/dist/Retriever.d.ts +49 -0
- package/dist/Retriever.d.ts.map +1 -0
- package/dist/Retriever.js +96 -0
- package/dist/Retriever.js.map +1 -0
- package/dist/chunking/DocumentChunker.d.ts +47 -0
- package/dist/chunking/DocumentChunker.d.ts.map +1 -0
- package/dist/chunking/DocumentChunker.js +171 -0
- package/dist/chunking/DocumentChunker.js.map +1 -0
- package/dist/chunking/index.d.ts +5 -0
- package/dist/chunking/index.d.ts.map +1 -0
- package/dist/chunking/index.js +5 -0
- package/dist/chunking/index.js.map +1 -0
- package/dist/embedding/AzureOpenAIEmbeddingProvider.d.ts +63 -0
- package/dist/embedding/AzureOpenAIEmbeddingProvider.d.ts.map +1 -0
- package/dist/embedding/AzureOpenAIEmbeddingProvider.js +133 -0
- package/dist/embedding/AzureOpenAIEmbeddingProvider.js.map +1 -0
- package/dist/embedding/BaseEmbeddingProvider.d.ts +43 -0
- package/dist/embedding/BaseEmbeddingProvider.d.ts.map +1 -0
- package/dist/embedding/BaseEmbeddingProvider.js +98 -0
- package/dist/embedding/BaseEmbeddingProvider.js.map +1 -0
- package/dist/embedding/EmbeddingFactory.d.ts +75 -0
- package/dist/embedding/EmbeddingFactory.d.ts.map +1 -0
- package/dist/embedding/EmbeddingFactory.js +153 -0
- package/dist/embedding/EmbeddingFactory.js.map +1 -0
- package/dist/embedding/EmbeddingManager.d.ts +41 -0
- package/dist/embedding/EmbeddingManager.d.ts.map +1 -0
- package/dist/embedding/EmbeddingManager.js +93 -0
- package/dist/embedding/EmbeddingManager.js.map +1 -0
- package/dist/embedding/MockEmbeddingProvider.d.ts +54 -0
- package/dist/embedding/MockEmbeddingProvider.d.ts.map +1 -0
- package/dist/embedding/MockEmbeddingProvider.js +91 -0
- package/dist/embedding/MockEmbeddingProvider.js.map +1 -0
- package/dist/embedding/OllamaEmbeddingProvider.d.ts +69 -0
- package/dist/embedding/OllamaEmbeddingProvider.d.ts.map +1 -0
- package/dist/embedding/OllamaEmbeddingProvider.js +136 -0
- package/dist/embedding/OllamaEmbeddingProvider.js.map +1 -0
- package/dist/embedding/OpenAIEmbeddingProvider.d.ts +83 -0
- package/dist/embedding/OpenAIEmbeddingProvider.d.ts.map +1 -0
- package/dist/embedding/OpenAIEmbeddingProvider.js +150 -0
- package/dist/embedding/OpenAIEmbeddingProvider.js.map +1 -0
- package/dist/embedding/index.d.ts +16 -0
- package/dist/embedding/index.d.ts.map +1 -0
- package/dist/embedding/index.js +15 -0
- package/dist/embedding/index.js.map +1 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +22 -0
- package/dist/index.js.map +1 -0
- package/dist/reranking/LLMReranker.d.ts +147 -0
- package/dist/reranking/LLMReranker.d.ts.map +1 -0
- package/dist/reranking/LLMReranker.js +262 -0
- package/dist/reranking/LLMReranker.js.map +1 -0
- package/dist/reranking/index.d.ts +7 -0
- package/dist/reranking/index.d.ts.map +1 -0
- package/dist/reranking/index.js +7 -0
- package/dist/reranking/index.js.map +1 -0
- package/dist/types.d.ts +144 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +8 -0
- package/dist/types.js.map +1 -0
- package/dist/vectordb/FileVectorStore.d.ts +93 -0
- package/dist/vectordb/FileVectorStore.d.ts.map +1 -0
- package/dist/vectordb/FileVectorStore.js +218 -0
- package/dist/vectordb/FileVectorStore.js.map +1 -0
- package/dist/vectordb/InMemoryVectorStore.d.ts +48 -0
- package/dist/vectordb/InMemoryVectorStore.d.ts.map +1 -0
- package/dist/vectordb/InMemoryVectorStore.js +86 -0
- package/dist/vectordb/InMemoryVectorStore.js.map +1 -0
- package/dist/vectordb/index.d.ts +8 -0
- package/dist/vectordb/index.d.ts.map +1 -0
- package/dist/vectordb/index.js +6 -0
- package/dist/vectordb/index.js.map +1 -0
- package/package.json +37 -0
- package/src/RAGEngine.ts +127 -0
- package/src/RAGPipeline.ts +357 -0
- package/src/Retriever.ts +121 -0
- package/src/chunking/DocumentChunker.ts +207 -0
- package/src/chunking/index.ts +5 -0
- package/src/embedding/AzureOpenAIEmbeddingProvider.ts +208 -0
- package/src/embedding/BaseEmbeddingProvider.ts +133 -0
- package/src/embedding/EmbeddingFactory.ts +225 -0
- package/src/embedding/EmbeddingManager.ts +110 -0
- package/src/embedding/MockEmbeddingProvider.ts +123 -0
- package/src/embedding/OllamaEmbeddingProvider.ts +197 -0
- package/src/embedding/OpenAIEmbeddingProvider.ts +226 -0
- package/src/embedding/index.ts +33 -0
- package/src/index.ts +55 -0
- package/src/reranking/LLMReranker.ts +401 -0
- package/src/reranking/index.ts +15 -0
- package/src/types.ts +157 -0
- package/src/vectordb/FileVectorStore.ts +289 -0
- package/src/vectordb/InMemoryVectorStore.ts +121 -0
- package/src/vectordb/index.ts +9 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Retriever - 検索エンジン
|
|
3
|
+
*
|
|
4
|
+
* @requirement REQ-RAG-004
|
|
5
|
+
* @design DES-KATASHIRO-003-RAG §3.4
|
|
6
|
+
*/
|
|
7
|
+
import { EmbeddingManager } from './embedding/EmbeddingManager.js';
|
|
8
|
+
/**
 * Default search settings, used for every option the caller does not override.
 */
const DEFAULT_CONFIG = {
    topK: 5,
    minScore: 0.5,
};
/**
 * Retriever - search engine that combines embedding generation with
 * vector-store search.
 */
export class Retriever {
    embeddingManager;
    vectorStore;
    config;
    /**
     * @param embeddingProvider Provider wrapped in an EmbeddingManager to embed chunks and queries.
     * @param vectorStore Store that receives `addBatch`, `search` and `delete` calls.
     * @param config Optional overrides for `topK` / `minScore`.
     */
    constructor(embeddingProvider, vectorStore, config = {}) {
        this.embeddingManager = new EmbeddingManager(embeddingProvider);
        this.vectorStore = vectorStore;
        this.config = { ...DEFAULT_CONFIG, ...config };
    }
    /**
     * Adds one document's chunks to the index.
     *
     * @param _document The source document (unused; only its chunks are indexed).
     * @param chunks Chunks to embed and store.
     */
    async addDocument(_document, chunks) {
        await this.indexChunks(chunks);
    }
    /**
     * Adds several documents to the index, one store batch per document.
     *
     * @param documents Entries carrying a `chunks` array each.
     */
    async addDocuments(documents) {
        for (const { chunks } of documents) {
            await this.indexChunks(chunks);
        }
    }
    /**
     * Embeds every chunk (concurrently) and stores the resulting
     * `{ chunk, vector }` pairs as a single batch.
     */
    async indexChunks(chunks) {
        const items = await Promise.all(chunks.map(async (chunk) => ({
            chunk,
            vector: await this.embeddingManager.embed(chunk.content),
        })));
        await this.vectorStore.addBatch(items);
    }
    /**
     * Searches with a single query.
     *
     * @param query Query text to embed.
     * @returns Up to `topK` store results whose score is at least `minScore`.
     */
    async search(query) {
        const queryVector = await this.embeddingManager.embed(query);
        const results = await this.vectorStore.search(queryVector, this.config.topK);
        // Drop results below the configured minimum score.
        return results.filter((r) => r.score >= this.config.minScore);
    }
    /**
     * Searches with several queries and merges the results.
     *
     * When the same chunk is returned for more than one query, the result with
     * the highest score is kept, so the final ranking reflects each chunk's
     * best match rather than whichever query happened to run first.
     *
     * @param queries Query texts, searched one after another.
     * @returns Up to `topK` unique chunks, sorted by descending score.
     */
    async searchMultiple(queries) {
        const bestByChunkId = new Map();
        for (const query of queries) {
            const results = await this.search(query);
            for (const result of results) {
                const best = bestByChunkId.get(result.chunk.id);
                if (best === undefined || result.score > best.score) {
                    bestByChunkId.set(result.chunk.id, result);
                }
            }
        }
        // Sort by descending score and cap at topK.
        return [...bestByChunkId.values()]
            .sort((a, b) => b.score - a.score)
            .slice(0, this.config.topK);
    }
    /**
     * Deletes a chunk from the vector store.
     *
     * @param chunkId Id of the chunk to delete.
     * @returns Whatever the store's `delete` returns.
     */
    async deleteChunk(chunkId) {
        return this.vectorStore.delete(chunkId);
    }
    /**
     * Returns a shallow copy of the current search settings.
     */
    getConfig() {
        return { ...this.config };
    }
    /**
     * Merges the given settings over the current ones.
     *
     * @param config Partial settings to apply.
     */
    updateConfig(config) {
        this.config = { ...this.config, ...config };
    }
}
|
|
96
|
+
//# sourceMappingURL=Retriever.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Retriever.js","sourceRoot":"","sources":["../src/Retriever.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,gBAAgB,EAAE,MAAM,iCAAiC,CAAC;AAGnE;;GAEG;AACH,MAAM,cAAc,GAA8B;IAChD,IAAI,EAAE,CAAC;IACP,QAAQ,EAAE,GAAG;CACd,CAAC;AAEF;;;GAGG;AACH,MAAM,OAAO,SAAS;IACZ,gBAAgB,CAAmB;IACnC,WAAW,CAAc;IACzB,MAAM,CAA4B;IAE1C,YACE,iBAAoC,EACpC,WAAwB,EACxB,SAA0B,EAAE;QAE5B,IAAI,CAAC,gBAAgB,GAAG,IAAI,gBAAgB,CAAC,iBAAiB,CAAC,CAAC;QAChE,IAAI,CAAC,WAAW,GAAG,WAAW,CAAC;QAC/B,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,cAAc,EAAE,GAAG,MAAM,EAAE,CAAC;IACjD,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,WAAW,CAAC,SAAmB,EAAE,MAAe;QACpD,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,GAAG,CAC7B,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;YAC3B,KAAK;YACL,MAAM,EAAE,MAAM,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC;SACzD,CAAC,CAAC,CACJ,CAAC;QAEF,MAAM,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;IACzC,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,YAAY,CAAC,SAAyD;QAC1E,KAAK,MAAM,EAAE,MAAM,EAAE,IAAI,SAAS,EAAE,CAAC;YACnC,MAAM,KAAK,GAAG,MAAM,OAAO,CAAC,GAAG,CAC7B,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;gBAC3B,KAAK;gBACL,MAAM,EAAE,MAAM,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,KAAK,CAAC,OAAO,CAAC;aACzD,CAAC,CAAC,CACJ,CAAC;YAEF,MAAM,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;QACzC,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,KAAa;QACxB,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAC7D,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,WAAW,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAE7E,mBAAmB;QACnB,OAAO,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;IAChE,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,cAAc,CAAC,OAAiB;QACpC,MAAM,UAAU,GAAmB,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;QAE/B,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YAEzC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,EAAE,CAAC;oBAC/B,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,EA
AE,CAAC,CAAC;oBAC1B,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAC1B,CAAC;YACH,CAAC;QACH,CAAC;QAED,gBAAgB;QAChB,OAAO,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACjF,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,WAAW,CAAC,OAAe;QAC/B,OAAO,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC1C,CAAC;IAED;;OAEG;IACH,SAAS;QACP,OAAO,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,YAAY,CAAC,MAAgC;QAC3C,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,GAAG,MAAM,EAAE,CAAC;IAC9C,CAAC;CACF"}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document Chunker - ドキュメント分割
|
|
3
|
+
*
|
|
4
|
+
* @requirement REQ-RAG-003
|
|
5
|
+
* @design DES-KATASHIRO-003-RAG §3.3
|
|
6
|
+
*/
|
|
7
|
+
import type { Chunk, ChunkingConfig, Document } from '../types.js';
|
|
8
|
+
/**
 * Class that splits documents into chunks.
 */
export declare class DocumentChunker {
    private config;
    constructor(config?: ChunkingConfig);
    /**
     * Splits a single document into chunks.
     */
    chunk(document: Document): Chunk[];
    /**
     * Splits several documents into chunks and returns them as one flat array.
     */
    chunkBatch(documents: Document[]): Chunk[];
    /**
     * Splits into chunks of a fixed size.
     */
    private chunkByFixed;
    /**
     * Splits into chunks on sentence boundaries.
     */
    private chunkBySentence;
    /**
     * Splits into chunks on paragraph boundaries.
     */
    private chunkByParagraph;
    /**
     * Splits text into sentences.
     */
    private splitBySentence;
    /**
     * Returns the text to carry over as overlap into the next chunk.
     */
    private getOverlapText;
    /**
     * Builds a chunk object.
     */
    private createChunk;
}
|
|
47
|
+
//# sourceMappingURL=DocumentChunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DocumentChunker.d.ts","sourceRoot":"","sources":["../../src/chunking/DocumentChunker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAYnE;;GAEG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,MAAM,CAA2B;gBAE7B,MAAM,GAAE,cAAmB;IAIvC;;OAEG;IACH,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,KAAK,EAAE;IAclC;;OAEG;IACH,UAAU,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,KAAK,EAAE;IAI1C;;OAEG;IACH,OAAO,CAAC,YAAY;IAuBpB;;OAEG;IACH,OAAO,CAAC,eAAe;IAmCvB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAoCxB;;OAEG;IACH,OAAO,CAAC,eAAe;IAqBvB;;OAEG;IACH,OAAO,CAAC,cAAc;IAOtB;;OAEG;IACH,OAAO,CAAC,WAAW;CAapB"}
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document Chunker - ドキュメント分割
|
|
3
|
+
*
|
|
4
|
+
* @requirement REQ-RAG-003
|
|
5
|
+
* @design DES-KATASHIRO-003-RAG §3.3
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Default chunking settings, applied to every option the caller does not supply.
 */
const DEFAULT_CONFIG = {
    strategy: 'fixed',
    chunkSize: 512,
    chunkOverlap: 64,
    separators: ['\n\n', '\n', '. ', ' '],
};
/**
 * Splits documents into chunks using a fixed-size, sentence-based or
 * paragraph-based strategy.
 */
export class DocumentChunker {
    config;
    /**
     * @param config Optional overrides merged over the defaults.
     */
    constructor(config = {}) {
        this.config = { ...DEFAULT_CONFIG, ...config };
    }
    /**
     * Splits one document into chunks using the configured strategy.
     * Unknown strategies fall back to fixed-size chunking.
     *
     * @param document Document with `id`, `content` and optional `metadata`.
     * @returns The chunks, in document order.
     */
    chunk(document) {
        const text = document.content;
        switch (this.config.strategy) {
            case 'sentence':
                return this.chunkBySentence(document, text);
            case 'paragraph':
                return this.chunkByParagraph(document, text);
            case 'fixed':
            default:
                return this.chunkByFixed(document, text);
        }
    }
    /**
     * Splits several documents and returns all chunks as one flat array.
     */
    chunkBatch(documents) {
        return documents.flatMap((doc) => this.chunk(doc));
    }
    /**
     * Fixed-size chunking: windows of `chunkSize` characters that advance by
     * `chunkSize - chunkOverlap` characters.
     */
    chunkByFixed(document, text) {
        const chunks = [];
        const { chunkSize, chunkOverlap } = this.config;
        // Keep the stride at 1 or more so the loop always advances.
        const stride = Math.max(1, chunkSize - chunkOverlap);
        let position = 0;
        let index = 0;
        while (position < text.length) {
            const content = text.slice(position, position + chunkSize);
            if (content.trim().length > 0) {
                chunks.push(this.createChunk(document, content, index, position));
                index++;
            }
            // Once a window reaches the end of the text, any further window
            // would only repeat a suffix of this one, so stop here.
            if (position + chunkSize >= text.length) {
                break;
            }
            position += stride;
        }
        return chunks;
    }
    /**
     * Sentence-based chunking: packs whole sentences into chunks of at most
     * `chunkSize` characters where possible.
     */
    chunkBySentence(document, text) {
        return this.packSegments(document, this.splitBySentence(text));
    }
    /**
     * Paragraph-based chunking: packs blank-line separated paragraphs into
     * chunks. Each paragraph is re-terminated with "\n\n" before packing.
     */
    chunkByParagraph(document, text) {
        const paragraphs = text
            .split(/\n\s*\n/)
            .filter((p) => p.trim().length > 0)
            .map((p) => p + '\n\n');
        return this.packSegments(document, paragraphs);
    }
    /**
     * Greedily packs segments into chunks. A chunk is flushed when adding the
     * next segment would exceed `chunkSize`; the tail of the flushed chunk
     * (`chunkOverlap` characters) seeds the next one. A single segment longer
     * than `chunkSize` is kept whole.
     *
     * Note: chunk content is trimmed and paragraphs are rejoined with "\n\n",
     * so the recorded start/end positions are offsets into the packed text and
     * only approximate offsets into the original document.
     */
    packSegments(document, segments) {
        const chunks = [];
        const { chunkSize, chunkOverlap } = this.config;
        let currentChunk = '';
        let currentPosition = 0;
        let index = 0;
        for (const segment of segments) {
            if (currentChunk.length > 0 && currentChunk.length + segment.length > chunkSize) {
                chunks.push(this.createChunk(document, currentChunk.trim(), index, currentPosition));
                index++;
                const overlap = this.getOverlapText(currentChunk, chunkOverlap);
                currentPosition += currentChunk.length - overlap.length;
                currentChunk = overlap;
            }
            currentChunk += segment;
        }
        // Flush whatever is left.
        if (currentChunk.trim().length > 0) {
            chunks.push(this.createChunk(document, currentChunk.trim(), index, currentPosition));
        }
        return chunks;
    }
    /**
     * Splits text into sentences on Japanese and English terminators
     * (ideographic full stop, ".", "!", "?" and the full-width "!" / "?").
     * Text the pattern skips (for example leading terminators) and any
     * unterminated tail are kept in their original position when non-blank.
     *
     * @returns The pieces in text order; `[text]` when nothing matches.
     */
    splitBySentence(text) {
        // \uFF01 / \uFF1F are the full-width exclamation and question marks.
        const pattern = /([^。.!?\uFF01\uFF1F]+[。.!?\uFF01\uFF1F]+\s*)/g;
        const sentences = [];
        let cursor = 0;
        for (const match of text.matchAll(pattern)) {
            if (match.index > cursor) {
                const skipped = text.slice(cursor, match.index);
                if (skipped.trim().length > 0) {
                    sentences.push(skipped);
                }
            }
            sentences.push(match[0]);
            cursor = match.index + match[0].length;
        }
        if (sentences.length === 0) {
            return [text];
        }
        // Keep an unterminated tail after the last sentence.
        const remaining = text.slice(cursor);
        if (remaining.trim().length > 0) {
            sentences.push(remaining);
        }
        return sentences;
    }
    /**
     * Returns the last `overlapSize` characters of `text` (the whole text when
     * it is shorter than that).
     */
    getOverlapText(text, overlapSize) {
        // slice(-0) would return the whole string, so handle "no overlap" explicitly.
        if (overlapSize <= 0) {
            return '';
        }
        if (text.length <= overlapSize) {
            return text;
        }
        return text.slice(-overlapSize);
    }
    /**
     * Builds a chunk object.
     *
     * @param document Source document; its metadata is copied onto the chunk.
     * @param content Chunk text.
     * @param index Zero-based chunk index, also used in the chunk id.
     * @param position Start offset recorded as `startPosition`.
     */
    createChunk(document, content, index, position) {
        return {
            id: `${document.id}_chunk_${index}`,
            documentId: document.id,
            content,
            metadata: {
                ...document.metadata,
                chunkIndex: index,
                startPosition: position,
                endPosition: position + content.length,
            },
        };
    }
}
|
|
171
|
+
//# sourceMappingURL=DocumentChunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"DocumentChunker.js","sourceRoot":"","sources":["../../src/chunking/DocumentChunker.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAIH;;GAEG;AACH,MAAM,cAAc,GAA6B;IAC/C,QAAQ,EAAE,OAAO;IACjB,SAAS,EAAE,GAAG;IACd,YAAY,EAAE,EAAE;IAChB,UAAU,EAAE,CAAC,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,GAAG,CAAC;CACtC,CAAC;AAEF;;GAEG;AACH,MAAM,OAAO,eAAe;IAClB,MAAM,CAA2B;IAEzC,YAAY,SAAyB,EAAE;QACrC,IAAI,CAAC,MAAM,GAAG,EAAE,GAAG,cAAc,EAAE,GAAG,MAAM,EAAE,CAAC;IACjD,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAkB;QACtB,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC;QAE9B,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;YAC7B,KAAK,UAAU;gBACb,OAAO,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;YAC9C,KAAK,WAAW;gBACd,OAAO,IAAI,CAAC,gBAAgB,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;YAC/C,KAAK,OAAO,CAAC;YACb;gBACE,OAAO,IAAI,CAAC,YAAY,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;QAC7C,CAAC;IACH,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,SAAqB;QAC9B,OAAO,SAAS,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC;IACrD,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,QAAkB,EAAE,IAAY;QACnD,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC;QAChD,gCAAgC;QAChC,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,SAAS,GAAG,YAAY,CAAC,CAAC;QAErD,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,OAAO,QAAQ,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YAC9B,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,GAAG,SAAS,CAAC,CAAC;YAE3D,IAAI,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC9B,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC;gBAClE,KAAK,EAAE,CAAC;YACV,CAAC;YAED,QAAQ,IAAI,MAAM,CAAC;QACrB,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,eAAe,CAAC,QAAkB,EAAE,IAAY;QACtD,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC;QAEhD,qBAAqB;QACrB,MAAM,SAAS,GAAG,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,CAAC;QAE7C,IAAI,YAAY,GAAG,EAAE,CAAC;QACtB,IAAI,eAAe,GAAG,CAAC,CAAC;QACxB,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,aAAa,GAAG,EAAE,CAAC;QAEvB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACj
C,gBAAgB;YAChB,IAAI,YAAY,CAAC,MAAM,GAAG,QAAQ,CAAC,MAAM,GAAG,SAAS,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACjF,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,YAAY,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,eAAe,CAAC,CAAC,CAAC;gBACrF,KAAK,EAAE,CAAC;gBAER,mBAAmB;gBACnB,aAAa,GAAG,IAAI,CAAC,cAAc,CAAC,YAAY,EAAE,YAAY,CAAC,CAAC;gBAChE,eAAe,IAAI,YAAY,CAAC,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC;gBAC9D,YAAY,GAAG,aAAa,CAAC;YAC/B,CAAC;YAED,YAAY,IAAI,QAAQ,CAAC;QAC3B,CAAC;QAED,QAAQ;QACR,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,YAAY,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,eAAe,CAAC,CAAC,CAAC;QACvF,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,gBAAgB,CAAC,QAAkB,EAAE,IAAY;QACvD,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC;QAEhD,QAAQ;QACR,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAE5E,IAAI,YAAY,GAAG,EAAE,CAAC;QACtB,IAAI,eAAe,GAAG,CAAC,CAAC;QACxB,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,aAAa,GAAG,EAAE,CAAC;QAEvB,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;YACnC,MAAM,sBAAsB,GAAG,SAAS,GAAG,MAAM,CAAC;YAElD,gBAAgB;YAChB,IAAI,YAAY,CAAC,MAAM,GAAG,sBAAsB,CAAC,MAAM,GAAG,SAAS,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC/F,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,YAAY,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,eAAe,CAAC,CAAC,CAAC;gBACrF,KAAK,EAAE,CAAC;gBAER,aAAa,GAAG,IAAI,CAAC,cAAc,CAAC,YAAY,EAAE,YAAY,CAAC,CAAC;gBAChE,eAAe,IAAI,YAAY,CAAC,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC;gBAC9D,YAAY,GAAG,aAAa,CAAC;YAC/B,CAAC;YAED,YAAY,IAAI,sBAAsB,CAAC;QACzC,CAAC;QAED,QAAQ;QACR,IAAI,YAAY,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,YAAY,CAAC,IAAI,EAAE,EAAE,KAAK,EAAE,eAAe,CAAC,CAAC,CAAC;QACvF,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,eAAe,CAAC,IAAY;QAClC,eAAe;QACf,MAAM,OAAO,GAAG,2BAA2B,CAAC;QAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;QAEpC,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO,CAAC,
IAAI,CAAC,CAAC;QAChB,CAAC;QAED,gBAAgB;QAChB,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACjC,IAAI,OAAO,CAAC,MAAM,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YACjC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;YAC7C,IAAI,SAAS,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAChC,OAAO,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YAC1B,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY,EAAE,WAAmB;QACtD,IAAI,IAAI,CAAC,MAAM,IAAI,WAAW,EAAE,CAAC;YAC/B,OAAO,IAAI,CAAC;QACd,CAAC;QACD,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,WAAW,CAAC,CAAC;IAClC,CAAC;IAED;;OAEG;IACK,WAAW,CAAC,QAAkB,EAAE,OAAe,EAAE,KAAa,EAAE,QAAgB;QACtF,OAAO;YACL,EAAE,EAAE,GAAG,QAAQ,CAAC,EAAE,UAAU,KAAK,EAAE;YACnC,UAAU,EAAE,QAAQ,CAAC,EAAE;YACvB,OAAO;YACP,QAAQ,EAAE;gBACR,GAAG,QAAQ,CAAC,QAAQ;gBACpB,UAAU,EAAE,KAAK;gBACjB,aAAa,EAAE,QAAQ;gBACvB,WAAW,EAAE,QAAQ,GAAG,OAAO,CAAC,MAAM;aACvC;SACF,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/chunking/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/chunking/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Azure OpenAI Embedding Provider
|
|
3
|
+
*
|
|
4
|
+
* Azure OpenAI Service embedding provider
|
|
5
|
+
*
|
|
6
|
+
* @requirement REQ-RAG-001
|
|
7
|
+
* @design DES-KATASHIRO-003-RAG
|
|
8
|
+
*/
|
|
9
|
+
import { BaseEmbeddingProvider } from './BaseEmbeddingProvider.js';
|
|
10
|
+
import type { EmbeddingConfig } from '../types.js';
|
|
11
|
+
/**
 * Configuration for the Azure OpenAI embedding provider.
 */
export interface AzureOpenAIEmbeddingConfig extends EmbeddingConfig {
    /** Azure OpenAI endpoint */
    endpoint?: string;
    /** API key */
    apiKey?: string;
    /** Deployment name */
    deploymentName?: string;
    /** API version */
    apiVersion?: string;
}
|
|
24
|
+
/**
 * Azure OpenAI embedding provider
 *
 * Generates embeddings using Azure OpenAI Service.
 *
 * @example
 * ```typescript
 * const provider = new AzureOpenAIEmbeddingProvider({
 *   endpoint: 'https://your-resource.openai.azure.com',
 *   apiKey: process.env.AZURE_OPENAI_API_KEY,
 *   deploymentName: 'text-embedding-ada-002',
 *   apiVersion: '2024-02-15-preview',
 * });
 *
 * const embedding = await provider.embed('Hello, world!');
 * ```
 */
export declare class AzureOpenAIEmbeddingProvider extends BaseEmbeddingProvider {
    readonly name = "azure-openai";
    private readonly endpoint;
    private readonly apiKey;
    private readonly deploymentName;
    private readonly apiVersion;
    private _dimensions;
    get dimensions(): number;
    constructor(config?: AzureOpenAIEmbeddingConfig);
    /**
     * Generates the embedding for a single text.
     */
    embed(text: string): Promise<number[]>;
    /**
     * Generates embeddings for a batch of texts (Azure OpenAI specific implementation).
     */
    protected embedBatchInternal(texts: string[]): Promise<number[][]>;
    /**
     * Lists deployments.
     */
    listDeployments(): Promise<string[]>;
}
|
|
63
|
+
//# sourceMappingURL=AzureOpenAIEmbeddingProvider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AzureOpenAIEmbeddingProvider.d.ts","sourceRoot":"","sources":["../../src/embedding/AzureOpenAIEmbeddingProvider.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AACnE,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAEnD;;GAEG;AACH,MAAM,WAAW,0BAA2B,SAAQ,eAAe;IACjE,2BAA2B;IAC3B,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY;IACZ,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,eAAe;IACf,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,eAAe;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAmBD;;;;;;;;;;;;;;;;GAgBG;AACH,qBAAa,4BAA6B,SAAQ,qBAAqB;IACrE,QAAQ,CAAC,IAAI,kBAAkB;IAE/B,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,WAAW,CAAS;IAE5B,IAAI,UAAU,IAAI,MAAM,CAEvB;gBAEW,MAAM,GAAE,0BAA+B;IA8BnD;;OAEG;IACG,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAS5C;;OAEG;cACsB,kBAAkB,CACzC,KAAK,EAAE,MAAM,EAAE,GACd,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAgDtB;;OAEG;IACG,eAAe,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;CAoC3C"}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Azure OpenAI Embedding Provider
|
|
3
|
+
*
|
|
4
|
+
* Azure OpenAI Service embedding provider
|
|
5
|
+
*
|
|
6
|
+
* @requirement REQ-RAG-001
|
|
7
|
+
* @design DES-KATASHIRO-003-RAG
|
|
8
|
+
*/
|
|
9
|
+
import { BaseEmbeddingProvider } from './BaseEmbeddingProvider.js';
|
|
10
|
+
/**
 * Removes all trailing "/" characters so a path can be appended to the
 * endpoint without producing "//".
 */
function stripTrailingSlashes(url) {
    let end = url.length;
    while (end > 0 && url[end - 1] === '/') {
        end--;
    }
    return url.slice(0, end);
}
/**
 * Azure OpenAI embedding provider
 *
 * Generates embeddings using Azure OpenAI Service.
 *
 * @example
 * ```typescript
 * const provider = new AzureOpenAIEmbeddingProvider({
 *   endpoint: 'https://your-resource.openai.azure.com',
 *   apiKey: process.env.AZURE_OPENAI_API_KEY,
 *   deploymentName: 'text-embedding-ada-002',
 *   apiVersion: '2024-02-15-preview',
 * });
 *
 * const embedding = await provider.embed('Hello, world!');
 * ```
 */
export class AzureOpenAIEmbeddingProvider extends BaseEmbeddingProvider {
    name = 'azure-openai';
    endpoint;
    apiKey;
    deploymentName;
    apiVersion;
    _dimensions;
    /**
     * Embedding size: the configured value (default 1536) until the first
     * successful request, then the length of the vectors actually returned.
     */
    get dimensions() {
        return this._dimensions;
    }
    /**
     * @param config Provider settings. `endpoint`, `apiKey` and `deploymentName`
     *   fall back to the AZURE_OPENAI_ENDPOINT, AZURE_OPENAI_API_KEY and
     *   AZURE_OPENAI_EMBEDDING_DEPLOYMENT environment variables.
     * @throws Error when endpoint, API key or deployment name is missing.
     */
    constructor(config = {}) {
        super(config);
        this.endpoint = config.endpoint ?? process.env.AZURE_OPENAI_ENDPOINT ?? '';
        this.apiKey = config.apiKey ?? process.env.AZURE_OPENAI_API_KEY ?? '';
        this.deploymentName =
            config.deploymentName ?? process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT ?? '';
        this.apiVersion = config.apiVersion ?? '2024-02-15-preview';
        // Default number of dimensions.
        this._dimensions = config.dimensions ?? 1536;
        // Validate the settings.
        if (!this.endpoint) {
            throw new Error('Azure OpenAI endpoint is required. Set AZURE_OPENAI_ENDPOINT or provide endpoint in config.');
        }
        if (!this.apiKey) {
            throw new Error('Azure OpenAI API key is required. Set AZURE_OPENAI_API_KEY or provide apiKey in config.');
        }
        if (!this.deploymentName) {
            throw new Error('Azure OpenAI deployment name is required. Set AZURE_OPENAI_EMBEDDING_DEPLOYMENT or provide deploymentName in config.');
        }
    }
    /**
     * Generates the embedding for a single text.
     *
     * @throws Error when the service returns no embedding.
     */
    async embed(text) {
        const embeddings = await this.embedBatchInternal([text]);
        const result = embeddings[0];
        if (!result) {
            throw new Error('Failed to generate embedding');
        }
        return result;
    }
    /**
     * Generates embeddings for a batch of texts (Azure OpenAI specific
     * implementation). The request runs through the inherited `withRetry`.
     *
     * @param texts Texts to embed; an empty array returns `[]` without a request.
     * @returns One vector per input, ordered by the `index` field of the response.
     * @throws Error when the response body has no `data` array.
     */
    async embedBatchInternal(texts) {
        if (texts.length === 0) {
            return [];
        }
        return this.withRetry(async () => {
            const baseUrl = stripTrailingSlashes(this.endpoint);
            const deployment = encodeURIComponent(this.deploymentName);
            const version = encodeURIComponent(this.apiVersion);
            const url = `${baseUrl}/openai/deployments/${deployment}/embeddings?api-version=${version}`;
            const headers = {
                'Content-Type': 'application/json',
                'api-key': this.apiKey,
            };
            const body = {
                input: texts,
            };
            // Request a specific size (embedding-3 family only).
            // NOTE(review): `_dimensions` defaults to 1536, so an embedding-3
            // deployment is asked for 1536 dimensions even when the caller did
            // not configure `dimensions` - confirm this is intended.
            if (this._dimensions && this.deploymentName.includes('embedding-3')) {
                body.dimensions = this._dimensions;
            }
            const response = await this.fetchWithTimeout(url, {
                method: 'POST',
                headers,
                body: JSON.stringify(body),
            }, this.config.timeout);
            const data = await response.json();
            if (!data || !Array.isArray(data.data)) {
                // Fail with a readable message instead of a "not iterable" TypeError.
                const detail = data?.error?.message;
                throw new Error(`Azure OpenAI embeddings response did not contain a data array${detail ? `: ${detail}` : ''}`);
            }
            // Sort by index so the output order matches the input order.
            const sortedData = [...data.data].sort((a, b) => a.index - b.index);
            // Record the actual number of dimensions.
            const firstItem = sortedData[0];
            if (firstItem) {
                this._dimensions = firstItem.embedding.length;
            }
            return sortedData.map((d) => d.embedding);
        });
    }
    /**
     * Lists the ids of deployments whose model name contains "embedding" and
     * whose status is "succeeded".
     *
     * Best effort: any failure (for example missing permission for the
     * deployments API) yields an empty array instead of throwing.
     */
    async listDeployments() {
        const baseUrl = stripTrailingSlashes(this.endpoint);
        const url = `${baseUrl}/openai/deployments?api-version=${encodeURIComponent(this.apiVersion)}`;
        const headers = {
            'api-key': this.apiKey,
        };
        try {
            // Use the configured timeout, as embedBatchInternal does.
            const response = await this.fetchWithTimeout(url, {
                method: 'GET',
                headers,
            }, this.config.timeout);
            const data = await response.json();
            const deployments = Array.isArray(data?.data) ? data.data : [];
            // Keep only embedding models.
            return deployments
                .filter((d) => typeof d.model === 'string' && d.model.includes('embedding') && d.status === 'succeeded')
                .map((d) => d.id);
        }
        catch {
            // Fails when the caller lacks Azure Management API permission.
            return [];
        }
    }
}
|
|
133
|
+
//# sourceMappingURL=AzureOpenAIEmbeddingProvider.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AzureOpenAIEmbeddingProvider.js","sourceRoot":"","sources":["../../src/embedding/AzureOpenAIEmbeddingProvider.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EAAE,qBAAqB,EAAE,MAAM,4BAA4B,CAAC;AAkCnE;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,OAAO,4BAA6B,SAAQ,qBAAqB;IAC5D,IAAI,GAAG,cAAc,CAAC;IAEd,QAAQ,CAAS;IACjB,MAAM,CAAS;IACf,cAAc,CAAS;IACvB,UAAU,CAAS;IAC5B,WAAW,CAAS;IAE5B,IAAI,UAAU;QACZ,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED,YAAY,SAAqC,EAAE;QACjD,KAAK,CAAC,MAAM,CAAC,CAAC;QAEd,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,QAAQ,IAAI,OAAO,CAAC,GAAG,CAAC,qBAAqB,IAAI,EAAE,CAAC;QAC3E,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,oBAAoB,IAAI,EAAE,CAAC;QACtE,IAAI,CAAC,cAAc;YACjB,MAAM,CAAC,cAAc,IAAI,OAAO,CAAC,GAAG,CAAC,iCAAiC,IAAI,EAAE,CAAC;QAC/E,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC,UAAU,IAAI,oBAAoB,CAAC;QAE5D,WAAW;QACX,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,UAAU,IAAI,IAAI,CAAC;QAE7C,OAAO;QACP,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACnB,MAAM,IAAI,KAAK,CACb,6FAA6F,CAC9F,CAAC;QACJ,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CACb,yFAAyF,CAC1F,CAAC;QACJ,CAAC;QACD,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC;YACzB,MAAM,IAAI,KAAK,CACb,sHAAsH,CACvH,CAAC;QACJ,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,kBAAkB,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC;QACzD,MAAM,MAAM,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;QAC7B,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,8BAA8B,CAAC,CAAC;QAClD,CAAC;QACD,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACgB,KAAK,CAAC,kBAAkB,CACzC,KAAe;QAEf,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,IAAI,EAAE;YAC/B,cAAc;YACd,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;gBACzC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;gBAC5B,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC;YAElB,MAAM,GAAG,GAAG,GAAG,OAAO,uBAAuB,IAAI,CAAC,cAAc,2BAA2B,IAAI,CAAC,UAAU,EAAE,CAAC;YAE7G,MAAM,OAAO,GAA2B;gBACtC,cAAc,EAAE,kBAAkB;gBAClC,SAAS,EAAE,IAAI,CAAC,MAAM;aACvB,CAAC;YAEF,MAAM,IAAI,GAA4B;gBACpC,KAAK,EAAE,KAAK;aACb,CAAC;YAEF,wBAAwB;YACxB,IAAI,IAAI,CAAC,WAAW,IAAI,
IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,aAAa,CAAC,EAAE,CAAC;gBACpE,IAAI,CAAC,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC;YACrC,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAC1C,GAAG,EACH;gBACE,MAAM,EAAE,MAAM;gBACd,OAAO;gBACP,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;aAC3B,EACD,IAAI,CAAC,MAAM,CAAC,OAAO,CACpB,CAAC;YAEF,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAA2B,CAAC;YAE/D,cAAc;YACd,MAAM,UAAU,GAAG,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;YAEpE,YAAY;YACZ,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YAChC,IAAI,SAAS,EAAE,CAAC;gBACd,IAAI,CAAC,WAAW,GAAG,SAAS,CAAC,SAAS,CAAC,MAAM,CAAC;YAChD,CAAC;YAED,OAAO,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,eAAe;QACnB,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;YACzC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;YAC5B,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC;QAElB,MAAM,GAAG,GAAG,GAAG,OAAO,mCAAmC,IAAI,CAAC,UAAU,EAAE,CAAC;QAE3E,MAAM,OAAO,GAA2B;YACtC,SAAS,EAAE,IAAI,CAAC,MAAM;SACvB,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,GAAG,EAAE;gBAChD,MAAM,EAAE,KAAK;gBACb,OAAO;aACR,CAAC,CAAC;YAUH,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAA6B,CAAC;YAEjE,oBAAoB;YACpB,OAAO,IAAI,CAAC,IAAI;iBACb,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,MAAM,KAAK,WAAW,CAAC;iBACxE,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACtB,CAAC;QAAC,MAAM,CAAC;YACP,kCAAkC;YAClC,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base Embedding Provider
|
|
3
|
+
*
|
|
4
|
+
* Esperanto-style abstraction for embedding providers
|
|
5
|
+
*
|
|
6
|
+
* @requirement REQ-RAG-001
|
|
7
|
+
* @design DES-KATASHIRO-003-RAG
|
|
8
|
+
*/
|
|
9
|
+
import type { EmbeddingProvider, EmbeddingConfig } from '../types.js';
|
|
10
|
+
/**
|
|
11
|
+
* Abstract base class for embedding providers.
|
|
12
|
+
*/
|
|
13
|
+
export declare abstract class BaseEmbeddingProvider implements EmbeddingProvider {
|
|
14
|
+
abstract readonly name: string;
|
|
15
|
+
abstract readonly dimensions: number;
|
|
16
|
+
protected config: EmbeddingConfig;
|
|
17
|
+
constructor(config?: Partial<EmbeddingConfig>);
|
|
18
|
+
/**
|
|
19
|
+
* Generates the embedding for a single text.
|
|
20
|
+
*/
|
|
21
|
+
abstract embed(text: string): Promise<number[]>;
|
|
22
|
+
/**
|
|
23
|
+
* Generates embeddings for a batch of texts.
|
|
24
|
+
*/
|
|
25
|
+
embedBatch(texts: string[]): Promise<number[][]>;
|
|
26
|
+
/**
|
|
27
|
+
* Internal batch processing (provider-specific implementation).
|
|
28
|
+
*/
|
|
29
|
+
protected embedBatchInternal(texts: string[]): Promise<number[][]>;
|
|
30
|
+
/**
|
|
31
|
+
* HTTP request helper with timeout support.
|
|
32
|
+
*/
|
|
33
|
+
protected fetchWithTimeout(url: string, options: RequestInit, timeout?: number): Promise<Response>;
|
|
34
|
+
/**
|
|
35
|
+
* Runs an operation with retries.
|
|
36
|
+
*/
|
|
37
|
+
protected withRetry<T>(operation: () => Promise<T>, maxRetries?: number): Promise<T>;
|
|
38
|
+
/**
|
|
39
|
+
* Sleep helper (`ms` is presumably milliseconds - confirm against the implementation).
|
|
40
|
+
*/
|
|
41
|
+
protected sleep(ms: number): Promise<void>;
|
|
42
|
+
}
|
|
43
|
+
//# sourceMappingURL=BaseEmbeddingProvider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"BaseEmbeddingProvider.d.ts","sourceRoot":"","sources":["../../src/embedding/BaseEmbeddingProvider.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAYtE;;GAEG;AACH,8BAAsB,qBAAsB,YAAW,iBAAiB;IACtE,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAErC,SAAS,CAAC,MAAM,EAAE,eAAe,CAAC;gBAEtB,MAAM,GAAE,OAAO,CAAC,eAAe,CAAM;IAOjD;;OAEG;IACH,QAAQ,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAE/C;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAatD;;OAEG;cACa,kBAAkB,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAKxE;;OAEG;cACa,gBAAgB,CAC9B,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,WAAW,EACpB,OAAO,CAAC,EAAE,MAAM,GACf,OAAO,CAAC,QAAQ,CAAC;IA0BpB;;OAEG;cACa,SAAS,CAAC,CAAC,EACzB,SAAS,EAAE,MAAM,OAAO,CAAC,CAAC,CAAC,EAC3B,UAAU,CAAC,EAAE,MAAM,GAClB,OAAO,CAAC,CAAC,CAAC;IAqBb;;OAEG;IACH,SAAS,CAAC,KAAK,CAAC,EAAE,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;CAG3C"}
|