@nahisaho/katashiro-rag 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/RAGEngine.d.ts +58 -0
- package/dist/RAGEngine.d.ts.map +1 -0
- package/dist/RAGEngine.js +97 -0
- package/dist/RAGEngine.js.map +1 -0
- package/dist/RAGPipeline.d.ts +162 -0
- package/dist/RAGPipeline.d.ts.map +1 -0
- package/dist/RAGPipeline.js +222 -0
- package/dist/RAGPipeline.js.map +1 -0
- package/dist/Retriever.d.ts +49 -0
- package/dist/Retriever.d.ts.map +1 -0
- package/dist/Retriever.js +96 -0
- package/dist/Retriever.js.map +1 -0
- package/dist/chunking/DocumentChunker.d.ts +47 -0
- package/dist/chunking/DocumentChunker.d.ts.map +1 -0
- package/dist/chunking/DocumentChunker.js +171 -0
- package/dist/chunking/DocumentChunker.js.map +1 -0
- package/dist/chunking/index.d.ts +5 -0
- package/dist/chunking/index.d.ts.map +1 -0
- package/dist/chunking/index.js +5 -0
- package/dist/chunking/index.js.map +1 -0
- package/dist/embedding/AzureOpenAIEmbeddingProvider.d.ts +63 -0
- package/dist/embedding/AzureOpenAIEmbeddingProvider.d.ts.map +1 -0
- package/dist/embedding/AzureOpenAIEmbeddingProvider.js +133 -0
- package/dist/embedding/AzureOpenAIEmbeddingProvider.js.map +1 -0
- package/dist/embedding/BaseEmbeddingProvider.d.ts +43 -0
- package/dist/embedding/BaseEmbeddingProvider.d.ts.map +1 -0
- package/dist/embedding/BaseEmbeddingProvider.js +98 -0
- package/dist/embedding/BaseEmbeddingProvider.js.map +1 -0
- package/dist/embedding/EmbeddingFactory.d.ts +75 -0
- package/dist/embedding/EmbeddingFactory.d.ts.map +1 -0
- package/dist/embedding/EmbeddingFactory.js +153 -0
- package/dist/embedding/EmbeddingFactory.js.map +1 -0
- package/dist/embedding/EmbeddingManager.d.ts +41 -0
- package/dist/embedding/EmbeddingManager.d.ts.map +1 -0
- package/dist/embedding/EmbeddingManager.js +93 -0
- package/dist/embedding/EmbeddingManager.js.map +1 -0
- package/dist/embedding/MockEmbeddingProvider.d.ts +54 -0
- package/dist/embedding/MockEmbeddingProvider.d.ts.map +1 -0
- package/dist/embedding/MockEmbeddingProvider.js +91 -0
- package/dist/embedding/MockEmbeddingProvider.js.map +1 -0
- package/dist/embedding/OllamaEmbeddingProvider.d.ts +69 -0
- package/dist/embedding/OllamaEmbeddingProvider.d.ts.map +1 -0
- package/dist/embedding/OllamaEmbeddingProvider.js +136 -0
- package/dist/embedding/OllamaEmbeddingProvider.js.map +1 -0
- package/dist/embedding/OpenAIEmbeddingProvider.d.ts +83 -0
- package/dist/embedding/OpenAIEmbeddingProvider.d.ts.map +1 -0
- package/dist/embedding/OpenAIEmbeddingProvider.js +150 -0
- package/dist/embedding/OpenAIEmbeddingProvider.js.map +1 -0
- package/dist/embedding/index.d.ts +16 -0
- package/dist/embedding/index.d.ts.map +1 -0
- package/dist/embedding/index.js +15 -0
- package/dist/embedding/index.js.map +1 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +22 -0
- package/dist/index.js.map +1 -0
- package/dist/reranking/LLMReranker.d.ts +147 -0
- package/dist/reranking/LLMReranker.d.ts.map +1 -0
- package/dist/reranking/LLMReranker.js +262 -0
- package/dist/reranking/LLMReranker.js.map +1 -0
- package/dist/reranking/index.d.ts +7 -0
- package/dist/reranking/index.d.ts.map +1 -0
- package/dist/reranking/index.js +7 -0
- package/dist/reranking/index.js.map +1 -0
- package/dist/types.d.ts +144 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +8 -0
- package/dist/types.js.map +1 -0
- package/dist/vectordb/FileVectorStore.d.ts +93 -0
- package/dist/vectordb/FileVectorStore.d.ts.map +1 -0
- package/dist/vectordb/FileVectorStore.js +218 -0
- package/dist/vectordb/FileVectorStore.js.map +1 -0
- package/dist/vectordb/InMemoryVectorStore.d.ts +48 -0
- package/dist/vectordb/InMemoryVectorStore.d.ts.map +1 -0
- package/dist/vectordb/InMemoryVectorStore.js +86 -0
- package/dist/vectordb/InMemoryVectorStore.js.map +1 -0
- package/dist/vectordb/index.d.ts +8 -0
- package/dist/vectordb/index.d.ts.map +1 -0
- package/dist/vectordb/index.js +6 -0
- package/dist/vectordb/index.js.map +1 -0
- package/package.json +37 -0
- package/src/RAGEngine.ts +127 -0
- package/src/RAGPipeline.ts +357 -0
- package/src/Retriever.ts +121 -0
- package/src/chunking/DocumentChunker.ts +207 -0
- package/src/chunking/index.ts +5 -0
- package/src/embedding/AzureOpenAIEmbeddingProvider.ts +208 -0
- package/src/embedding/BaseEmbeddingProvider.ts +133 -0
- package/src/embedding/EmbeddingFactory.ts +225 -0
- package/src/embedding/EmbeddingManager.ts +110 -0
- package/src/embedding/MockEmbeddingProvider.ts +123 -0
- package/src/embedding/OllamaEmbeddingProvider.ts +197 -0
- package/src/embedding/OpenAIEmbeddingProvider.ts +226 -0
- package/src/embedding/index.ts +33 -0
- package/src/index.ts +55 -0
- package/src/reranking/LLMReranker.ts +401 -0
- package/src/reranking/index.ts +15 -0
- package/src/types.ts +157 -0
- package/src/vectordb/FileVectorStore.ts +289 -0
- package/src/vectordb/InMemoryVectorStore.ts +121 -0
- package/src/vectordb/index.ts +9 -0
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Azure OpenAI Embedding Provider
|
|
3
|
+
*
|
|
4
|
+
* Azure OpenAI Service embedding provider
|
|
5
|
+
*
|
|
6
|
+
* @requirement REQ-RAG-001
|
|
7
|
+
* @design DES-KATASHIRO-003-RAG
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { BaseEmbeddingProvider } from './BaseEmbeddingProvider.js';
|
|
11
|
+
import type { EmbeddingConfig } from '../types.js';
|
|
12
|
+
|
|
13
|
+
/**
 * Options accepted by the Azure OpenAI embedding provider.
 *
 * Every field is optional. The provider constructor falls back to environment
 * variables for the endpoint, API key and deployment name, and throws when any
 * of those three ends up empty.
 */
export interface AzureOpenAIEmbeddingConfig extends EmbeddingConfig {
  /** Azure OpenAI resource endpoint, e.g. `https://your-resource.openai.azure.com`. */
  endpoint?: string;

  /** API key sent in the `api-key` request header. */
  apiKey?: string;

  /** Name of the embedding deployment that is placed in the request URL. */
  deploymentName?: string;

  /** Value used for the `api-version` query parameter. */
  apiVersion?: string;
}
|
|
26
|
+
|
|
27
|
+
/**
 * A single embedding entry inside an Azure OpenAI embeddings response.
 */
interface AzureEmbeddingItem {
  object: 'embedding';
  /** The embedding vector itself. */
  embedding: number[];
  /** Position used by the provider to restore the order of the results. */
  index: number;
}

/**
 * Token accounting block of an Azure OpenAI embeddings response.
 */
interface AzureEmbeddingUsage {
  prompt_tokens: number;
  total_tokens: number;
}

/**
 * Shape of the JSON body returned by the Azure OpenAI embeddings endpoint,
 * as consumed by the provider in this file.
 */
interface AzureEmbeddingResponse {
  object: 'list';
  data: AzureEmbeddingItem[];
  model: string;
  usage: AzureEmbeddingUsage;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Embedding provider that calls the Azure OpenAI Service embeddings REST API.
 *
 * Settings are taken from the constructor config first and from the
 * `AZURE_OPENAI_ENDPOINT`, `AZURE_OPENAI_API_KEY`,
 * `AZURE_OPENAI_EMBEDDING_DEPLOYMENT` and `AZURE_OPENAI_API_VERSION`
 * environment variables second.
 *
 * @example
 * ```typescript
 * const provider = new AzureOpenAIEmbeddingProvider({
 *   endpoint: 'https://your-resource.openai.azure.com',
 *   apiKey: process.env.AZURE_OPENAI_API_KEY,
 *   deploymentName: 'text-embedding-ada-002',
 *   apiVersion: '2024-02-15-preview',
 * });
 *
 * const embedding = await provider.embed('Hello, world!');
 * ```
 */
export class AzureOpenAIEmbeddingProvider extends BaseEmbeddingProvider {
  readonly name = 'azure-openai';

  private readonly endpoint: string;
  private readonly apiKey: string;
  private readonly deploymentName: string;
  private readonly apiVersion: string;
  private _dimensions: number;

  /**
   * Current embedding size. Starts at the configured value (default 1536) and
   * is overwritten with the length of the first vector of every response.
   */
  get dimensions(): number {
    return this._dimensions;
  }

  /**
   * @param config Provider settings; missing values fall back to environment variables.
   * @throws Error when the endpoint, API key or deployment name cannot be resolved.
   */
  constructor(config: AzureOpenAIEmbeddingConfig = {}) {
    super(config);

    this.endpoint = config.endpoint ?? process.env.AZURE_OPENAI_ENDPOINT ?? '';
    this.apiKey = config.apiKey ?? process.env.AZURE_OPENAI_API_KEY ?? '';
    this.deploymentName =
      config.deploymentName ?? process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT ?? '';
    // `||` on purpose: an empty api-version would produce an unusable URL.
    this.apiVersion =
      config.apiVersion || process.env.AZURE_OPENAI_API_VERSION || '2024-02-15-preview';

    // Default dimension count until a response reports the real one.
    this._dimensions = config.dimensions ?? 1536;

    // Validate the resolved settings.
    if (!this.endpoint) {
      throw new Error(
        'Azure OpenAI endpoint is required. Set AZURE_OPENAI_ENDPOINT or provide endpoint in config.'
      );
    }
    if (!this.apiKey) {
      throw new Error(
        'Azure OpenAI API key is required. Set AZURE_OPENAI_API_KEY or provide apiKey in config.'
      );
    }
    if (!this.deploymentName) {
      throw new Error(
        'Azure OpenAI deployment name is required. Set AZURE_OPENAI_EMBEDDING_DEPLOYMENT or provide deploymentName in config.'
      );
    }
  }

  /**
   * Embeds a single text.
   *
   * @param text Text to embed.
   * @returns The embedding vector for `text`.
   * @throws Error when the service yields no embedding or the request fails.
   */
  async embed(text: string): Promise<number[]> {
    const embeddings = await this.embedBatchInternal([text]);
    const result = embeddings[0];
    if (!result) {
      throw new Error('Failed to generate embedding');
    }
    return result;
  }

  /**
   * Embeds one batch of texts with a single request (Azure OpenAI specific).
   *
   * @param texts Texts to embed; an empty array returns `[]` without a request.
   * @returns One vector per input text, ordered by the `index` field of the response.
   * @throws Error when the request fails after retries, the response has no
   *   `data` array, or the number of embeddings differs from the number of inputs.
   */
  protected override async embedBatchInternal(
    texts: string[]
  ): Promise<number[][]> {
    if (texts.length === 0) {
      return [];
    }

    return this.withRetry(async () => {
      const url = this.buildUrl(
        `deployments/${encodeURIComponent(this.deploymentName)}/embeddings`
      );

      const headers: Record<string, string> = {
        'Content-Type': 'application/json',
        'api-key': this.apiKey,
      };

      const body: Record<string, unknown> = { input: texts };

      // The dimensions parameter is only sent for embedding-3 style deployments.
      // NOTE(review): `_dimensions` defaults to 1536 and is updated from responses,
      // so it is sent even when the caller never configured it — confirm intended.
      if (this._dimensions && this.deploymentName.includes('embedding-3')) {
        body.dimensions = this._dimensions;
      }

      const response = await this.fetchWithTimeout(
        url,
        {
          method: 'POST',
          headers,
          body: JSON.stringify(body),
        },
        this.config.timeout
      );

      const payload = (await response.json()) as Partial<AzureEmbeddingResponse> | null;
      const items = payload?.data;
      if (!Array.isArray(items)) {
        throw new Error('Azure OpenAI embeddings response did not contain a data array');
      }
      // Callers map results back to inputs by position, so a short or long
      // response must not be passed on silently.
      if (items.length !== texts.length) {
        throw new Error(
          `Azure OpenAI returned ${items.length} embeddings for ${texts.length} inputs`
        );
      }

      // Restore input order using the index reported for each entry.
      const ordered = [...items].sort((a, b) => a.index - b.index);

      // Remember the dimension count actually returned by the service.
      const first = ordered[0];
      if (first) {
        this._dimensions = first.embedding.length;
      }

      return ordered.map((item) => item.embedding);
    });
  }

  /**
   * Lists the ids of deployments whose model name contains `embedding` and
   * whose status is `succeeded`.
   *
   * This is best effort: any failure (the original author's note mentions
   * missing Azure management permissions) results in an empty array.
   *
   * @returns Deployment ids, or `[]` when the listing could not be obtained.
   */
  async listDeployments(): Promise<string[]> {
    const url = this.buildUrl('deployments');

    const headers: Record<string, string> = {
      'api-key': this.apiKey,
    };

    try {
      const response = await this.fetchWithTimeout(url, {
        method: 'GET',
        headers,
      });

      interface AzureDeploymentsResponse {
        data: Array<{
          id: string;
          model: string;
          status: string;
        }>;
      }

      const data = (await response.json()) as AzureDeploymentsResponse;

      // Keep only successfully provisioned embedding deployments.
      return data.data
        .filter((d) => d.model.includes('embedding') && d.status === 'succeeded')
        .map((d) => d.id);
    } catch {
      // Deliberately swallowed: listing is optional functionality.
      return [];
    }
  }

  /**
   * Builds `<endpoint>/openai/<resourcePath>?api-version=<version>`, removing
   * every trailing slash from the endpoint and URL-encoding the version.
   */
  private buildUrl(resourcePath: string): string {
    const baseUrl = this.endpoint.replace(/\/+$/, '');
    return `${baseUrl}/openai/${resourcePath}?api-version=${encodeURIComponent(this.apiVersion)}`;
  }
}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Base Embedding Provider
|
|
3
|
+
*
|
|
4
|
+
* Esperanto-style abstraction for embedding providers
|
|
5
|
+
*
|
|
6
|
+
* @requirement REQ-RAG-001
|
|
7
|
+
* @design DES-KATASHIRO-003-RAG
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { EmbeddingProvider, EmbeddingConfig } from '../types.js';
|
|
11
|
+
|
|
12
|
+
/**
 * Baseline settings merged underneath whatever configuration a provider
 * receives. `timeout` is handed to `setTimeout`, so it is in milliseconds.
 */
const DEFAULT_CONFIG: EmbeddingConfig = {
  batchSize: 100,
  dimensions: 1536,
  timeout: 60_000,
  maxRetries: 3,
};
|
|
21
|
+
|
|
22
|
+
/**
 * Tells whether a failed operation is worth another attempt.
 *
 * Errors produced by `fetchWithTimeout` start with `HTTP <status>:`. For those,
 * only server errors (5xx), 408 and 429 are retried; other statuses such as
 * 400/401/404 will not succeed on a retry. Errors without that prefix
 * (network failures, aborts, anything thrown by the operation) stay retryable.
 */
function isRetryableError(error: Error): boolean {
  const match = /^HTTP (\d{3}):/.exec(error.message);
  if (!match) {
    return true;
  }
  const status = Number(match[1]);
  return status >= 500 || status === 408 || status === 429;
}

/**
 * Abstract base class for embedding providers.
 *
 * Supplies configuration merging, batching, an HTTP helper with timeout and a
 * retry helper with exponential backoff. Subclasses provide `name`,
 * `dimensions` and `embed`, and may override `embedBatchInternal`.
 */
export abstract class BaseEmbeddingProvider implements EmbeddingProvider {
  abstract readonly name: string;
  abstract readonly dimensions: number;

  protected config: EmbeddingConfig;

  /**
   * @param config Overrides for the default settings. Keys whose value is
   *   `undefined` are ignored so they cannot erase a default (an undefined
   *   timeout would otherwise make the abort timer fire almost immediately).
   */
  constructor(config: Partial<EmbeddingConfig> = {}) {
    const overrides = Object.fromEntries(
      Object.entries(config).filter(([, value]) => value !== undefined)
    ) as Partial<EmbeddingConfig>;

    this.config = {
      ...DEFAULT_CONFIG,
      ...overrides,
    };
  }

  /**
   * Embeds a single text.
   */
  abstract embed(text: string): Promise<number[]>;

  /**
   * Embeds many texts, processing them sequentially in slices of `batchSize`.
   *
   * @param texts Texts to embed; an empty array yields `[]`.
   * @returns The embeddings of all slices concatenated in input order.
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    const configured = this.config.batchSize ?? 100;
    // A size below 1 (or NaN) would never advance the loop; fall back to 100.
    const batchSize = configured >= 1 ? Math.floor(configured) : 100;
    const results: number[][] = [];

    for (let i = 0; i < texts.length; i += batchSize) {
      const batch = texts.slice(i, i + batchSize);
      const embeddings = await this.embedBatchInternal(batch);
      results.push(...embeddings);
    }

    return results;
  }

  /**
   * Embeds one slice of texts. The default implementation calls `embed` for
   * every text concurrently; providers with a native batch API override this.
   */
  protected async embedBatchInternal(texts: string[]): Promise<number[][]> {
    return Promise.all(texts.map((text) => this.embed(text)));
  }

  /**
   * `fetch` wrapper that aborts the request after a timeout and rejects on
   * non-2xx responses.
   *
   * The timer is cleared as soon as `fetch` settles, so reading the response
   * body afterwards is not covered by the timeout.
   *
   * @param url Request URL.
   * @param options Options forwarded to `fetch`; `signal` is replaced.
   * @param timeout Timeout in milliseconds; defaults to `config.timeout`.
   * @returns The response when `response.ok` is true.
   * @throws Error with message `HTTP <status>: <statusText> - <body>` otherwise.
   */
  protected async fetchWithTimeout(
    url: string,
    options: RequestInit,
    timeout?: number
  ): Promise<Response> {
    const controller = new AbortController();
    const timeoutId = setTimeout(
      () => controller.abort(),
      timeout ?? this.config.timeout
    );

    try {
      const response = await fetch(url, {
        ...options,
        signal: controller.signal,
      });

      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(
          `HTTP ${response.status}: ${response.statusText} - ${errorText}`
        );
      }

      return response;
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Runs `operation`, retrying failures with exponential backoff
   * (1s, 2s, 4s, ... capped at 10s).
   *
   * The operation always runs at least once, even when the retry count is
   * zero, negative or NaN. Failures classified as non-retryable by
   * `isRetryableError` are rethrown immediately.
   *
   * @param operation Work to execute.
   * @param maxRetries Number of retries after the first attempt; defaults to
   *   `config.maxRetries`, then 3.
   * @returns The value produced by the first successful attempt.
   * @throws The last error, wrapped in `Error` when a non-Error was thrown.
   */
  protected async withRetry<T>(
    operation: () => Promise<T>,
    maxRetries?: number
  ): Promise<T> {
    const configured = maxRetries ?? this.config.maxRetries ?? 3;
    const retries = configured >= 0 ? Math.floor(configured) : 0;

    for (let attempt = 0; ; attempt++) {
      try {
        return await operation();
      } catch (error) {
        const failure = error instanceof Error ? error : new Error(String(error));

        if (attempt >= retries || !isRetryableError(failure)) {
          throw failure;
        }

        // Exponential backoff before the next attempt.
        await this.sleep(Math.min(1000 * 2 ** attempt, 10000));
      }
    }
  }

  /**
   * Resolves after `ms` milliseconds.
   */
  protected sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }
}
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Provider Factory
|
|
3
|
+
*
|
|
4
|
+
* Esperanto-style factory for creating embedding providers
|
|
5
|
+
*
|
|
6
|
+
* @requirement REQ-RAG-001
|
|
7
|
+
* @design DES-KATASHIRO-003-RAG
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import type { EmbeddingProvider, EmbeddingConfig } from '../types.js';
|
|
11
|
+
import { MockEmbeddingProvider } from './MockEmbeddingProvider.js';
|
|
12
|
+
import type { MockEmbeddingProviderConfig } from './MockEmbeddingProvider.js';
|
|
13
|
+
import { OllamaEmbeddingProvider } from './OllamaEmbeddingProvider.js';
|
|
14
|
+
import type { OllamaEmbeddingConfig } from './OllamaEmbeddingProvider.js';
|
|
15
|
+
import { OpenAIEmbeddingProvider } from './OpenAIEmbeddingProvider.js';
|
|
16
|
+
import type { OpenAIEmbeddingConfig } from './OpenAIEmbeddingProvider.js';
|
|
17
|
+
import { AzureOpenAIEmbeddingProvider } from './AzureOpenAIEmbeddingProvider.js';
|
|
18
|
+
import type { AzureOpenAIEmbeddingConfig } from './AzureOpenAIEmbeddingProvider.js';
|
|
19
|
+
|
|
20
|
+
/**
 * Identifiers of the embedding back ends the factory knows how to build.
 */
export type EmbeddingProviderType = 'mock' | 'ollama' | 'openai' | 'openai-compatible' | 'azure-openai';
|
|
29
|
+
|
|
30
|
+
/**
 * Maps every provider identifier to the configuration type its constructor
 * accepts. Used to type the `config` argument of the factory.
 */
export interface EmbeddingProviderConfigMap {
  /**
   * The factory reads `delay` and `shouldFail` from the mock configuration, so
   * the mock-specific options are exposed here in addition to the common ones.
   */
  mock: EmbeddingConfig & MockEmbeddingProviderConfig;
  ollama: OllamaEmbeddingConfig;
  openai: OpenAIEmbeddingConfig;
  /** OpenAI-compatible servers are driven by the OpenAI provider. */
  'openai-compatible': OpenAIEmbeddingConfig;
  'azure-openai': AzureOpenAIEmbeddingConfig;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Collects the provider settings that can be derived from environment
 * variables.
 *
 * @param provider Provider identifier.
 * @returns A partial configuration whose values are `undefined` for variables
 *   that are not set; an empty object for providers without environment
 *   settings (such as `mock`).
 */
function getConfigFromEnv(
  provider: EmbeddingProviderType
): Partial<EmbeddingProviderConfigMap[EmbeddingProviderType]> {
  const env = process.env;

  if (provider === 'ollama') {
    return {
      baseUrl: env.OLLAMA_BASE_URL ?? env.OLLAMA_HOST,
      model: env.OLLAMA_EMBEDDING_MODEL,
    };
  }

  if (provider === 'openai') {
    return {
      apiKey: env.OPENAI_API_KEY,
      organization: env.OPENAI_ORGANIZATION,
      baseUrl: env.OPENAI_BASE_URL,
      model: env.OPENAI_EMBEDDING_MODEL,
    };
  }

  if (provider === 'openai-compatible') {
    // Embedding-specific variables win over the generic ones.
    const apiKey =
      env.OPENAI_COMPATIBLE_API_KEY_EMBEDDING ?? env.OPENAI_COMPATIBLE_API_KEY;
    const baseUrl =
      env.OPENAI_COMPATIBLE_BASE_URL_EMBEDDING ?? env.OPENAI_COMPATIBLE_BASE_URL;
    return { apiKey, baseUrl };
  }

  if (provider === 'azure-openai') {
    return {
      endpoint: env.AZURE_OPENAI_ENDPOINT,
      apiKey: env.AZURE_OPENAI_API_KEY,
      deploymentName: env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT,
      apiVersion: env.AZURE_OPENAI_API_VERSION,
    };
  }

  return {};
}
|
|
84
|
+
|
|
85
|
+
/**
 * Factory for embedding providers.
 *
 * @example
 * ```typescript
 * // Inspect the supported provider identifiers
 * const providers = EmbeddingFactory.getAvailableProviders();
 *
 * // Create a provider with explicit settings
 * const provider = EmbeddingFactory.create('ollama', {
 *   baseUrl: 'http://192.168.224.1:11434',
 *   model: 'nomic-embed-text',
 * });
 *
 * // Or rely on environment variables
 * const provider = EmbeddingFactory.create('openai');
 * ```
 */
export class EmbeddingFactory {
  /**
   * Creates a provider instance.
   *
   * Settings derived from environment variables are used as a base and are
   * overridden by `config`. Keys in `config` whose value is `undefined` are
   * ignored, so they never erase a value taken from the environment.
   *
   * @param provider Provider identifier.
   * @param config Optional explicit settings for that provider.
   * @returns The constructed provider.
   * @throws Error for an unknown identifier, or whatever the provider
   *   constructor throws for incomplete settings.
   */
  static create<T extends EmbeddingProviderType>(
    provider: T,
    config?: Partial<EmbeddingProviderConfigMap[T]>
  ): EmbeddingProvider {
    const envConfig = getConfigFromEnv(provider);

    // Shallow copy of the caller config without its undefined entries.
    const overrides: Partial<EmbeddingProviderConfigMap[T]> = { ...config };
    for (const key of Object.keys(overrides) as Array<keyof typeof overrides>) {
      if (overrides[key] === undefined) {
        delete overrides[key];
      }
    }

    const mergedConfig = { ...envConfig, ...overrides };

    switch (provider) {
      case 'mock':
        return new MockEmbeddingProvider({
          dimensions: (mergedConfig as MockEmbeddingProviderConfig).dimensions ?? 1536,
          delay: (mergedConfig as MockEmbeddingProviderConfig).delay ?? 0,
          shouldFail: (mergedConfig as MockEmbeddingProviderConfig).shouldFail ?? false,
        });

      case 'ollama':
        return new OllamaEmbeddingProvider(
          mergedConfig as OllamaEmbeddingConfig
        );

      case 'openai':
      case 'openai-compatible':
        return new OpenAIEmbeddingProvider(
          mergedConfig as OpenAIEmbeddingConfig
        );

      case 'azure-openai':
        return new AzureOpenAIEmbeddingProvider(
          mergedConfig as AzureOpenAIEmbeddingConfig
        );

      default:
        throw new Error(`Unknown embedding provider: ${provider}`);
    }
  }

  /**
   * Lists every provider identifier `create` accepts.
   */
  static getAvailableProviders(): EmbeddingProviderType[] {
    return ['mock', 'ollama', 'openai', 'openai-compatible', 'azure-openai'];
  }

  /**
   * Picks a provider from the environment.
   *
   * Priority: Azure OpenAI > OpenAI > Ollama > mock. Azure is chosen only when
   * endpoint, API key and embedding deployment are all set, because the Azure
   * provider cannot be constructed from the environment without a deployment
   * name; otherwise the next candidate is tried.
   *
   * @returns The first provider whose environment settings are present, or a
   *   mock provider as a fallback.
   */
  static getDefaultProvider(): EmbeddingProvider {
    if (
      this.isProviderConfigured('azure-openai') &&
      process.env.AZURE_OPENAI_EMBEDDING_DEPLOYMENT
    ) {
      return this.create('azure-openai');
    }

    if (this.isProviderConfigured('openai')) {
      return this.create('openai');
    }

    if (this.isProviderConfigured('ollama')) {
      return this.create('ollama');
    }

    // Fallback: mock provider.
    return this.create('mock');
  }

  /**
   * Reports whether the environment variables that identify a provider are set.
   *
   * For `azure-openai` only the endpoint and API key are checked; the
   * deployment name may still have to be supplied through `create`'s config.
   *
   * @param provider Provider identifier.
   * @returns `true` when the identifying variables are present (`mock` is
   *   always configured); `false` otherwise, including for unknown identifiers.
   */
  static isProviderConfigured(provider: EmbeddingProviderType): boolean {
    switch (provider) {
      case 'mock':
        return true;

      case 'ollama':
        return !!(process.env.OLLAMA_BASE_URL || process.env.OLLAMA_HOST);

      case 'openai':
        return !!process.env.OPENAI_API_KEY;

      case 'openai-compatible':
        return !!(
          process.env.OPENAI_COMPATIBLE_BASE_URL_EMBEDDING ||
          process.env.OPENAI_COMPATIBLE_BASE_URL
        );

      case 'azure-openai':
        return !!(
          process.env.AZURE_OPENAI_ENDPOINT && process.env.AZURE_OPENAI_API_KEY
        );

      default:
        return false;
    }
  }
}
|
|
209
|
+
|
|
210
|
+
/**
 * Convenience wrapper around `EmbeddingFactory.create`.
 *
 * @param provider Provider identifier.
 * @param config Optional provider settings, forwarded unchanged.
 * @returns The provider built by the factory.
 */
export function createEmbeddingProvider<T extends EmbeddingProviderType>(
  provider: T,
  config?: Partial<EmbeddingProviderConfigMap[T]>
): EmbeddingProvider {
  const instance = EmbeddingFactory.create<T>(provider, config);
  return instance;
}
|
|
219
|
+
|
|
220
|
+
/**
 * Convenience wrapper around `EmbeddingFactory.getDefaultProvider`.
 *
 * @returns The provider selected by the factory.
 */
export function getDefaultEmbeddingProvider(): EmbeddingProvider {
  const selected = EmbeddingFactory.getDefaultProvider();
  return selected;
}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Manager
|
|
3
|
+
*
|
|
4
|
+
* @requirement REQ-RAG-001
|
|
5
|
+
* @design DES-KATASHIRO-003-RAG §3.1
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { EmbeddingProvider, Vector } from '../types.js';
|
|
9
|
+
|
|
10
|
+
/**
 * Wraps an embedding provider with an in-memory cache and batch support.
 *
 * Cache entries are keyed by provider name plus text and live until
 * `clearCache` is called; the cache has no size limit. Cached vectors are
 * returned by reference, so callers should treat them as read-only.
 */
export class EmbeddingManager {
  private provider: EmbeddingProvider;
  private cache: Map<string, Vector> = new Map();

  /**
   * @param provider Provider that generates embeddings on cache misses.
   */
  constructor(provider: EmbeddingProvider) {
    this.provider = provider;
  }

  /**
   * Vector dimension count reported by the provider.
   */
  get dimensions(): number {
    return this.provider.dimensions;
  }

  /**
   * Name reported by the provider.
   */
  get providerName(): string {
    return this.provider.name;
  }

  /**
   * Embeds a single text, consulting the cache first.
   *
   * @param text Text to embed.
   * @returns The cached vector, or a freshly generated one that is cached
   *   before being returned.
   */
  async embed(text: string): Promise<Vector> {
    const cacheKey = this.getCacheKey(text);
    const cached = this.cache.get(cacheKey);
    if (cached !== undefined) {
      return cached;
    }

    const embedding = await this.provider.embed(text);
    this.cache.set(cacheKey, embedding);

    return embedding;
  }

  /**
   * Embeds many texts. Cached texts are served from the cache; the remaining
   * distinct texts are sent to the provider in one `embedBatch` call, in order
   * of first appearance.
   *
   * @param texts Texts to embed; duplicates are embedded only once.
   * @returns One vector per input, in input order.
   * @throws Error when the provider returns a different number of embeddings
   *   than the number of texts it was asked for.
   */
  async embedBatch(texts: string[]): Promise<Vector[]> {
    const results: Vector[] = new Array(texts.length);

    // Cache key -> text and every input position waiting for its embedding.
    const pending = new Map<string, { text: string; positions: number[] }>();

    texts.forEach((text, position) => {
      const cacheKey = this.getCacheKey(text);
      const cached = this.cache.get(cacheKey);
      if (cached !== undefined) {
        results[position] = cached;
        return;
      }

      const entry = pending.get(cacheKey);
      if (entry) {
        entry.positions.push(position);
      } else {
        pending.set(cacheKey, { text, positions: [position] });
      }
    });

    if (pending.size === 0) {
      return results;
    }

    const entries = [...pending.entries()];
    const generated = await this.provider.embedBatch(
      entries.map(([, entry]) => entry.text)
    );

    // Results are matched to inputs by position, so the counts must agree.
    if (generated.length !== entries.length) {
      throw new Error(
        `Embedding provider "${this.provider.name}" returned ${generated.length} embeddings for ${entries.length} texts`
      );
    }

    entries.forEach(([cacheKey, entry], index) => {
      const embedding = generated[index] as Vector;
      this.cache.set(cacheKey, embedding);
      for (const position of entry.positions) {
        results[position] = embedding;
      }
    });

    return results;
  }

  /**
   * Removes every cached embedding.
   */
  clearCache(): void {
    this.cache.clear();
  }

  /**
   * Builds the cache key `<provider name>:<text>`.
   */
  private getCacheKey(text: string): string {
    return `${this.provider.name}:${text}`;
  }
}
|