@objectstack/embedder-openai 6.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +22 -0
- package/CHANGELOG.md +45 -0
- package/LICENSE +202 -0
- package/README.md +121 -0
- package/dist/index.d.mts +132 -0
- package/dist/index.d.ts +132 -0
- package/dist/index.js +124 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +97 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +40 -0
- package/src/__tests__/openai-embedder.test.ts +166 -0
- package/src/index.ts +222 -0
- package/tsconfig.json +20 -0
- package/vitest.config.ts +17 -0
package/src/index.ts
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
// Copyright (c) 2026 ObjectStack. Licensed under the Apache-2.0 license.
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* `@objectstack/embedder-openai`
|
|
5
|
+
*
|
|
6
|
+
* OpenAI-compatible embedder. Drop-in for any endpoint that speaks the
|
|
7
|
+
* `POST /v1/embeddings` shape:
|
|
8
|
+
*
|
|
9
|
+
* - OpenAI https://api.openai.com/v1
|
|
10
|
+
* - Azure OpenAI https://{resource}.openai.azure.com/openai/deployments/{deployment}
|
|
11
|
+
* - 阿里通义 DashScope https://dashscope.aliyuncs.com/compatible-mode/v1
|
|
12
|
+
* - 智谱 BigModel https://open.bigmodel.cn/api/paas/v4
|
|
13
|
+
* - 硅基流动 SiliconFlow https://api.siliconflow.cn/v1
|
|
14
|
+
* - 火山引擎 Doubao https://ark.cn-beijing.volces.com/api/v3
|
|
15
|
+
* - MiniMax https://api.minimax.chat/v1
|
|
16
|
+
* - Ollama (openai shim) http://localhost:11434/v1
|
|
17
|
+
* - LiteLLM / vLLM / any other compatible service
|
|
18
|
+
*
|
|
19
|
+
* Implements the `IEmbedder` contract from `@objectstack/spec/contracts`.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
import type { IEmbedder } from '@objectstack/spec/contracts';
|
|
23
|
+
|
|
24
|
+
export interface OpenAIEmbedderOptions {
  /** Bearer token sent as `Authorization: Bearer <apiKey>`. Required. */
  apiKey: string;
  /**
   * Model id sent in the request body. Choose to match your provider:
   *  - OpenAI: `'text-embedding-3-small'` (default), `'text-embedding-3-large'`
   *  - Alibaba Tongyi (DashScope): `'text-embedding-v3'`
   *  - Zhipu (BigModel): `'embedding-3'`
   *  - SiliconFlow: `'BAAI/bge-m3'`, `'BAAI/bge-large-zh-v1.5'`
   *  - Volcano Engine Doubao: `'doubao-embedding-large-text-240915'`
   *  - Ollama: `'bge-m3'`, `'nomic-embed-text'`
   *
   * @default 'text-embedding-3-small'
   */
  model?: string;
  /**
   * Override the vector size. Only Matryoshka-style models (OpenAI v3,
   * Zhipu embedding-3, BGE-m3 dense) support truncation. When set, it is also
   * forwarded to the upstream `dimensions` body field for providers that
   * honour it. When omitted, the embedder falls back to its built-in
   * per-model table and finally to 1536.
   */
  dimensions?: number;
  /**
   * Endpoint base URL (without `/embeddings`). Defaults to OpenAI's. Set this
   * to point at any compatible provider. Trailing slashes are stripped.
   *
   * @default 'https://api.openai.com/v1'
   */
  baseUrl?: string;
  /** Stable id surfaced as `IEmbedder.id`. @default 'openai' */
  id?: string;
  /** Inject for tests. Defaults to `globalThis.fetch`. */
  fetch?: typeof fetch;
  /**
   * Additional headers (e.g. provider-specific keys, tracing). They are merged
   * after the built-in `content-type` and `authorization` headers, so an
   * identically named key here replaces the built-in value.
   */
  headers?: Record<string, string>;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Default output vector sizes for well-known embedding models, keyed by the
 * model id that is sent to the provider. Used as the default when the caller
 * doesn't pass `dimensions` explicitly.
 */
const KNOWN_DIMENSIONS: Record<string, number> = {
  // OpenAI
  'text-embedding-3-small': 1536,
  'text-embedding-3-large': 3072,
  'text-embedding-ada-002': 1536,
  // Alibaba Tongyi (DashScope)
  'text-embedding-v3': 1024,
  'text-embedding-v2': 1536,
  'text-embedding-v1': 1536,
  // Zhipu (BigModel)
  'embedding-3': 2048,
  'embedding-2': 1024,
  // SiliconFlow / BGE family
  'BAAI/bge-m3': 1024,
  'BAAI/bge-large-zh-v1.5': 1024,
  'BAAI/bge-large-en-v1.5': 1024,
  'BAAI/bge-base-zh-v1.5': 768,
  'BAAI/bge-small-zh-v1.5': 512,
  'bge-m3': 1024,
  // Volcano Engine Doubao
  'doubao-embedding-large-text-240915': 4096,
  'doubao-embedding-text-240715': 2048,
  // Nomic / Ollama defaults
  'nomic-embed-text': 768,
  // MiniMax
  'embo-01': 1536,
};
|
|
91
|
+
|
|
92
|
+
/**
 * `OpenAIEmbedder` — embedder for any endpoint that speaks the OpenAI
 * `POST {baseUrl}/embeddings` shape. Create one instance per upstream
 * provider + model combination and pass it into any knowledge adapter that
 * expects `IEmbedder`.
 *
 * @example
 * // OpenAI
 * new OpenAIEmbedder({ apiKey: process.env.OPENAI_API_KEY! });
 *
 * @example
 * // Alibaba Tongyi DashScope
 * new OpenAIEmbedder({
 *   apiKey: process.env.DASHSCOPE_API_KEY!,
 *   baseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
 *   model: 'text-embedding-v3',
 * });
 *
 * @example
 * // SiliconFlow + BGE-m3
 * new OpenAIEmbedder({
 *   apiKey: process.env.SILICONFLOW_API_KEY!,
 *   baseUrl: 'https://api.siliconflow.cn/v1',
 *   model: 'BAAI/bge-m3',
 * });
 *
 * @example
 * // Local Ollama
 * new OpenAIEmbedder({
 *   apiKey: 'ollama',
 *   baseUrl: 'http://localhost:11434/v1',
 *   model: 'bge-m3',
 * });
 */
export class OpenAIEmbedder implements IEmbedder {
  /** Stable identifier of this embedder (`options.id`, default `'openai'`). */
  readonly id: string;
  /** Vector size this embedder reports: explicit option, else known model size, else 1536. */
  readonly dimensions: number;
  private readonly model: string;
  private readonly baseUrl: string;
  private readonly apiKey: string;
  private readonly fetchImpl: typeof fetch;
  private readonly requestedDims?: number;
  private readonly extraHeaders: Record<string, string>;

  /**
   * @param opts Connection and model settings; see `OpenAIEmbedderOptions`.
   * @throws Error when `apiKey` is empty, or when neither `opts.fetch` nor a
   *   global `fetch` is available.
   */
  constructor(opts: OpenAIEmbedderOptions) {
    if (!opts.apiKey) throw new Error('OpenAIEmbedder: apiKey required');
    const fetchFn = opts.fetch ?? (globalThis.fetch as typeof fetch | undefined);
    if (!fetchFn) {
      throw new Error(
        'OpenAIEmbedder: no fetch available; pass options.fetch or run on Node 18+ / a fetch-capable runtime',
      );
    }
    this.apiKey = opts.apiKey;
    this.id = opts.id ?? 'openai';
    this.model = opts.model ?? 'text-embedding-3-small';
    // Strip trailing slashes so `${baseUrl}/embeddings` never contains `//`.
    this.baseUrl = (opts.baseUrl ?? 'https://api.openai.com/v1').replace(/\/+$/, '');
    this.fetchImpl = fetchFn;
    this.extraHeaders = opts.headers ?? {};
    // A falsy override (0 / NaN) is never forwarded upstream, so treat it as
    // "not set" here too; otherwise `dimensions` would report a size that the
    // returned vectors do not have.
    this.requestedDims = opts.dimensions || undefined;
    // Own-property lookup only: a model id such as 'toString' must not resolve
    // to an inherited Object.prototype member.
    const knownDims = Object.prototype.hasOwnProperty.call(KNOWN_DIMENSIONS, this.model)
      ? KNOWN_DIMENSIONS[this.model]
      : undefined;
    this.dimensions = this.requestedDims ?? knownDims ?? 1536;
  }

  /**
   * Embeds `texts` with a single `POST {baseUrl}/embeddings` request.
   *
   * @param texts Inputs to embed. An empty array returns `[]` without any
   *   network call.
   * @returns One vector per input, in input order. When every response item
   *   carries a distinct, in-range `index`, vectors are placed by that index;
   *   otherwise the response array order is used as-is.
   * @throws Error on a non-2xx response (message includes status and the first
   *   200 characters of the body) or when the number of returned vectors does
   *   not match the number of inputs.
   */
  async embed(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) return [];

    const body: Record<string, unknown> = { model: this.model, input: texts };
    if (this.requestedDims !== undefined) body.dimensions = this.requestedDims;

    // Call through a local binding: invoking `this.fetchImpl(...)` would pass
    // the embedder as the receiver, which native browser/Workers `fetch`
    // rejects with "Illegal invocation".
    const send = this.fetchImpl;
    const res = await send(`${this.baseUrl}/embeddings`, {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        authorization: `Bearer ${this.apiKey}`,
        // Spread last so caller-supplied headers win over the defaults.
        ...this.extraHeaders,
      },
      body: JSON.stringify(body),
    });

    if (!res.ok) {
      // Best effort: the body is only used to enrich the error message.
      const text = await res.text().catch(() => '');
      throw new Error(
        `OpenAIEmbedder (${this.baseUrl}) → ${res.status} ${res.statusText}${text ? `: ${text.slice(0, 200)}` : ''}`,
      );
    }

    const json = (await res.json()) as {
      data?: Array<{ embedding: number[]; index?: number }>;
    } | null;
    // Tolerate a `null` body or a non-array `data`; both surface as a count mismatch.
    const payload = json?.data;
    const data = Array.isArray(payload) ? payload : [];
    if (data.length !== texts.length) {
      throw new Error(
        `OpenAIEmbedder: expected ${texts.length} vectors, got ${data.length}`,
      );
    }
    return OpenAIEmbedder.inRequestOrder(data).map((item) => item.embedding);
  }

  /**
   * Returns `items` arranged by their `index` field when every item has a
   * distinct integer index within `[0, items.length)`; otherwise returns
   * `items` unchanged.
   */
  private static inRequestOrder<T extends { index?: number }>(items: T[]): T[] {
    const slots: Array<T | undefined> = new Array<T | undefined>(items.length).fill(undefined);
    for (const item of items) {
      const at = item.index;
      if (
        typeof at !== 'number' ||
        !Number.isInteger(at) ||
        at < 0 ||
        at >= items.length ||
        slots[at] !== undefined
      ) {
        return items;
      }
      slots[at] = item;
    }
    return slots as T[];
  }
}
|
|
182
|
+
|
|
183
|
+
/**
 * Base URLs of popular OpenAI-compatible providers, keyed by preset name —
 * saves callers from memorising them. Select one via
 * `createOpenAIEmbedder({ preset, ... })`.
 *
 * The `azure` entry is deliberately empty: the endpoint depends on the
 * caller's resource and deployment, so it must be supplied through `baseUrl`.
 */
export const OPENAI_COMPATIBLE_PRESETS = {
  openai: 'https://api.openai.com/v1',
  azure: '', // user must provide full deployment URL via baseUrl
  dashscope: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
  zhipu: 'https://open.bigmodel.cn/api/paas/v4',
  siliconflow: 'https://api.siliconflow.cn/v1',
  doubao: 'https://ark.cn-beijing.volces.com/api/v3',
  minimax: 'https://api.minimax.chat/v1',
  ollama: 'http://localhost:11434/v1',
} as const;

/** Name of a built-in provider preset (a key of `OPENAI_COMPATIBLE_PRESETS`). */
export type OpenAICompatiblePreset = keyof typeof OPENAI_COMPATIBLE_PRESETS;
|
|
199
|
+
|
|
200
|
+
/**
 * Options accepted by `createOpenAIEmbedder`: every `OpenAIEmbedderOptions`
 * field, plus an optional provider preset used to derive `baseUrl`.
 */
export interface PresetEmbedderOptions
  extends Omit<OpenAIEmbedderOptions, 'baseUrl'> {
  /** Pick a known provider; sets `baseUrl` automatically. */
  preset?: OpenAICompatiblePreset;
  /** Explicit override; takes precedence over `preset`. */
  baseUrl?: string;
}
|
|
207
|
+
|
|
208
|
+
/**
 * Helper: build an `OpenAIEmbedder` for a provider chosen by preset name.
 * Equivalent to constructing `OpenAIEmbedder` with the matching `baseUrl`.
 *
 * Resolution rules:
 *  - `baseUrl`: explicit `opts.baseUrl`, else the preset's URL, else left
 *    undefined so the embedder applies its own default.
 *  - `id`: explicit `opts.id`, else the preset name, else `'openai'`.
 *
 * @param opts Embedder options plus an optional `preset`.
 * @returns A configured `OpenAIEmbedder`.
 * @throws Error when the resolved `baseUrl` is empty — e.g. the `azure`
 *   preset, which has no default endpoint, used without an explicit `baseUrl`.
 *
 * @example
 * createOpenAIEmbedder({ preset: 'dashscope', apiKey, model: 'text-embedding-v3' })
 */
export function createOpenAIEmbedder(opts: PresetEmbedderOptions): OpenAIEmbedder {
  // `preset` is a factory-only concern; keep it out of the embedder options.
  const { preset, ...embedderOpts } = opts;
  const presetUrl = preset ? OPENAI_COMPATIBLE_PRESETS[preset] : undefined;
  const baseUrl = opts.baseUrl ?? presetUrl;

  // An empty URL would otherwise survive the embedder's `??` default and only
  // fail later, at request time, as an obscure relative-URL fetch error.
  if (baseUrl !== undefined && baseUrl.trim() === '') {
    throw new Error(
      `createOpenAIEmbedder: no endpoint resolved${preset ? ` for preset '${preset}'` : ''}; pass a non-empty baseUrl`,
    );
  }

  return new OpenAIEmbedder({
    ...embedderOpts,
    baseUrl,
    id: opts.id ?? preset ?? 'openai',
  });
}
|
|
221
|
+
|
|
222
|
+
export default OpenAIEmbedder;
|
package/tsconfig.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"extends": "../../../tsconfig.json",
|
|
3
|
+
"compilerOptions": {
|
|
4
|
+
"target": "ES2020",
|
|
5
|
+
"module": "ES2020",
|
|
6
|
+
"moduleResolution": "bundler",
|
|
7
|
+
"declaration": true,
|
|
8
|
+
"outDir": "./dist",
|
|
9
|
+
"strict": true,
|
|
10
|
+
"esModuleInterop": true,
|
|
11
|
+
"skipLibCheck": true,
|
|
12
|
+
"noUnusedLocals": false,
|
|
13
|
+
"noUnusedParameters": false,
|
|
14
|
+
"forceConsistentCasingInFileNames": true,
|
|
15
|
+
"types": ["node"],
|
|
16
|
+
"rootDir": "./src"
|
|
17
|
+
},
|
|
18
|
+
"include": ["src/**/*"],
|
|
19
|
+
"exclude": ["node_modules", "dist"]
|
|
20
|
+
}
|
package/vitest.config.ts
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
// Copyright (c) 2026 ObjectStack. Licensed under the Apache-2.0 license.
|
|
2
|
+
|
|
3
|
+
import { defineConfig } from 'vitest/config';
|
|
4
|
+
import path from 'path';
|
|
5
|
+
|
|
6
|
+
// Vitest configuration for this package.
export default defineConfig({
  test: {
    globals: true,
    environment: 'node',
  },
  resolve: {
    // Resolve the spec package to TypeScript sources at `../../spec/src`
    // relative to this file.
    // NOTE(review): the more specific `/contracts` subpath is listed before the
    // package root, presumably so it is matched first — confirm against Vite's
    // alias matching rules before reordering.
    alias: {
      '@objectstack/spec/contracts': path.resolve(__dirname, '../../spec/src/contracts/index.ts'),
      '@objectstack/spec': path.resolve(__dirname, '../../spec/src/index.ts'),
    },
  },
});
|