@objectstack/embedder-openai 6.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts ADDED
@@ -0,0 +1,222 @@
1
+ // Copyright (c) 2026 ObjectStack. Licensed under the Apache-2.0 license.
2
+
3
+ /**
4
+ * `@objectstack/embedder-openai`
5
+ *
6
+ * OpenAI-compatible embedder. Drop-in for any endpoint that speaks the
7
+ * `POST /v1/embeddings` shape:
8
+ *
9
+ * - OpenAI https://api.openai.com/v1
10
+ * - Azure OpenAI https://{resource}.openai.azure.com/openai/deployments/{deployment}
11
+ * - 阿里通义 DashScope https://dashscope.aliyuncs.com/compatible-mode/v1
12
+ * - 智谱 BigModel https://open.bigmodel.cn/api/paas/v4
13
+ * - 硅基流动 SiliconFlow https://api.siliconflow.cn/v1
14
+ * - 火山引擎 Doubao https://ark.cn-beijing.volces.com/api/v3
15
+ * - MiniMax https://api.minimax.chat/v1
16
+ * - Ollama (openai shim) http://localhost:11434/v1
17
+ * - LiteLLM / vLLM / any other OpenAI-compatible service
18
+ *
19
+ * Implements the `IEmbedder` contract from `@objectstack/spec/contracts`.
20
+ */
21
+
22
+ import type { IEmbedder } from '@objectstack/spec/contracts';
23
+
/**
 * Construction options for `OpenAIEmbedder`.
 *
 * Only `apiKey` is mandatory; every other field falls back to a default
 * chosen for the public OpenAI endpoint.
 */
export interface OpenAIEmbedderOptions {
  /** Credential sent as the `Authorization: Bearer <apiKey>` header. Required. */
  apiKey: string;

  /**
   * Model identifier placed in the request body. Typical values per provider:
   *   - OpenAI:                `'text-embedding-3-small'` (default), `'text-embedding-3-large'`
   *   - Alibaba Tongyi:        `'text-embedding-v3'`
   *   - Zhipu:                 `'embedding-3'`
   *   - SiliconFlow:           `'BAAI/bge-m3'`, `'BAAI/bge-large-zh-v1.5'`
   *   - Volcengine Doubao:     `'doubao-embedding-large-text-240915'`
   *   - Ollama:                `'bge-m3'`, `'nomic-embed-text'`
   *
   * @default 'text-embedding-3-small'
   */
  model?: string;

  /**
   * Explicit vector size. Truncation is only supported by Matryoshka-style
   * models (OpenAI v3, Zhipu embedding-3, BGE-m3 dense). When provided, the
   * value is also sent upstream in the `dimensions` body field so providers
   * that honour it can truncate server-side.
   */
  dimensions?: number;

  /**
   * Base URL of the endpoint, excluding the trailing `/embeddings` segment.
   * Point this at any OpenAI-compatible provider.
   *
   * @default 'https://api.openai.com/v1'
   */
  baseUrl?: string;

  /** Stable identifier exposed as `IEmbedder.id`. @default 'openai' */
  id?: string;

  /** Custom `fetch` implementation, mainly for tests. Defaults to `globalThis.fetch`. */
  fetch?: typeof fetch;

  /** Extra request headers (provider-specific keys, tracing ids, ...). */
  headers?: Record<string, string>;
}
59
+
/**
 * Native output dimensions for popular embedding models, keyed by the model
 * id sent in the request body. Consulted as the default vector size when the
 * caller does not pass `dimensions` explicitly.
 *
 * The table is built on a null-prototype object so that looking up an
 * arbitrary model id (e.g. `'constructor'` or `'toString'`) yields
 * `undefined` instead of an inherited `Object.prototype` member, and it is
 * frozen because it is shared module-level state.
 */
const KNOWN_DIMENSIONS: Record<string, number> = Object.freeze(
  Object.assign(Object.create(null) as Record<string, number>, {
    // OpenAI
    'text-embedding-3-small': 1536,
    'text-embedding-3-large': 3072,
    'text-embedding-ada-002': 1536,
    // Alibaba Tongyi (DashScope)
    'text-embedding-v3': 1024,
    'text-embedding-v2': 1536,
    'text-embedding-v1': 1536,
    // Zhipu
    'embedding-3': 2048,
    'embedding-2': 1024,
    // SiliconFlow / BGE family
    'BAAI/bge-m3': 1024,
    'BAAI/bge-large-zh-v1.5': 1024,
    'BAAI/bge-large-en-v1.5': 1024,
    'BAAI/bge-base-zh-v1.5': 768,
    'BAAI/bge-small-zh-v1.5': 512,
    'bge-m3': 1024,
    // Volcengine Doubao
    'doubao-embedding-large-text-240915': 4096,
    // Native size is 2560; 512 / 1024 / 2048 are optional truncation sizes.
    'doubao-embedding-text-240715': 2560,
    // Nomic / Ollama defaults
    'nomic-embed-text': 768,
    // MiniMax
    'embo-01': 1536,
  }),
);
91
+
/**
 * `OpenAIEmbedder` — embedder for any endpoint that implements the OpenAI
 * `POST {baseUrl}/embeddings` request/response shape. Create one instance
 * per upstream provider + model combination and hand it to any knowledge
 * adapter that expects an `IEmbedder`.
 *
 * @example
 * // OpenAI
 * new OpenAIEmbedder({ apiKey: process.env.OPENAI_API_KEY! });
 *
 * @example
 * // Alibaba Tongyi DashScope
 * new OpenAIEmbedder({
 *   apiKey: process.env.DASHSCOPE_API_KEY!,
 *   baseUrl: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
 *   model: 'text-embedding-v3',
 * });
 *
 * @example
 * // SiliconFlow + BGE-m3
 * new OpenAIEmbedder({
 *   apiKey: process.env.SILICONFLOW_API_KEY!,
 *   baseUrl: 'https://api.siliconflow.cn/v1',
 *   model: 'BAAI/bge-m3',
 * });
 *
 * @example
 * // Local Ollama
 * new OpenAIEmbedder({
 *   apiKey: 'ollama',
 *   baseUrl: 'http://localhost:11434/v1',
 *   model: 'bge-m3',
 * });
 */
export class OpenAIEmbedder implements IEmbedder {
  /** Stable identifier of this embedder (`options.id`, default `'openai'`). */
  readonly id: string;
  /** Reported vector size: explicit option, else the known-model table, else 1536. */
  readonly dimensions: number;
  private readonly model: string;
  /** Base URL with trailing slashes removed. */
  private readonly baseUrl: string;
  private readonly apiKey: string;
  private readonly fetchImpl: typeof fetch;
  /** The caller's explicit `dimensions`, forwarded upstream when truthy. */
  private readonly requestedDims?: number;
  private readonly extraHeaders: Record<string, string>;

  /**
   * @param opts Embedder configuration; see `OpenAIEmbedderOptions`.
   * @throws Error when `apiKey` is empty, or when no `fetch` implementation
   *   is available (neither `opts.fetch` nor `globalThis.fetch`).
   */
  constructor(opts: OpenAIEmbedderOptions) {
    if (!opts.apiKey) throw new Error('OpenAIEmbedder: apiKey required');
    const fetchFn = opts.fetch ?? (globalThis.fetch as typeof fetch | undefined);
    if (!fetchFn) {
      throw new Error(
        'OpenAIEmbedder: no fetch available; pass options.fetch or run on Node 18+ / a fetch-capable runtime',
      );
    }
    this.apiKey = opts.apiKey;
    this.id = opts.id ?? 'openai';
    this.model = opts.model ?? 'text-embedding-3-small';
    this.baseUrl = (opts.baseUrl ?? 'https://api.openai.com/v1').replace(/\/+$/, '');
    // Bind so the function is never invoked with this embedder as `this`:
    // platform `fetch` implementations (browsers, Workers) reject a foreign
    // receiver with "Illegal invocation".
    this.fetchImpl = fetchFn.bind(globalThis);
    this.requestedDims = opts.dimensions;
    this.extraHeaders = opts.headers ?? {};
    this.dimensions = opts.dimensions ?? KNOWN_DIMENSIONS[this.model] ?? 1536;
  }

  /**
   * Embeds a batch of texts with a single `POST {baseUrl}/embeddings` call.
   *
   * @param texts Inputs to embed. An empty array short-circuits to `[]`
   *   without touching the network.
   * @returns One vector per input, in the same order as `texts`.
   * @throws Error on a non-2xx response (status, status text and up to 200
   *   characters of the body are included), when the number of returned
   *   items differs from `texts.length`, or when an item lacks an
   *   `embedding` array.
   */
  async embed(texts: string[]): Promise<number[][]> {
    if (texts.length === 0) return [];
    const body: Record<string, unknown> = { model: this.model, input: texts };
    if (this.requestedDims) body.dimensions = this.requestedDims;
    const res = await this.fetchImpl(`${this.baseUrl}/embeddings`, {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        authorization: `Bearer ${this.apiKey}`,
        // Spread last so callers can add to or override the defaults above.
        ...this.extraHeaders,
      },
      body: JSON.stringify(body),
    });
    if (!res.ok) {
      const text = await res.text().catch(() => '');
      throw new Error(
        `OpenAIEmbedder (${this.baseUrl}) → ${res.status} ${res.statusText}${text ? `: ${text.slice(0, 200)}` : ''}`,
      );
    }
    const json = (await res.json()) as { data?: unknown } | null;
    // Tolerate a null body or a non-array `data`; the count check below
    // then reports the mismatch instead of a TypeError.
    const payload: unknown = json?.data;
    const rows: Array<{ embedding?: unknown; index?: unknown } | null> = Array.isArray(payload)
      ? payload
      : [];
    if (rows.length !== texts.length) {
      throw new Error(
        `OpenAIEmbedder: expected ${texts.length} vectors, got ${rows.length}`,
      );
    }
    return OpenAIEmbedder.orderVectors(rows);
  }

  /**
   * Validates response rows and returns their vectors in input order.
   *
   * When every row carries a distinct integer `index` in `[0, rows.length)`,
   * rows are placed by that index; otherwise the response order is kept.
   */
  private static orderVectors(
    rows: ReadonlyArray<{ embedding?: unknown; index?: unknown } | null>,
  ): number[][] {
    const inResponseOrder = rows.map((row, position) => {
      const embedding: unknown = row?.embedding;
      if (!Array.isArray(embedding)) {
        throw new Error(
          `OpenAIEmbedder: response item ${position} has no embedding array`,
        );
      }
      return embedding as number[];
    });

    const byIndex: Array<number[] | undefined> = new Array<number[] | undefined>(rows.length).fill(undefined);
    for (let position = 0; position < rows.length; position += 1) {
      const index: unknown = rows[position]?.index;
      const usable =
        typeof index === 'number' &&
        Number.isInteger(index) &&
        index >= 0 &&
        index < rows.length &&
        byIndex[index] === undefined;
      // Missing, out-of-range or duplicate indices: trust the response order.
      if (!usable) return inResponseOrder;
      byIndex[index as number] = inResponseOrder[position];
    }
    return byIndex as number[][];
  }
}
182
+
/**
 * Base URLs of well-known OpenAI-compatible providers, keyed by a short
 * preset name, so callers do not have to memorise endpoints. Intended to be
 * used through `createOpenAIEmbedder({ preset, ... })`.
 *
 * The `azure` entry is deliberately empty: Azure has no shared endpoint, so
 * the caller must supply the full deployment URL via `baseUrl`.
 */
export const OPENAI_COMPATIBLE_PRESETS = {
  /** OpenAI public API. */
  openai: 'https://api.openai.com/v1',
  /** Azure OpenAI — no default; a deployment URL must be passed as `baseUrl`. */
  azure: '',
  /** Alibaba Tongyi DashScope (compatible mode). */
  dashscope: 'https://dashscope.aliyuncs.com/compatible-mode/v1',
  /** Zhipu BigModel. */
  zhipu: 'https://open.bigmodel.cn/api/paas/v4',
  /** SiliconFlow. */
  siliconflow: 'https://api.siliconflow.cn/v1',
  /** Volcengine Doubao (Ark). */
  doubao: 'https://ark.cn-beijing.volces.com/api/v3',
  /** MiniMax. */
  minimax: 'https://api.minimax.chat/v1',
  /** Local Ollama through its OpenAI shim. */
  ollama: 'http://localhost:11434/v1',
} as const;

/** Union of the preset names accepted by `createOpenAIEmbedder`. */
export type OpenAICompatiblePreset = keyof typeof OPENAI_COMPATIBLE_PRESETS;
199
+
/**
 * Options accepted by `createOpenAIEmbedder`: every `OpenAIEmbedderOptions`
 * field plus an optional provider `preset` from which the base URL is derived.
 */
export interface PresetEmbedderOptions extends OpenAIEmbedderOptions {
  /** Name of a known provider; selects the matching `baseUrl` automatically. */
  preset?: OpenAICompatiblePreset;

  /** Explicit endpoint base URL. When given, it wins over `preset`. */
  baseUrl?: string;
}
207
+
/**
 * Builds an `OpenAIEmbedder` for a provider chosen by preset name.
 *
 * The base URL is resolved as: explicit `opts.baseUrl`, else the URL mapped
 * to `opts.preset` in `OPENAI_COMPATIBLE_PRESETS`, else left undefined so the
 * embedder applies its own default. The embedder id defaults to the preset
 * name, then to `'openai'`.
 *
 * @param opts Embedder options plus an optional `preset`.
 * @returns A configured `OpenAIEmbedder`.
 * @throws Error when the chosen preset has no default endpoint (its mapped
 *   URL is the empty string, as for `'azure'`) and no `baseUrl` was given.
 *
 * @example
 * createOpenAIEmbedder({ preset: 'dashscope', apiKey, model: 'text-embedding-v3' })
 */
export function createOpenAIEmbedder(opts: PresetEmbedderOptions): OpenAIEmbedder {
  // Strip `preset` so only real embedder options reach the constructor.
  const { preset, baseUrl: explicitBaseUrl, ...embedderOpts } = opts;

  let baseUrl: string | undefined = explicitBaseUrl ?? undefined;
  if (baseUrl === undefined && preset) {
    const presetUrl: string | undefined = OPENAI_COMPATIBLE_PRESETS[preset];
    if (presetUrl === '') {
      // An empty base URL would otherwise yield a relative `/embeddings`
      // request that only fails, obscurely, on the first `embed()` call.
      throw new Error(
        `createOpenAIEmbedder: preset '${preset}' has no default endpoint; pass baseUrl explicitly`,
      );
    }
    baseUrl = presetUrl;
  }

  return new OpenAIEmbedder({
    ...embedderOpts,
    baseUrl,
    id: opts.id ?? preset ?? 'openai',
  });
}
221
+
222
+ export default OpenAIEmbedder;
package/tsconfig.json ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "extends": "../../../tsconfig.json",
3
+ "compilerOptions": {
4
+ "target": "ES2020",
5
+ "module": "ES2020",
6
+ "moduleResolution": "bundler",
7
+ "declaration": true,
8
+ "outDir": "./dist",
9
+ "strict": true,
10
+ "esModuleInterop": true,
11
+ "skipLibCheck": true,
12
+ "noUnusedLocals": false,
13
+ "noUnusedParameters": false,
14
+ "forceConsistentCasingInFileNames": true,
15
+ "types": ["node"],
16
+ "rootDir": "./src"
17
+ },
18
+ "include": ["src/**/*"],
19
+ "exclude": ["node_modules", "dist"]
20
+ }
@@ -0,0 +1,17 @@
1
+ // Copyright (c) 2026 ObjectStack. Licensed under the Apache-2.0 license.
2
+
3
+ import { defineConfig } from 'vitest/config';
4
+ import path from 'path';
5
+
// Absolute path of the sibling `spec` package's source directory, resolved
// relative to the directory containing this config file.
const specSourceDir = path.resolve(__dirname, '../../spec/src');

/**
 * Vitest configuration for this package: turns on the `globals` option, runs
 * under the `node` environment, and aliases the `@objectstack/spec` entry
 * points to the spec package's TypeScript sources. The more specific
 * `/contracts` alias is listed before the package root alias.
 */
export default defineConfig({
  test: { globals: true, environment: 'node' },
  resolve: {
    alias: {
      '@objectstack/spec/contracts': path.resolve(specSourceDir, 'contracts/index.ts'),
      '@objectstack/spec': path.resolve(specSourceDir, 'index.ts'),
    },
  },
});