utilitas 2000.3.46 → 2000.3.48
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -9
- package/dist/utilitas.lite.mjs +1 -1
- package/dist/utilitas.lite.mjs.map +1 -1
- package/index.mjs +4 -4
- package/lib/alan.mjs +1 -0
- package/lib/manifest.mjs +2 -1
- package/lib/rag.mjs +252 -0
- package/package.json +2 -1
- package/lib/embedding.mjs +0 -160
package/index.mjs
CHANGED
@@ -12,7 +12,7 @@ import * as cache from './lib/cache.mjs';
 import * as callosum from './lib/callosum.mjs';
 import * as dbio from './lib/dbio.mjs';
 import * as email from './lib/email.mjs';
-import * as
+import * as rag from './lib/rag.mjs';
 import * as encryption from './lib/encryption.mjs';
 import * as event from './lib/event.mjs';
 import * as media from './lib/media.mjs';
@@ -38,9 +38,9 @@ export {
     // dependencies
     fileType, math, uuid,
     // features
-    alan, bee, bot, boxes, cache, callosum, color, dbio, email,
-
-
+    alan, bee, bot, boxes, cache, callosum, color, dbio, email, rag, encryption,
+    event, manifest, media, memory, network, sentinel, shell, sms, speech, ssl,
+    storage, tape, uoid, utilitas, vision, web
 };
 
 if (utilitas.inBrowser() && !globalThis.utilitas) {
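
For context: as of this release the root entry point re-exports the new rag module, and the embedding module no longer appears in the export list (its source file is deleted further down in this diff). A minimal sketch of the new surface; the logged key list is inferred from lib/rag.mjs below, not documented output:

import { rag } from 'utilitas';

// Expected to include: _NEED, embed, initEmbedding, initReranker, rerank
console.log(Object.keys(rag));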
package/lib/alan.mjs
CHANGED
@@ -1398,6 +1398,7 @@ const trimText = async (text, limit = Infinity) => {
     text = ensureString(text, { trim: true });
     let trimmed = false;
     let lastCheck = null;
+    limit = Math.max(limit, 0);
     while ((lastCheck = await countTokens(
         buildTextWithEllipsis(text, trimmed), { fast: true }
     )) > limit) {
package/lib/manifest.mjs
CHANGED
@@ -1,7 +1,7 @@
 const manifest = {
     "name": "utilitas",
     "description": "Just another common utility for JavaScript.",
-    "version": "2000.3.46",
+    "version": "2000.3.48",
     "private": false,
     "homepage": "https://github.com/Leask/utilitas",
     "main": "index.mjs",
@@ -26,6 +26,7 @@ const manifest = {
     "devDependencies": {
         "@ffmpeg-installer/ffmpeg": "^1.1.0",
         "@ffprobe-installer/ffprobe": "^2.1.2",
+        "@google-cloud/discoveryengine": "^2.5.2",
         "@google-cloud/storage": "^7.18.0",
         "@google/genai": "^1.31.0",
         "@mozilla/readability": "github:mozilla/readability",
package/lib/rag.mjs
ADDED
@@ -0,0 +1,252 @@
+import { countTokens, trimText } from './alan.mjs';
+import { convert } from './storage.mjs';
+import { ensureArray, ensureString, need } from './utilitas.mjs';
+
+const _NEED = ['openai', '@google-cloud/discoveryengine'];
+const embeddingClients = {};
+const rerankerClients = {};
+
+const [
+    OPENAI, GOOGLE, OPENROUTER, JINA,
+    GOOGLE_DEFAULT_LOCATION, GOOGLE_RERANK_CONFIG_ID,
+    OPENAI_MODEL_EMBED_SMALL,
+    OPENAI_MODEL_EMBED_LARGE,
+    GOOGLE_MODEL_GEMINI_EMBED,
+    JINA_MODEL_V_4,
+    GOOGLE_MODEL_SEMANTIC_RANKER,
+] = [
+    'OPENAI', 'GOOGLE', 'OPENROUTER', 'JINA',
+    'global', 'default_ranking_config',
+    'text-embedding-3-small', // dim: 1536
+    'text-embedding-3-large', // dim: 3072
+    'gemini-embedding-001', // dim: 768(default), 1536, or 3072(google default)
+    'jina-embeddings-v4', // dim: 256‑2048
+    'semantic-ranker-default@latest',
+];
+
+const PROVIDER_BASE_URL = {
+    [OPENROUTER]: 'https://openrouter.ai/api/v1',
+    [JINA]: 'https://api.jina.ai/v1/',
+};
+
+const DEFAULT_EMBEDDING_MODELS = {
+    [OPENAI]: OPENAI_MODEL_EMBED_SMALL,
+    [OPENROUTER]: GOOGLE_MODEL_GEMINI_EMBED,
+    [JINA]: JINA_MODEL_V_4,
+};
+
+const DEFAULT_RERANKER_MODELS = {
+    [GOOGLE]: GOOGLE_MODEL_SEMANTIC_RANKER,
+};
+
+const MODEL_CONFIG = {
+    [OPENAI_MODEL_EMBED_SMALL]: {
+        source: 'openai', image: false, maxTokens: 8192,
+    },
+    [OPENAI_MODEL_EMBED_LARGE]: {
+        source: 'openai', image: false, maxTokens: 8192,
+    },
+    [GOOGLE_MODEL_GEMINI_EMBED]: {
+        source: 'google', image: false, maxTokens: 2048,
+        options: { dimensions: 768 },
+    },
+    // Token calculation may be incorrect because its limitation applies to the
+    // entire request rather than individual entries.
+    // https://jina.ai/embeddings
+    [JINA_MODEL_V_4]: {
+        source: 'jina', image: true, maxTokens: 8192, recordsLimit: 512,
+        options: {
+            task: 'text-matching', // 'retrieval.query', 'retrieval.passage'
+            dimensions: 768, // normalized: true, by default DONT submit
+            truncate: true, // late_chunking: true, by default DONT submit
+            embedding_type: 'float',
+        },
+    },
+    [GOOGLE_MODEL_SEMANTIC_RANKER]: {
+        source: 'google', image: false, maxTokens: 1024, recordsLimit: 200,
+    },
+};
+
+const ensureEmbeddingProvider = (options) => {
+    options.provider = ensureString(options?.provider, { case: 'UP' });
+    assert(
+        DEFAULT_EMBEDDING_MODELS?.[options.provider],
+        'Embedding provider is required.', 400
+    );
+    return options.provider;
+};
+
+const ensureRerankerProvider = (options) => {
+    options.provider = ensureString(options?.provider, { case: 'UP' });
+    assert(
+        DEFAULT_RERANKER_MODELS?.[options.provider],
+        'Reranker provider is required.', 400
+    );
+    return options.provider;
+};
+
+const ensureApiKey = (options) => {
+    assert(options?.apiKey, 'API key is required.', 400);
+    return options.apiKey;
+};
+
+const ensureGoogleCredentials = (options) => {
+    assert(options?.googleCredentials, 'Google credentials are required.', 400);
+    assert(options?.projectId, 'Google project ID is required.', 400);
+    return options;
+};
+
+const getEmbeddingClient = (provider) => {
+    provider = ensureString(provider, { case: 'UP' })
+        || Object.keys(embeddingClients || {})[0];
+    assert(provider, 'No embedding provider has been initialized.', 500);
+    return { ...embeddingClients?.[provider], provider };
+};
+
+const getRerankerClient = (provider) => {
+    provider = ensureString(provider, { case: 'UP' })
+        || Object.keys(rerankerClients || {})[0];
+    assert(provider, 'No reranker provider has been initialized.', 500);
+    return { ...rerankerClients?.[provider], provider };
+};
+
+const initEmbedding = async (options = {}) => {
+    if (options?.debug) {
+        (await need('node:util')).inspect.defaultOptions.depth = null;
+        options.logLevel = 'debug';
+    }
+    ensureApiKey(options);
+    const provider = ensureEmbeddingProvider(options);
+    const OpenAI = await need('openai');
+    const baseURL = options?.baseURL || PROVIDER_BASE_URL[provider];
+    const model = options?.model || DEFAULT_EMBEDDING_MODELS[provider];
+    embeddingClients[provider] = {
+        client: new OpenAI({ ...options, baseURL }),
+        model, source: MODEL_CONFIG[model]?.source,
+    };
+    return getEmbeddingClient(provider);
+};
+
+const embed = async (input, options = {}) => {
+    let [{ client, model: selectedModel, provider, source }, resp]
+        = [getEmbeddingClient(options?.provider), null];
+    const model = options?.model || selectedModel;
+    const multiple = Array.isArray(input);
+    input = await Promise.all(ensureArray(input).map(async x => {
+        x = Object.isObject(x) ? x : { text: x };
+        assert(
+            Object.keys(x).length == 1,
+            'Only one type of input is allowed at a time.', 400
+        );
+        if (x.text) {
+            x.text = await trimText(x.text, MODEL_CONFIG[model]?.maxTokens);
+        } else if (x.image) {
+            assert(
+                MODEL_CONFIG[model]?.image,
+                `Model ${model} does not support image embeddings.`, 400
+            );
+            if (options?.input) {
+                x.image = await convert(
+                    x.image, { ...options, expected: 'base64' }
+                );
+            }
+        }
+        return x;
+    }));
+    MODEL_CONFIG[model]?.image || (input = input.map(x => x.text));
+    assert(input.length, 'Input is required.', 400);
+    const body = {
+        model, input, ...MODEL_CONFIG[model]?.options || {},
+        ...options?.requestOptions || {},
+    };
+    switch (provider) {
+        case JINA:
+            resp = await client.post('/embeddings', { body });
+            break;
+        case OPENROUTER:
+            source = options?.source || source
+                || MODEL_CONFIG[body.model]?.source;
+            body.model = `${source ? `${source}/` : ''}${body.model}`;
+        case OPENAI:
+            resp = await client.embeddings.create(body);
+            break;
+        default:
+            throw new Error(`Unsupported embedding provider: ${provider}`);
+    }
+    assert(resp?.data?.length, 'No embeddings returned.', 500);
+    if (options?.raw) { return resp; }
+    const vectors = resp.data.map(x => x.embedding);
+    return multiple ? vectors : vectors[0];
+};
+
+const initReranker = async (options = {}) => {
+    const provider = ensureRerankerProvider(options);
+    switch (provider) {
+        case GOOGLE:
+            ensureGoogleCredentials(options);
+            const { RankServiceClient } = await need(
+                '@google-cloud/discoveryengine', { raw: true }
+            );
+            const location = options?.location || GOOGLE_DEFAULT_LOCATION;
+            const clientOptions = {
+                ...location ? { apiEndpoint: `${location}-discoveryengine.googleapis.com` } : {},
+                ...options?.apiEndpoint ? { apiEndpoint: options.apiEndpoint } : {},
+                keyFilename: options.googleCredentials,
+            };
+            const client = new RankServiceClient(clientOptions);
+            rerankerClients[provider] = {
+                model: options?.model || DEFAULT_RERANKER_MODELS[provider],
+                client, rankingConfigPath: client.rankingConfigPath(
+                    options.projectId, location,
+                    options?.rerankerConfigId || GOOGLE_RERANK_CONFIG_ID
+                ),
+            };
+            break;
+        default:
+            throw new Error(`Unsupported reranker provider: ${provider}`);
+    }
+    return getRerankerClient(provider);
+};
+
+const rerank = async (query, records, options = {}) => {
+    assert(query, 'Query is required.', 400);
+    assert(records?.length, 'Records are required.', 400);
+    const { provider, model, client, rankingConfigPath }
+        = getRerankerClient(options?.provider);
+    records = records.map((content, id) => Object.isObject(content)
+        ? content : { id: String(id), content }).slice(
+            0, MODEL_CONFIG[model]?.recordsLimit || records.length
+        );
+    const maxTokens = MODEL_CONFIG[model]?.maxTokens || Infinity;
+    let result;
+    for (let i in records) {
+        records[i].title = await trimText(records[i]?.title || '', maxTokens);
+        const titleTokens = await countTokens(records[i].title);
+        const availableTokens = maxTokens - titleTokens;
+        records[i].content = availableTokens > 0 ? await trimText(
+            records[i].content, availableTokens
+        ) : '';
+    }
+    switch (provider) {
+        case GOOGLE:
+            const request = {
+                model, query, rankingConfig: rankingConfigPath,
+                records, topN: ~~options?.topN || records.length,
+                ...options?.requestOptions || {},
+            };
+            result = (await client.rank(request))?.[0]?.records;
+            break;
+        default:
+            throw new Error(`Unsupported reranker provider: ${provider}`);
+    }
+    // print(result);
+    return result || [];
+};
+
+export {
+    _NEED,
+    embed,
+    initEmbedding,
+    initReranker,
+    rerank,
+};
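
Taken together, rag.mjs maintains two lazily initialized client pools: OpenAI-compatible embedding clients (OPENAI, OPENROUTER, JINA) and a Google Discovery Engine reranker. The sketch below shows a plausible usage pattern inferred from the code above; the API key, credentials path, and project ID are placeholder assumptions, not values from this diff:

import { rag } from 'utilitas';

// Embeddings: initialize a provider once, then pass a single string or an
// array; { image } inputs are only accepted by models flagged image: true.
await rag.initEmbedding({ provider: 'OPENAI', apiKey: process.env.OPENAI_API_KEY });
const one = await rag.embed('hello world');                // one input -> one vector
const many = await rag.embed(['first doc', 'second doc']); // array -> array of vectors

// Reranking: only GOOGLE is wired up; it needs a service-account key file
// and a project ID (both placeholders here).
await rag.initReranker({
    provider: 'GOOGLE',
    googleCredentials: '/path/to/service-account.json',
    projectId: 'my-gcp-project',
});
const ranked = await rag.rerank('what is utilitas?', [
    'Just another common utility for JavaScript.',
    'An unrelated passage about something else.',
], { topN: 1 });
console.log(one.length, many.length, ranked);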
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
     "name": "utilitas",
     "description": "Just another common utility for JavaScript.",
-    "version": "2000.3.46",
+    "version": "2000.3.48",
     "private": false,
     "homepage": "https://github.com/Leask/utilitas",
     "main": "index.mjs",
@@ -37,6 +37,7 @@
     "devDependencies": {
         "@ffmpeg-installer/ffmpeg": "^1.1.0",
         "@ffprobe-installer/ffprobe": "^2.1.2",
+        "@google-cloud/discoveryengine": "^2.5.2",
         "@google-cloud/storage": "^7.18.0",
         "@google/genai": "^1.31.0",
         "@mozilla/readability": "github:mozilla/readability",
package/lib/embedding.mjs
DELETED
@@ -1,160 +0,0 @@
-import { convert } from './storage.mjs';
-import { ensureArray, ensureString, need } from './utilitas.mjs';
-import { trimText } from './alan.mjs';
-
-const _NEED = ['openai'];
-const clients = {};
-
-const [
-    OPENAI,
-    OPENROUTER,
-    JINA,
-    OPENAI_MODEL_EMBED_SMALL,
-    OPENAI_MODEL_EMBED_LARGE,
-    GOOGLE_MODEL_GEMINI_EMBED,
-    JINA_MODEL_CLIP_2,
-    JINA_MODEL_V_3,
-] = [
-    'OPENAI',
-    'OPENROUTER',
-    'JINA',
-    'text-embedding-3-small', // dim: 1536
-    'text-embedding-3-large', // dim: 3072
-    'gemini-embedding-001', // dim: 768(default), 1536, or 3072(google default)
-    'jina-clip-v2', // dim: 1024
-    'jina-embeddings-v3', // dim: 256‑1024
-];
-
-const PROVIDER_BASE_URL = {
-    [OPENROUTER]: 'https://openrouter.ai/api/v1',
-    [JINA]: 'https://api.jina.ai/v1/',
-};
-
-const DEFAULT_MODELS = {
-    [OPENAI]: OPENAI_MODEL_EMBED_SMALL,
-    [OPENROUTER]: GOOGLE_MODEL_GEMINI_EMBED,
-    [JINA]: JINA_MODEL_CLIP_2,
-};
-
-const MODEL_CONFIG = {
-    [OPENAI_MODEL_EMBED_SMALL]: { source: 'openai', maxTokens: 8192 },
-    [OPENAI_MODEL_EMBED_LARGE]: { source: 'openai', maxTokens: 8192 },
-    [GOOGLE_MODEL_GEMINI_EMBED]: {
-        source: 'google', maxTokens: 2048, options: { dimensions: 768 },
-    },
-    [JINA_MODEL_CLIP_2]: {
-        maxTokens: 8192,
-        image: true,
-        options: {
-            task: 'retrieval.query',
-            dimensions: 1024,
-            normalized: true,
-            embedding_type: 'float',
-        },
-    },
-    // Token calculation may be incorrect because its limitation applies to the
-    // entire request rather than individual entries.
-    [JINA_MODEL_V_3]: {
-        maxTokens: 8192,
-        image: false,
-        options: {
-            task: 'retrieval.query',
-            dimensions: 1024,
-            normalized: true,
-            late_chunking: true,
-            embedding_type: 'float',
-        },
-    },
-};
-
-const ensureProvider = (options) => {
-    options.provider = ensureString(options?.provider, { case: 'UP' });
-    assert(
-        DEFAULT_MODELS?.[options.provider], 'Provider is required.', 400
-    );
-    return options.provider;
-};
-
-const ensureApiKey = (options) => {
-    assert(options?.apiKey, 'API key is required.', 400);
-    return options.apiKey;
-};
-
-const getClient = (provider) => {
-    provider = ensureString(provider, { case: 'UP' })
-        || Object.keys(clients || {})[0];
-    assert(provider, 'No embedding provider has been initialized.', 500);
-    return { ...clients?.[provider], provider };
-};
-
-const init = async (options = {}) => {
-    ensureApiKey(options);
-    const provider = ensureProvider(options);
-    const OpenAI = await need('openai');
-    const baseURL = options?.baseURL || PROVIDER_BASE_URL[provider];
-    const model = options?.model || DEFAULT_MODELS[provider];
-    clients[provider] = {
-        client: new OpenAI({ ...options, baseURL }),
-        model, source: MODEL_CONFIG[model]?.source,
-    };
-    return getClient(provider);
-};
-
-const embed = async (input, options = {}) => {
-    let [{ client, model: selectedModel, provider, source }, resp]
-        = [getClient(options?.provider), null];
-    const model = options?.model || selectedModel;
-    const multiple = Array.isArray(input);
-    input = await Promise.all(ensureArray(input).map(async x => {
-        x = Object.isObject(x) ? x : { text: x };
-        assert(
-            Object.keys(x).length == 1,
-            'Only one type of input is allowed at a time.', 400
-        );
-        if (x.text) {
-            x.text = await trimText(x.text, MODEL_CONFIG[model]?.maxTokens);
-        } else if (x.image) {
-            assert(
-                MODEL_CONFIG[model]?.image,
-                `Model ${model} does not support image embeddings.`, 400
-            );
-            if (options?.input) {
-                x.image = await convert(
-                    x.image, { ...options, expected: 'base64' }
-                );
-            }
-        }
-        return x;
-    }));
-    MODEL_CONFIG[model]?.image || (input = input.map(x => x.text));
-    assert(input.length, 'Input is required.', 400);
-    const body = {
-        model, input, ...MODEL_CONFIG[model]?.options || {},
-        ...options?.requestOptions || {},
-    };
-    switch (provider) {
-        case JINA:
-            resp = await client.post('/embeddings', { body });
-            break;
-        case OPENROUTER:
-            source = options?.source || source
-                || MODEL_CONFIG[body.model]?.source;
-            body.model = `${source ? `${source}/` : ''}${body.model}`;
-        case OPENAI:
-            resp = await client.embeddings.create(body);
-            break;
-        default:
-            throw new Error(`Unsupported provider: ${provider}`);
-    }
-    assert(resp?.data?.length, 'No embeddings returned.', 500);
-    if (options?.raw) { return resp; }
-    const vectors = resp.data.map(x => x.embedding);
-    return multiple ? vectors : vectors[0];
-};
-
-export default init;
-export {
-    _NEED,
-    embed,
-    init,
-};
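
Since lib/embedding.mjs is deleted, code that used it needs to move to the new module: init becomes initEmbedding, the default export is gone, and the default JINA model changes from jina-clip-v2 (dim 1024) to jina-embeddings-v4 (dim 768). A hedged before/after sketch based only on the two export lists in this diff; the import paths and option values are assumptions, not part of the release:

// Before (<= 2000.3.46), via the now-deleted lib/embedding.mjs:
//   import init, { embed } from 'utilitas/lib/embedding.mjs';
//   await init({ provider: 'JINA', apiKey: process.env.JINA_API_KEY });
//   const vector = await embed('hello world'); // jina-clip-v2, dim 1024 by default

// After (2000.3.48), via the root-level `rag` export added in index.mjs:
import { rag } from 'utilitas';
await rag.initEmbedding({ provider: 'JINA', apiKey: process.env.JINA_API_KEY });
const vector = await rag.embed('hello world'); // jina-embeddings-v4, dim 768 by default
console.log(vector.length);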