@lota-sdk/core 0.4.39 → 0.4.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lota-sdk/core",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.41",
|
|
4
4
|
"files": [
|
|
5
5
|
"src",
|
|
6
6
|
"infrastructure/schema"
|
|
@@ -32,7 +32,7 @@
|
|
|
32
32
|
"@ai-sdk/provider": "^3.0.9",
|
|
33
33
|
"@chat-adapter/slack": "^4.26.0",
|
|
34
34
|
"@chat-adapter/state-ioredis": "^4.26.0",
|
|
35
|
-
"@lota-sdk/shared": "0.4.
|
|
35
|
+
"@lota-sdk/shared": "0.4.41",
|
|
36
36
|
"@mendable/firecrawl-js": "^4.20.0",
|
|
37
37
|
"@surrealdb/node": "^3.0.3",
|
|
38
38
|
"ai": "^6.0.170",
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { devToolsMiddleware } from '@ai-sdk/devtools'
|
|
2
2
|
import { createOpenAI } from '@ai-sdk/openai'
|
|
3
3
|
import type { JSONSchema7 } from '@ai-sdk/provider'
|
|
4
|
-
import { wrapEmbeddingModel, wrapLanguageModel } from 'ai'
|
|
4
|
+
import { wrapLanguageModel } from 'ai'
|
|
5
5
|
import type { LanguageModelMiddleware } from 'ai'
|
|
6
6
|
import { Cause, Clock, Context, Duration, Effect, Fiber, Layer, Semaphore } from 'effect'
|
|
7
7
|
|
|
@@ -16,7 +16,6 @@ type AiGatewayChatResponse = { body?: unknown }
|
|
|
16
16
|
type AiGatewayTransformParamsOptions = Parameters<NonNullable<LanguageModelMiddleware['transformParams']>>[0]
|
|
17
17
|
type WrapStreamOptions = Parameters<NonNullable<LanguageModelMiddleware['wrapStream']>>[0]
|
|
18
18
|
type AiGatewayLanguageModel = Parameters<typeof wrapLanguageModel>[0]['model']
|
|
19
|
-
type AiGatewayEmbeddingModel = Parameters<typeof wrapEmbeddingModel>[0]['model']
|
|
20
19
|
type AiGatewayCallOptions = WrapStreamOptions['params']
|
|
21
20
|
type AiGatewayFunctionTool = Extract<NonNullable<AiGatewayCallOptions['tools']>[number], { type: 'function' }>
|
|
22
21
|
type AiGatewayGenerateResult = Awaited<ReturnType<WrapStreamOptions['doGenerate']>>
|
|
@@ -45,8 +44,6 @@ const AI_GATEWAY_MAX_RETRIES = 4
|
|
|
45
44
|
const AI_GATEWAY_MAX_RETRY_DELAY_MS = 15_000
|
|
46
45
|
const OPENAI_RESPONSES_PROVIDER_ID = 'openai.responses'
|
|
47
46
|
const OPENAI_CHAT_PROVIDER_ID = 'openai.chat'
|
|
48
|
-
const OPENAI_EMBEDDING_PROVIDER_ID = 'openai.embedding'
|
|
49
|
-
const OPENAI_EMBEDDING_MAX_PER_CALL = 2_048
|
|
50
47
|
const RETRYABLE_NETWORK_ERROR_CODES = new Set([
|
|
51
48
|
'ECONNABORTED',
|
|
52
49
|
'ECONNREFUSED',
|
|
@@ -1244,23 +1241,6 @@ function createAiGatewayLanguageModelPlaceholder(modelId: string, providerId: st
|
|
|
1244
1241
|
}
|
|
1245
1242
|
}
|
|
1246
1243
|
|
|
1247
|
-
function createAiGatewayEmbeddingModelPlaceholder(modelId: string): AiGatewayEmbeddingModel {
|
|
1248
|
-
return {
|
|
1249
|
-
specificationVersion: 'v3',
|
|
1250
|
-
provider: OPENAI_EMBEDDING_PROVIDER_ID,
|
|
1251
|
-
modelId,
|
|
1252
|
-
maxEmbeddingsPerCall: OPENAI_EMBEDDING_MAX_PER_CALL,
|
|
1253
|
-
supportsParallelCalls: true,
|
|
1254
|
-
doEmbed: () =>
|
|
1255
|
-
Promise.reject(
|
|
1256
|
-
new Error(
|
|
1257
|
-
`[ai-gateway] AiGateway embedding model ${modelId}.doEmbed was invoked without the gateway middleware; ` +
|
|
1258
|
-
'this call path should be fully handled by aiGatewayEmbeddingModel middleware.',
|
|
1259
|
-
),
|
|
1260
|
-
),
|
|
1261
|
-
}
|
|
1262
|
-
}
|
|
1263
|
-
|
|
1264
1244
|
export function aiGatewayModel(modelId: string, deps?: AiGatewayDeps) {
|
|
1265
1245
|
if (isOpenRouterModel(modelId)) {
|
|
1266
1246
|
return aiGatewayChatModel(modelId, deps)
|
|
@@ -1287,28 +1267,8 @@ export function aiGatewayChatModel(modelId: string, deps?: AiGatewayDeps) {
|
|
|
1287
1267
|
)
|
|
1288
1268
|
}
|
|
1289
1269
|
|
|
1290
|
-
export function aiGatewayEmbeddingModel(modelId: string, deps?: AiGatewayDeps) {
|
|
1291
|
-
return wrapEmbeddingModel({
|
|
1292
|
-
model: createAiGatewayEmbeddingModelPlaceholder(modelId),
|
|
1293
|
-
middleware: {
|
|
1294
|
-
specificationVersion: 'v3',
|
|
1295
|
-
wrapEmbed: ({ params }) => {
|
|
1296
|
-
const resolvedDeps = resolveAiGatewayDeps(deps)
|
|
1297
|
-
const embeddingModel = resolvedDeps.gateway.provider.embeddingModel(modelId)
|
|
1298
|
-
return resolvedDeps.runPromise(
|
|
1299
|
-
withAiGatewayConcurrency(
|
|
1300
|
-
withAiGatewayResilience(
|
|
1301
|
-
'ai-gateway.embed',
|
|
1302
|
-
Effect.tryPromise({
|
|
1303
|
-
try: () => embeddingModel.doEmbed(params),
|
|
1304
|
-
catch: (cause) => classifyAiGatewayError('ai-gateway.embed', cause),
|
|
1305
|
-
}),
|
|
1306
|
-
).pipe(Effect.withSpan('AiGateway.embed'), Effect.annotateSpans({ modelId })),
|
|
1307
|
-
).pipe(Effect.provideService(AiGatewayTag, resolvedDeps.gateway)),
|
|
1308
|
-
)
|
|
1309
|
-
},
|
|
1310
|
-
},
|
|
1311
|
-
})
|
|
1270
|
+
export function aiGatewayEmbeddingModel(modelId: string, _deps?: AiGatewayDeps) {
|
|
1271
|
+
return openRouterEmbeddingModel(modelId)
|
|
1312
1272
|
}
|
|
1313
1273
|
|
|
1314
1274
|
/**
|
|
@@ -5,7 +5,7 @@ import { z } from 'zod'
|
|
|
5
5
|
import type { CoreThreadProfile } from '../config/agent-defaults'
|
|
6
6
|
import type { AgentFactory, AgentRuntimeConfigProvider, AgentToolBuilder } from '../config/agent-types'
|
|
7
7
|
import { DEFAULT_AI_GATEWAY_URL } from '../config/constants'
|
|
8
|
-
import {
|
|
8
|
+
import { AI_GATEWAY_FAST_RERANK_MODEL_ID } from '../config/model-constants'
|
|
9
9
|
import type { LotaThreadConfig, ThreadBootstrapWelcomeConfig } from '../config/thread-defaults'
|
|
10
10
|
import type { RecordIdRef } from '../db/record-id'
|
|
11
11
|
import type { NotificationService } from '../services/notification.service'
|
|
@@ -239,13 +239,13 @@ export const LotaRuntimeConfigSchema = z.object({
|
|
|
239
239
|
searchK: z.coerce.number().int().positive().default(6),
|
|
240
240
|
embeddingCacheTtlSeconds: z.coerce.number().int().positive().default(7200),
|
|
241
241
|
rerankerStrategy: MemoryRerankerStrategySchema.default('rerank'),
|
|
242
|
-
rerankerModelId: z.string().trim().min(1).default(
|
|
242
|
+
rerankerModelId: z.string().trim().min(1).default(AI_GATEWAY_FAST_RERANK_MODEL_ID),
|
|
243
243
|
})
|
|
244
244
|
.default({
|
|
245
245
|
searchK: 6,
|
|
246
246
|
embeddingCacheTtlSeconds: 7200,
|
|
247
247
|
rerankerStrategy: 'rerank',
|
|
248
|
-
rerankerModelId:
|
|
248
|
+
rerankerModelId: AI_GATEWAY_FAST_RERANK_MODEL_ID,
|
|
249
249
|
}),
|
|
250
250
|
threads: threadConfigSchema.default({}),
|
|
251
251
|
agents: agentsConfigSchema,
|
|
@@ -342,7 +342,7 @@ export const lotaRuntimeEnvConfig = Config.all({
|
|
|
342
342
|
memorySearchK: Config.number('MEMORY_SEARCH_K').pipe(Config.withDefault(6)),
|
|
343
343
|
memoryRerankerStrategy: Config.string('MEMORY_RERANKER_STRATEGY').pipe(Config.withDefault('rerank')),
|
|
344
344
|
memoryRerankerModelId: Config.string('MEMORY_RERANKER_MODEL_ID').pipe(
|
|
345
|
-
Config.withDefault(
|
|
345
|
+
Config.withDefault(AI_GATEWAY_FAST_RERANK_MODEL_ID),
|
|
346
346
|
),
|
|
347
347
|
otlpBaseUrl: Config.string('OTLP_BASE_URL').pipe(Config.option),
|
|
348
348
|
otlpServiceName: Config.string('OTLP_SERVICE_NAME').pipe(Config.withDefault('lota-sdk')),
|
|
@@ -2,7 +2,7 @@ import { Context, Effect, Layer } from 'effect'
|
|
|
2
2
|
import * as Schema from 'effect/Schema'
|
|
3
3
|
import { z } from 'zod'
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import { AI_GATEWAY_FAST_RERANK_MODEL_ID } from '../../config/model-constants'
|
|
6
6
|
import { ConfigurationError, ERROR_TAGS } from '../../effect/errors'
|
|
7
7
|
import { RuntimeConfigServiceTag } from '../../effect/services'
|
|
8
8
|
import { toValidationError } from '../../effect/zod'
|
|
@@ -11,6 +11,7 @@ import type { ResolvedLotaRuntimeConfig } from '../../runtime/runtime-config'
|
|
|
11
11
|
const AI_GATEWAY_VIRTUAL_KEY_HEADER = 'x-bf-vk' as const
|
|
12
12
|
const EXPECTED_GATEWAY_KEY_PREFIX = 'sk-bf-' as const
|
|
13
13
|
const OPENROUTER_MODEL_PREFIX = 'openrouter/' as const
|
|
14
|
+
const LEGACY_COHERE_RERANK_MODEL_IDS = new Set(['cohere/rerank-4-fast', 'openrouter/cohere/rerank-4-fast'])
|
|
14
15
|
|
|
15
16
|
const RerankRequestBodySchema = Schema.Struct({
|
|
16
17
|
model: Schema.String,
|
|
@@ -58,6 +59,8 @@ function normalizeRerankModelId(modelId: string): string {
|
|
|
58
59
|
throw new ConfigurationError({ message: 'Rerank model id is required.', key: 'rerankModelId' })
|
|
59
60
|
}
|
|
60
61
|
|
|
62
|
+
if (LEGACY_COHERE_RERANK_MODEL_IDS.has(normalized)) return AI_GATEWAY_FAST_RERANK_MODEL_ID
|
|
63
|
+
|
|
61
64
|
return normalized.startsWith(OPENROUTER_MODEL_PREFIX) ? normalized.slice(OPENROUTER_MODEL_PREFIX.length) : normalized
|
|
62
65
|
}
|
|
63
66
|
|
|
@@ -137,7 +140,7 @@ export function makeRerankService(config: ResolvedLotaRuntimeConfig) {
|
|
|
137
140
|
const configured = readConfiguredRerankModelId()
|
|
138
141
|
if (configured) return normalizeRerankModelId(configured)
|
|
139
142
|
|
|
140
|
-
return
|
|
143
|
+
return AI_GATEWAY_FAST_RERANK_MODEL_ID
|
|
141
144
|
}
|
|
142
145
|
|
|
143
146
|
return {
|