@lota-sdk/core 0.4.39 → 0.4.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lota-sdk/core",
3
- "version": "0.4.39",
3
+ "version": "0.4.41",
4
4
  "files": [
5
5
  "src",
6
6
  "infrastructure/schema"
@@ -32,7 +32,7 @@
32
32
  "@ai-sdk/provider": "^3.0.9",
33
33
  "@chat-adapter/slack": "^4.26.0",
34
34
  "@chat-adapter/state-ioredis": "^4.26.0",
35
- "@lota-sdk/shared": "0.4.39",
35
+ "@lota-sdk/shared": "0.4.41",
36
36
  "@mendable/firecrawl-js": "^4.20.0",
37
37
  "@surrealdb/node": "^3.0.3",
38
38
  "ai": "^6.0.170",
@@ -1,7 +1,7 @@
1
1
  import { devToolsMiddleware } from '@ai-sdk/devtools'
2
2
  import { createOpenAI } from '@ai-sdk/openai'
3
3
  import type { JSONSchema7 } from '@ai-sdk/provider'
4
- import { wrapEmbeddingModel, wrapLanguageModel } from 'ai'
4
+ import { wrapLanguageModel } from 'ai'
5
5
  import type { LanguageModelMiddleware } from 'ai'
6
6
  import { Cause, Clock, Context, Duration, Effect, Fiber, Layer, Semaphore } from 'effect'
7
7
 
@@ -16,7 +16,6 @@ type AiGatewayChatResponse = { body?: unknown }
16
16
  type AiGatewayTransformParamsOptions = Parameters<NonNullable<LanguageModelMiddleware['transformParams']>>[0]
17
17
  type WrapStreamOptions = Parameters<NonNullable<LanguageModelMiddleware['wrapStream']>>[0]
18
18
  type AiGatewayLanguageModel = Parameters<typeof wrapLanguageModel>[0]['model']
19
- type AiGatewayEmbeddingModel = Parameters<typeof wrapEmbeddingModel>[0]['model']
20
19
  type AiGatewayCallOptions = WrapStreamOptions['params']
21
20
  type AiGatewayFunctionTool = Extract<NonNullable<AiGatewayCallOptions['tools']>[number], { type: 'function' }>
22
21
  type AiGatewayGenerateResult = Awaited<ReturnType<WrapStreamOptions['doGenerate']>>
@@ -45,8 +44,6 @@ const AI_GATEWAY_MAX_RETRIES = 4
45
44
  const AI_GATEWAY_MAX_RETRY_DELAY_MS = 15_000
46
45
  const OPENAI_RESPONSES_PROVIDER_ID = 'openai.responses'
47
46
  const OPENAI_CHAT_PROVIDER_ID = 'openai.chat'
48
- const OPENAI_EMBEDDING_PROVIDER_ID = 'openai.embedding'
49
- const OPENAI_EMBEDDING_MAX_PER_CALL = 2_048
50
47
  const RETRYABLE_NETWORK_ERROR_CODES = new Set([
51
48
  'ECONNABORTED',
52
49
  'ECONNREFUSED',
@@ -1244,23 +1241,6 @@ function createAiGatewayLanguageModelPlaceholder(modelId: string, providerId: st
1244
1241
  }
1245
1242
  }
1246
1243
 
1247
- function createAiGatewayEmbeddingModelPlaceholder(modelId: string): AiGatewayEmbeddingModel {
1248
- return {
1249
- specificationVersion: 'v3',
1250
- provider: OPENAI_EMBEDDING_PROVIDER_ID,
1251
- modelId,
1252
- maxEmbeddingsPerCall: OPENAI_EMBEDDING_MAX_PER_CALL,
1253
- supportsParallelCalls: true,
1254
- doEmbed: () =>
1255
- Promise.reject(
1256
- new Error(
1257
- `[ai-gateway] AiGateway embedding model ${modelId}.doEmbed was invoked without the gateway middleware; ` +
1258
- 'this call path should be fully handled by aiGatewayEmbeddingModel middleware.',
1259
- ),
1260
- ),
1261
- }
1262
- }
1263
-
1264
1244
  export function aiGatewayModel(modelId: string, deps?: AiGatewayDeps) {
1265
1245
  if (isOpenRouterModel(modelId)) {
1266
1246
  return aiGatewayChatModel(modelId, deps)
@@ -1287,28 +1267,8 @@ export function aiGatewayChatModel(modelId: string, deps?: AiGatewayDeps) {
1287
1267
  )
1288
1268
  }
1289
1269
 
1290
- export function aiGatewayEmbeddingModel(modelId: string, deps?: AiGatewayDeps) {
1291
- return wrapEmbeddingModel({
1292
- model: createAiGatewayEmbeddingModelPlaceholder(modelId),
1293
- middleware: {
1294
- specificationVersion: 'v3',
1295
- wrapEmbed: ({ params }) => {
1296
- const resolvedDeps = resolveAiGatewayDeps(deps)
1297
- const embeddingModel = resolvedDeps.gateway.provider.embeddingModel(modelId)
1298
- return resolvedDeps.runPromise(
1299
- withAiGatewayConcurrency(
1300
- withAiGatewayResilience(
1301
- 'ai-gateway.embed',
1302
- Effect.tryPromise({
1303
- try: () => embeddingModel.doEmbed(params),
1304
- catch: (cause) => classifyAiGatewayError('ai-gateway.embed', cause),
1305
- }),
1306
- ).pipe(Effect.withSpan('AiGateway.embed'), Effect.annotateSpans({ modelId })),
1307
- ).pipe(Effect.provideService(AiGatewayTag, resolvedDeps.gateway)),
1308
- )
1309
- },
1310
- },
1311
- })
1270
+ export function aiGatewayEmbeddingModel(modelId: string, _deps?: AiGatewayDeps) {
1271
+ return openRouterEmbeddingModel(modelId)
1312
1272
  }
1313
1273
 
1314
1274
  /**
@@ -1,4 +1,5 @@
1
1
  export {
2
+ AI_GATEWAY_FAST_RERANK_MODEL_ID,
2
3
  AI_GATEWAY_REASONING_SUMMARY_LEVEL,
3
4
  OPENROUTER_GEMINI_PRO_MODEL_ID,
4
5
  OPENAI_HIGH_REASONING_PROVIDER_OPTIONS,
@@ -5,7 +5,7 @@ import { z } from 'zod'
5
5
  import type { CoreThreadProfile } from '../config/agent-defaults'
6
6
  import type { AgentFactory, AgentRuntimeConfigProvider, AgentToolBuilder } from '../config/agent-types'
7
7
  import { DEFAULT_AI_GATEWAY_URL } from '../config/constants'
8
- import { OPENROUTER_FAST_RERANK_MODEL_ID } from '../config/model-constants'
8
+ import { AI_GATEWAY_FAST_RERANK_MODEL_ID } from '../config/model-constants'
9
9
  import type { LotaThreadConfig, ThreadBootstrapWelcomeConfig } from '../config/thread-defaults'
10
10
  import type { RecordIdRef } from '../db/record-id'
11
11
  import type { NotificationService } from '../services/notification.service'
@@ -239,13 +239,13 @@ export const LotaRuntimeConfigSchema = z.object({
239
239
  searchK: z.coerce.number().int().positive().default(6),
240
240
  embeddingCacheTtlSeconds: z.coerce.number().int().positive().default(7200),
241
241
  rerankerStrategy: MemoryRerankerStrategySchema.default('rerank'),
242
- rerankerModelId: z.string().trim().min(1).default(OPENROUTER_FAST_RERANK_MODEL_ID),
242
+ rerankerModelId: z.string().trim().min(1).default(AI_GATEWAY_FAST_RERANK_MODEL_ID),
243
243
  })
244
244
  .default({
245
245
  searchK: 6,
246
246
  embeddingCacheTtlSeconds: 7200,
247
247
  rerankerStrategy: 'rerank',
248
- rerankerModelId: OPENROUTER_FAST_RERANK_MODEL_ID,
248
+ rerankerModelId: AI_GATEWAY_FAST_RERANK_MODEL_ID,
249
249
  }),
250
250
  threads: threadConfigSchema.default({}),
251
251
  agents: agentsConfigSchema,
@@ -342,7 +342,7 @@ export const lotaRuntimeEnvConfig = Config.all({
342
342
  memorySearchK: Config.number('MEMORY_SEARCH_K').pipe(Config.withDefault(6)),
343
343
  memoryRerankerStrategy: Config.string('MEMORY_RERANKER_STRATEGY').pipe(Config.withDefault('rerank')),
344
344
  memoryRerankerModelId: Config.string('MEMORY_RERANKER_MODEL_ID').pipe(
345
- Config.withDefault(OPENROUTER_FAST_RERANK_MODEL_ID),
345
+ Config.withDefault(AI_GATEWAY_FAST_RERANK_MODEL_ID),
346
346
  ),
347
347
  otlpBaseUrl: Config.string('OTLP_BASE_URL').pipe(Config.option),
348
348
  otlpServiceName: Config.string('OTLP_SERVICE_NAME').pipe(Config.withDefault('lota-sdk')),
@@ -2,7 +2,7 @@ import { Context, Effect, Layer } from 'effect'
2
2
  import * as Schema from 'effect/Schema'
3
3
  import { z } from 'zod'
4
4
 
5
- import { OPENROUTER_FAST_RERANK_MODEL_ID } from '../../config/model-constants'
5
+ import { AI_GATEWAY_FAST_RERANK_MODEL_ID } from '../../config/model-constants'
6
6
  import { ConfigurationError, ERROR_TAGS } from '../../effect/errors'
7
7
  import { RuntimeConfigServiceTag } from '../../effect/services'
8
8
  import { toValidationError } from '../../effect/zod'
@@ -11,6 +11,7 @@ import type { ResolvedLotaRuntimeConfig } from '../../runtime/runtime-config'
11
11
  const AI_GATEWAY_VIRTUAL_KEY_HEADER = 'x-bf-vk' as const
12
12
  const EXPECTED_GATEWAY_KEY_PREFIX = 'sk-bf-' as const
13
13
  const OPENROUTER_MODEL_PREFIX = 'openrouter/' as const
14
+ const LEGACY_COHERE_RERANK_MODEL_IDS = new Set(['cohere/rerank-4-fast', 'openrouter/cohere/rerank-4-fast'])
14
15
 
15
16
  const RerankRequestBodySchema = Schema.Struct({
16
17
  model: Schema.String,
@@ -58,6 +59,8 @@ function normalizeRerankModelId(modelId: string): string {
58
59
  throw new ConfigurationError({ message: 'Rerank model id is required.', key: 'rerankModelId' })
59
60
  }
60
61
 
62
+ if (LEGACY_COHERE_RERANK_MODEL_IDS.has(normalized)) return AI_GATEWAY_FAST_RERANK_MODEL_ID
63
+
61
64
  return normalized.startsWith(OPENROUTER_MODEL_PREFIX) ? normalized.slice(OPENROUTER_MODEL_PREFIX.length) : normalized
62
65
  }
63
66
 
@@ -137,7 +140,7 @@ export function makeRerankService(config: ResolvedLotaRuntimeConfig) {
137
140
  const configured = readConfiguredRerankModelId()
138
141
  if (configured) return normalizeRerankModelId(configured)
139
142
 
140
- return OPENROUTER_FAST_RERANK_MODEL_ID
143
+ return AI_GATEWAY_FAST_RERANK_MODEL_ID
141
144
  }
142
145
 
143
146
  return {