@lota-sdk/core 0.4.10 → 0.4.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -3
- package/src/ai-gateway/ai-gateway.ts +214 -98
- package/src/ai-gateway/index.ts +16 -1
- package/src/config/agent-defaults.ts +4 -120
- package/src/config/logger.ts +18 -34
- package/src/config/model-constants.ts +1 -0
- package/src/config/thread-defaults.ts +1 -18
- package/src/create-runtime.ts +90 -28
- package/src/db/base.service.ts +30 -38
- package/src/db/service.ts +489 -545
- package/src/effect/index.ts +0 -2
- package/src/effect/layers.ts +6 -13
- package/src/embeddings/provider.ts +2 -7
- package/src/index.ts +4 -5
- package/src/queues/autonomous-job.queue.ts +159 -113
- package/src/queues/context-compaction.queue.ts +39 -25
- package/src/queues/delayed-node-promotion.queue.ts +56 -29
- package/src/queues/document-processor.queue.ts +5 -3
- package/src/queues/index.ts +1 -0
- package/src/queues/memory-consolidation.queue.ts +79 -53
- package/src/queues/organization-learning.queue.ts +63 -39
- package/src/queues/plan-agent-heartbeat.queue.ts +104 -79
- package/src/queues/plan-scheduler.queue.ts +100 -84
- package/src/queues/post-chat-memory.queue.ts +55 -33
- package/src/queues/queue-factory.ts +40 -41
- package/src/queues/queues.service.ts +61 -0
- package/src/queues/title-generation.queue.ts +42 -31
- package/src/redis/org-memory-lock.ts +24 -9
- package/src/redis/redis-lease-lock.ts +8 -1
- package/src/runtime/agent-identity-overrides.ts +7 -3
- package/src/runtime/agent-runtime-policy.ts +9 -4
- package/src/runtime/agent-stream-helpers.ts +9 -4
- package/src/runtime/context-compaction/context-compaction-runtime.ts +28 -32
- package/src/runtime/context-compaction/context-compaction.ts +9 -7
- package/src/runtime/domain-layer.ts +15 -4
- package/src/runtime/execution-plan-visibility.ts +5 -2
- package/src/runtime/graph-designer.ts +0 -22
- package/src/runtime/index.ts +2 -0
- package/src/runtime/indexed-repositories-policy.ts +2 -6
- package/src/runtime/live-turn-trace.ts +344 -0
- package/src/runtime/plugin-resolution.ts +29 -12
- package/src/runtime/post-turn-side-effects.ts +139 -141
- package/src/runtime/runtime-config.ts +0 -6
- package/src/runtime/runtime-extensions.ts +0 -54
- package/src/runtime/runtime-lifecycle.ts +4 -4
- package/src/runtime/runtime-services.ts +125 -53
- package/src/runtime/runtime-worker-registry.ts +113 -30
- package/src/runtime/social-chat/social-chat-agent-runner.ts +6 -3
- package/src/runtime/social-chat/social-chat-history.ts +3 -1
- package/src/runtime/social-chat/social-chat.ts +35 -20
- package/src/runtime/team-consultation/team-consultation-orchestrator.ts +6 -5
- package/src/runtime/team-consultation/team-consultation-prompts.ts +11 -6
- package/src/runtime/thread-chat-helpers.ts +18 -9
- package/src/runtime/thread-turn-context.ts +7 -47
- package/src/runtime/turn-lifecycle.ts +6 -14
- package/src/services/agent-activity.service.ts +168 -175
- package/src/services/agent-executor.service.ts +35 -16
- package/src/services/attachment.service.ts +4 -70
- package/src/services/autonomous-job.service.ts +53 -61
- package/src/services/context-compaction.service.ts +7 -9
- package/src/services/execution-plan/execution-plan-graph.ts +106 -115
- package/src/services/execution-plan/execution-plan-schedule.ts +1 -15
- package/src/services/execution-plan/execution-plan.service.ts +67 -50
- package/src/services/global-orchestrator.service.ts +18 -7
- package/src/services/graph-full-routing.ts +7 -6
- package/src/services/memory/memory-conversation.ts +10 -5
- package/src/services/memory/memory.service.ts +11 -8
- package/src/services/ownership-dispatcher.service.ts +16 -5
- package/src/services/plan/plan-agent-heartbeat.service.ts +29 -15
- package/src/services/plan/plan-agent-query.service.ts +12 -8
- package/src/services/plan/plan-completion-side-effects.ts +93 -101
- package/src/services/plan/plan-cycle.service.ts +7 -45
- package/src/services/plan/plan-deadline.service.ts +28 -17
- package/src/services/plan/plan-event-delivery.service.ts +47 -40
- package/src/services/plan/plan-executor-context.ts +2 -0
- package/src/services/plan/plan-executor-graph.ts +366 -391
- package/src/services/plan/plan-executor.service.ts +13 -91
- package/src/services/plan/plan-scheduler.service.ts +62 -49
- package/src/services/plan/plan-transaction-events.ts +1 -1
- package/src/services/recent-activity-title.service.ts +6 -2
- package/src/services/thread/thread-bootstrap.ts +11 -9
- package/src/services/thread/thread-message.service.ts +6 -5
- package/src/services/thread/thread-turn-execution.ts +86 -82
- package/src/services/thread/thread-turn-preparation.service.ts +92 -45
- package/src/services/thread/thread-turn-streaming.ts +60 -28
- package/src/services/thread/thread-turn.ts +212 -46
- package/src/services/thread/thread.service.ts +21 -6
- package/src/system-agents/recent-activity-title-refiner.agent.ts +8 -5
- package/src/system-agents/thread-router.agent.ts +23 -20
- package/src/tools/execution-plan.tool.ts +8 -3
- package/src/tools/fetch-webpage.tool.ts +10 -9
- package/src/tools/firecrawl-client.ts +0 -15
- package/src/tools/remember-memory.tool.ts +3 -6
- package/src/tools/research-topic.tool.ts +12 -3
- package/src/tools/search-web.tool.ts +10 -9
- package/src/tools/search.tool.ts +4 -5
- package/src/tools/team-think.tool.ts +139 -121
- package/src/workers/bootstrap.ts +9 -10
- package/src/workers/memory-consolidation.worker.ts +4 -1
- package/src/workers/organization-learning.worker.ts +15 -2
- package/src/workers/regular-chat-memory-digest.helpers.ts +3 -4
- package/src/workers/regular-chat-memory-digest.runner.ts +21 -14
- package/src/workers/skill-extraction.runner.ts +13 -15
- package/src/workers/worker-utils.ts +6 -18
- package/src/effect/awaitable-effect.ts +0 -96
- package/src/effect/runtime-ref.ts +0 -25
- package/src/effect/runtime.ts +0 -46
- package/src/redis/runtime-connection.ts +0 -20
- package/src/runtime/runtime-accessors.ts +0 -92
- package/src/runtime/runtime-token.ts +0 -47
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lota-sdk/core",
|
|
3
|
-
"version": "0.4.10",
|
|
3
|
+
"version": "0.4.12",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "./src/index.ts",
|
|
6
6
|
"types": "./src/index.ts",
|
|
@@ -31,10 +31,10 @@
|
|
|
31
31
|
"@ai-sdk/openai": "^3.0.53",
|
|
32
32
|
"@chat-adapter/slack": "^4.26.0",
|
|
33
33
|
"@chat-adapter/state-ioredis": "^4.26.0",
|
|
34
|
-
"@lota-sdk/shared": "0.4.
|
|
34
|
+
"@lota-sdk/shared": "0.4.12",
|
|
35
35
|
"@mendable/firecrawl-js": "^4.18.3",
|
|
36
36
|
"@surrealdb/node": "^3.0.3",
|
|
37
|
-
"ai": "^6.0.
|
|
37
|
+
"ai": "^6.0.168",
|
|
38
38
|
"bullmq": "^5.74.1",
|
|
39
39
|
"chat": "^4.26.0",
|
|
40
40
|
"effect": "^4.0.0-beta.50",
|
|
@@ -6,7 +6,6 @@ import { Cause, Clock, Context, Duration, Effect, ExecutionPlan, Fiber, Layer, S
|
|
|
6
6
|
|
|
7
7
|
import { DEFAULT_AI_GATEWAY_URL } from '../config/constants'
|
|
8
8
|
import { AiGenerationError, ConfigurationError } from '../effect/errors'
|
|
9
|
-
import { resolveLotaService } from '../effect/runtime'
|
|
10
9
|
import { RuntimeConfigServiceTag } from '../effect/services'
|
|
11
10
|
import { getDirectOpenRouterProvider, normalizeDirectOpenRouterModelId } from '../openrouter/direct-provider'
|
|
12
11
|
import { isRecord, readString } from '../utils/string'
|
|
@@ -24,6 +23,8 @@ type AiGatewayGeneratedContent = AiGatewayGenerateResult['content'][number]
|
|
|
24
23
|
type AiGatewayStreamPart = AiGatewayStreamResult['stream'] extends ReadableStream<infer T> ? T : never
|
|
25
24
|
type AiGatewayProviderOptions = NonNullable<AiGatewayCallOptions['providerOptions']>
|
|
26
25
|
type AiGatewayAttemptResult<A> = { source: string; result: A }
|
|
26
|
+
// eslint-disable-next-line @typescript-eslint/no-redundant-type-constituents
|
|
27
|
+
type AiGatewayRunFork = <A, E>(effect: Effect.Effect<A, E, never>) => Fiber.Fiber<A, E | unknown>
|
|
27
28
|
|
|
28
29
|
class AiGatewayGenerateAttempt extends Context.Service<
|
|
29
30
|
AiGatewayGenerateAttempt,
|
|
@@ -271,12 +272,13 @@ function withAiGatewayResilience<A>(source: string, effect: Effect.Effect<A, AiG
|
|
|
271
272
|
function withAiGatewayStreamIdleTimeout(
|
|
272
273
|
stream: ReadableStream<AiGatewayStreamPart>,
|
|
273
274
|
source: string,
|
|
275
|
+
runFork: AiGatewayRunFork,
|
|
274
276
|
onFinalize?: () => void,
|
|
275
277
|
): ReadableStream<AiGatewayStreamPart> {
|
|
276
278
|
let closed = false
|
|
277
279
|
let reader: ReadableStreamDefaultReader<AiGatewayStreamPart> | null = null
|
|
278
|
-
let idleTimeoutFiber:
|
|
279
|
-
let bodyPumpFiber:
|
|
280
|
+
let idleTimeoutFiber: Fiber.Fiber<unknown, unknown> | null = null
|
|
281
|
+
let bodyPumpFiber: Fiber.Fiber<unknown, unknown> | null = null
|
|
280
282
|
let finalized = false
|
|
281
283
|
|
|
282
284
|
const finalize = () => {
|
|
@@ -285,9 +287,9 @@ function withAiGatewayStreamIdleTimeout(
|
|
|
285
287
|
onFinalize?.()
|
|
286
288
|
}
|
|
287
289
|
|
|
288
|
-
const interruptFiber = (fiber:
|
|
290
|
+
const interruptFiber = (fiber: Fiber.Fiber<unknown, unknown> | null) => {
|
|
289
291
|
if (!fiber) return
|
|
290
|
-
void
|
|
292
|
+
void runFork(Fiber.interrupt(fiber))
|
|
291
293
|
}
|
|
292
294
|
|
|
293
295
|
const stopIdleTimeout = () => {
|
|
@@ -351,7 +353,7 @@ function withAiGatewayStreamIdleTimeout(
|
|
|
351
353
|
|
|
352
354
|
const resetIdleTimeout = (controller: ReadableStreamDefaultController<AiGatewayStreamPart>) => {
|
|
353
355
|
stopIdleTimeout()
|
|
354
|
-
idleTimeoutFiber =
|
|
356
|
+
idleTimeoutFiber = runFork(
|
|
355
357
|
Effect.sleep(Duration.millis(AI_GATEWAY_STREAM_IDLE_TIMEOUT_MS)).pipe(
|
|
356
358
|
Effect.flatMap(() =>
|
|
357
359
|
Effect.gen(function* () {
|
|
@@ -417,7 +419,7 @@ function withAiGatewayStreamIdleTimeout(
|
|
|
417
419
|
start(controller) {
|
|
418
420
|
const streamReader = stream.getReader()
|
|
419
421
|
reader = streamReader
|
|
420
|
-
bodyPumpFiber =
|
|
422
|
+
bodyPumpFiber = runFork(pumpStreamEffect(streamReader, controller))
|
|
421
423
|
},
|
|
422
424
|
cancel(reason) {
|
|
423
425
|
closed = true
|
|
@@ -493,40 +495,22 @@ export const AiGatewayLive = Layer.effect(
|
|
|
493
495
|
|
|
494
496
|
type AiGatewayRuntimeConfig = Context.Service.Shape<typeof RuntimeConfigServiceTag>
|
|
495
497
|
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
})
|
|
503
|
-
currentAiGateway = params.aiGateway
|
|
504
|
-
currentAiGatewayRuntimeConfig = params.runtimeConfig
|
|
505
|
-
}
|
|
506
|
-
|
|
507
|
-
export function clearAiGatewayRuntimeAccessors(): void {
|
|
508
|
-
currentAiGateway = null
|
|
509
|
-
currentAiGatewayRuntimeConfig = null
|
|
510
|
-
}
|
|
511
|
-
|
|
512
|
-
function getAiGateway(): AiGatewayTag['Service'] {
|
|
513
|
-
return currentAiGateway ?? resolveLotaService(AiGatewayTag)
|
|
514
|
-
}
|
|
515
|
-
|
|
516
|
-
function getAiGatewayRuntimeConfig(): AiGatewayRuntimeConfig {
|
|
517
|
-
return currentAiGatewayRuntimeConfig ?? resolveLotaService(RuntimeConfigServiceTag)
|
|
518
|
-
}
|
|
519
|
-
|
|
520
|
-
function withAiGatewayConcurrency<A>(effect: Effect.Effect<A, AiGenerationError>): Effect.Effect<A, AiGenerationError> {
|
|
521
|
-
return getAiGateway().semaphore.withPermit(effect)
|
|
498
|
+
function withAiGatewayConcurrency<A>(
|
|
499
|
+
effect: Effect.Effect<A, AiGenerationError>,
|
|
500
|
+
): Effect.Effect<A, AiGenerationError, AiGatewayTag> {
|
|
501
|
+
return Effect.gen(function* () {
|
|
502
|
+
const gateway = yield* AiGatewayTag
|
|
503
|
+
return yield* gateway.semaphore.withPermit(effect)
|
|
504
|
+
})
|
|
522
505
|
}
|
|
523
506
|
|
|
524
507
|
function withAiGatewayStreamConcurrency(
|
|
525
508
|
effect: Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError>,
|
|
526
|
-
|
|
509
|
+
runFork: AiGatewayRunFork,
|
|
510
|
+
): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError, AiGatewayTag> {
|
|
527
511
|
return Effect.uninterruptibleMask((restore) =>
|
|
528
512
|
Effect.gen(function* () {
|
|
529
|
-
const { semaphore } =
|
|
513
|
+
const { semaphore } = yield* AiGatewayTag
|
|
530
514
|
const currentContext = yield* Effect.context<never>()
|
|
531
515
|
yield* semaphore.take(1)
|
|
532
516
|
|
|
@@ -551,7 +535,7 @@ function withAiGatewayStreamConcurrency(
|
|
|
551
535
|
...attempt,
|
|
552
536
|
result: {
|
|
553
537
|
...attempt.result,
|
|
554
|
-
stream: withAiGatewayStreamIdleTimeout(attempt.result.stream, attempt.source, release),
|
|
538
|
+
stream: withAiGatewayStreamIdleTimeout(attempt.result.stream, attempt.source, runFork, release),
|
|
555
539
|
},
|
|
556
540
|
}
|
|
557
541
|
}),
|
|
@@ -611,6 +595,52 @@ export function extractAiGatewayChatReasoningDeltaText(rawChunk: unknown): strin
|
|
|
611
595
|
return null
|
|
612
596
|
}
|
|
613
597
|
|
|
598
|
+
function findAiGatewayChatReasoningOverlap(previousReasoningText: string, nextReasoningText: string): number {
|
|
599
|
+
const maxOverlap = Math.min(previousReasoningText.length, nextReasoningText.length)
|
|
600
|
+
|
|
601
|
+
for (let overlapLength = maxOverlap; overlapLength > 0; overlapLength -= 1) {
|
|
602
|
+
if (previousReasoningText.slice(-overlapLength) === nextReasoningText.slice(0, overlapLength)) {
|
|
603
|
+
return overlapLength
|
|
604
|
+
}
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
return 0
|
|
608
|
+
}
|
|
609
|
+
|
|
610
|
+
function deriveAiGatewayChatReasoningDeltaText(params: { previousReasoningText: string; rawChunk: unknown }): {
|
|
611
|
+
delta: string | null
|
|
612
|
+
nextReasoningText: string
|
|
613
|
+
} {
|
|
614
|
+
const extractedText = extractAiGatewayChatReasoningDeltaText(params.rawChunk)
|
|
615
|
+
if (!extractedText) {
|
|
616
|
+
return { delta: null, nextReasoningText: params.previousReasoningText }
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
if (params.previousReasoningText.length === 0) {
|
|
620
|
+
return { delta: extractedText, nextReasoningText: extractedText }
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
if (extractedText === params.previousReasoningText) {
|
|
624
|
+
return { delta: null, nextReasoningText: params.previousReasoningText }
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
if (extractedText.startsWith(params.previousReasoningText)) {
|
|
628
|
+
const delta = extractedText.slice(params.previousReasoningText.length)
|
|
629
|
+
return { delta: delta.length > 0 ? delta : null, nextReasoningText: extractedText }
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
const overlapLength = findAiGatewayChatReasoningOverlap(params.previousReasoningText, extractedText)
|
|
633
|
+
if (overlapLength > 0) {
|
|
634
|
+
const delta = extractedText.slice(overlapLength)
|
|
635
|
+
return { delta: delta.length > 0 ? delta : null, nextReasoningText: `${params.previousReasoningText}${delta}` }
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
// Some providers emit true deltas, others resend the full reasoning-so-far.
|
|
639
|
+
// If the chunk is not a prefix extension, treat it as a standalone delta and
|
|
640
|
+
// append it to the accumulated reasoning text.
|
|
641
|
+
return { delta: extractedText, nextReasoningText: `${params.previousReasoningText}${extractedText}` }
|
|
642
|
+
}
|
|
643
|
+
|
|
614
644
|
export function injectAiGatewayChatReasoningContent(
|
|
615
645
|
content: readonly AiGatewayGeneratedContent[],
|
|
616
646
|
response?: AiGatewayChatResponse,
|
|
@@ -638,18 +668,20 @@ function isOpenRouterModel(modelId: string): boolean {
|
|
|
638
668
|
return modelId.trim().toLowerCase().startsWith('openrouter/')
|
|
639
669
|
}
|
|
640
670
|
|
|
641
|
-
function hasDirectOpenRouterFallback(modelId: string): boolean {
|
|
642
|
-
const config = getAiGatewayRuntimeConfig()
|
|
671
|
+
function hasDirectOpenRouterFallback(config: AiGatewayRuntimeConfig, modelId: string): boolean {
|
|
643
672
|
return isOpenRouterModel(modelId) && Boolean(config.aiGateway.openRouterApiKey?.trim())
|
|
644
673
|
}
|
|
645
674
|
|
|
646
|
-
function getDirectOpenRouterChatModel(modelId: string): AiGatewayLanguageModel {
|
|
647
|
-
const config = getAiGatewayRuntimeConfig()
|
|
675
|
+
function getDirectOpenRouterChatModel(config: AiGatewayRuntimeConfig, modelId: string): AiGatewayLanguageModel {
|
|
648
676
|
return getDirectOpenRouterProvider(config.aiGateway.openRouterApiKey).chat(normalizeDirectOpenRouterModelId(modelId))
|
|
649
677
|
}
|
|
650
678
|
|
|
651
|
-
function shouldFallbackToDirectOpenRouter(
|
|
652
|
-
|
|
679
|
+
function shouldFallbackToDirectOpenRouter(
|
|
680
|
+
config: AiGatewayRuntimeConfig,
|
|
681
|
+
modelId: string,
|
|
682
|
+
error: AiGenerationError,
|
|
683
|
+
): boolean {
|
|
684
|
+
return hasDirectOpenRouterFallback(config, modelId) && isRetryableAiGatewayError(error)
|
|
653
685
|
}
|
|
654
686
|
|
|
655
687
|
function attemptAiGatewayGenerate(
|
|
@@ -681,22 +713,25 @@ function attemptAiGatewayStream(
|
|
|
681
713
|
}
|
|
682
714
|
|
|
683
715
|
function attemptDirectOpenRouterGenerate(
|
|
716
|
+
config: AiGatewayRuntimeConfig,
|
|
684
717
|
modelId: string,
|
|
685
718
|
params: AiGatewayCallOptions,
|
|
686
719
|
): Effect.Effect<AiGatewayAttemptResult<AiGatewayGenerateResult>, AiGenerationError> {
|
|
687
|
-
const model = getDirectOpenRouterChatModel(modelId)
|
|
720
|
+
const model = getDirectOpenRouterChatModel(config, modelId)
|
|
688
721
|
return attemptAiGatewayGenerate('openrouter.generate', () => model.doGenerate(params))
|
|
689
722
|
}
|
|
690
723
|
|
|
691
724
|
function attemptDirectOpenRouterStream(
|
|
725
|
+
config: AiGatewayRuntimeConfig,
|
|
692
726
|
modelId: string,
|
|
693
727
|
params: AiGatewayCallOptions,
|
|
694
728
|
): Effect.Effect<AiGatewayAttemptResult<AiGatewayStreamResult>, AiGenerationError> {
|
|
695
|
-
const model = getDirectOpenRouterChatModel(modelId)
|
|
729
|
+
const model = getDirectOpenRouterChatModel(config, modelId)
|
|
696
730
|
return attemptAiGatewayStream('openrouter.stream', () => model.doStream(params))
|
|
697
731
|
}
|
|
698
732
|
|
|
699
733
|
function executeGenerateAttemptPlan(
|
|
734
|
+
config: AiGatewayRuntimeConfig,
|
|
700
735
|
modelId: string,
|
|
701
736
|
params: AiGatewayCallOptions,
|
|
702
737
|
doGenerate: () => PromiseLike<AiGatewayGenerateResult>,
|
|
@@ -709,7 +744,7 @@ function executeGenerateAttemptPlan(
|
|
|
709
744
|
return yield* attempt.execute
|
|
710
745
|
})
|
|
711
746
|
|
|
712
|
-
if (!hasDirectOpenRouterFallback(modelId)) {
|
|
747
|
+
if (!hasDirectOpenRouterFallback(config, modelId)) {
|
|
713
748
|
return effect.pipe(
|
|
714
749
|
Effect.provide(primary),
|
|
715
750
|
Effect.withSpan('AiGateway.executeGeneratePlan'),
|
|
@@ -723,9 +758,9 @@ function executeGenerateAttemptPlan(
|
|
|
723
758
|
{ provide: primary },
|
|
724
759
|
{
|
|
725
760
|
provide: Layer.succeed(AiGatewayGenerateAttempt, {
|
|
726
|
-
execute: attemptDirectOpenRouterGenerate(modelId, params),
|
|
761
|
+
execute: attemptDirectOpenRouterGenerate(config, modelId, params),
|
|
727
762
|
}),
|
|
728
|
-
while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(modelId, error),
|
|
763
|
+
while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(config, modelId, error),
|
|
729
764
|
},
|
|
730
765
|
),
|
|
731
766
|
),
|
|
@@ -735,6 +770,7 @@ function executeGenerateAttemptPlan(
|
|
|
735
770
|
}
|
|
736
771
|
|
|
737
772
|
function executeStreamAttemptPlan(
|
|
773
|
+
config: AiGatewayRuntimeConfig,
|
|
738
774
|
modelId: string,
|
|
739
775
|
params: AiGatewayCallOptions,
|
|
740
776
|
doStream: () => PromiseLike<AiGatewayStreamResult>,
|
|
@@ -747,7 +783,7 @@ function executeStreamAttemptPlan(
|
|
|
747
783
|
return yield* attempt.execute
|
|
748
784
|
})
|
|
749
785
|
|
|
750
|
-
if (!hasDirectOpenRouterFallback(modelId)) {
|
|
786
|
+
if (!hasDirectOpenRouterFallback(config, modelId)) {
|
|
751
787
|
return effect.pipe(
|
|
752
788
|
Effect.provide(primary),
|
|
753
789
|
Effect.withSpan('AiGateway.executeStreamPlan'),
|
|
@@ -760,8 +796,10 @@ function executeStreamAttemptPlan(
|
|
|
760
796
|
ExecutionPlan.make(
|
|
761
797
|
{ provide: primary },
|
|
762
798
|
{
|
|
763
|
-
provide: Layer.succeed(AiGatewayStreamAttempt, {
|
|
764
|
-
|
|
799
|
+
provide: Layer.succeed(AiGatewayStreamAttempt, {
|
|
800
|
+
execute: attemptDirectOpenRouterStream(config, modelId, params),
|
|
801
|
+
}),
|
|
802
|
+
while: (error: AiGenerationError) => shouldFallbackToDirectOpenRouter(config, modelId, error),
|
|
765
803
|
},
|
|
766
804
|
),
|
|
767
805
|
),
|
|
@@ -794,6 +832,8 @@ export function injectAiGatewayChatReasoningStream(
|
|
|
794
832
|
const reasoningId = 'ai-gateway-reasoning-0'
|
|
795
833
|
let reasoningOpen = false
|
|
796
834
|
let reasoningClosed = false
|
|
835
|
+
let reasoningText = ''
|
|
836
|
+
let nativeReasoningSeen = false
|
|
797
837
|
|
|
798
838
|
return stream.pipeThrough(
|
|
799
839
|
new TransformStream<AiGatewayStreamPart, AiGatewayStreamPart>({
|
|
@@ -805,11 +845,25 @@ export function injectAiGatewayChatReasoningStream(
|
|
|
805
845
|
reasoningClosed = true
|
|
806
846
|
}
|
|
807
847
|
|
|
848
|
+
if (chunk.type === 'reasoning-start' || chunk.type === 'reasoning-delta' || chunk.type === 'reasoning-end') {
|
|
849
|
+
nativeReasoningSeen = true
|
|
850
|
+
closeReasoning()
|
|
851
|
+
controller.enqueue(chunk)
|
|
852
|
+
return
|
|
853
|
+
}
|
|
854
|
+
|
|
808
855
|
if (chunk.type === 'raw') {
|
|
809
|
-
const
|
|
856
|
+
const reasoningDeltaState =
|
|
857
|
+
reasoningClosed || nativeReasoningSeen
|
|
858
|
+
? null
|
|
859
|
+
: deriveAiGatewayChatReasoningDeltaText({
|
|
860
|
+
previousReasoningText: reasoningText,
|
|
861
|
+
rawChunk: chunk.rawValue,
|
|
862
|
+
})
|
|
810
863
|
controller.enqueue(chunk)
|
|
811
864
|
|
|
812
|
-
if (
|
|
865
|
+
if (reasoningDeltaState?.delta) {
|
|
866
|
+
reasoningText = reasoningDeltaState.nextReasoningText
|
|
813
867
|
if (!reasoningOpen) {
|
|
814
868
|
controller.enqueue({ type: 'reasoning-start', id: reasoningId } satisfies AiGatewayStreamPart)
|
|
815
869
|
reasoningOpen = true
|
|
@@ -818,7 +872,7 @@ export function injectAiGatewayChatReasoningStream(
|
|
|
818
872
|
controller.enqueue({
|
|
819
873
|
type: 'reasoning-delta',
|
|
820
874
|
id: reasoningId,
|
|
821
|
-
delta:
|
|
875
|
+
delta: reasoningDeltaState.delta,
|
|
822
876
|
} satisfies AiGatewayStreamPart)
|
|
823
877
|
}
|
|
824
878
|
return
|
|
@@ -849,7 +903,56 @@ function addAiGatewayReasoningRawChunks(
|
|
|
849
903
|
return { ...params, includeRawChunks: true }
|
|
850
904
|
}
|
|
851
905
|
|
|
852
|
-
function
|
|
906
|
+
function resolveProviderModel(
|
|
907
|
+
provider: ReturnType<typeof createOpenAI>,
|
|
908
|
+
modelId: string,
|
|
909
|
+
providerId: string,
|
|
910
|
+
): AiGatewayLanguageModel {
|
|
911
|
+
return providerId === OPENAI_CHAT_PROVIDER_ID ? provider.chat(modelId) : provider(modelId)
|
|
912
|
+
}
|
|
913
|
+
|
|
914
|
+
// Module-level Promise slot that `createLotaRuntime` populates during boot.
|
|
915
|
+
// This is a legitimate per-process singleton (mirrors the worker bootstrap
|
|
916
|
+
// pattern in `workers/bootstrap.ts`): the AI gateway middleware is dispatched
|
|
917
|
+
// by AI SDK callers that live outside Effect context, so the middleware needs
|
|
918
|
+
// a way to run gateway Effects without capturing a `ManagedRuntime` through
|
|
919
|
+
// every `aiGatewayModel(modelId)` call site.
|
|
920
|
+
//
|
|
921
|
+
// Only `createLotaRuntime` writes to the slot; resetting on disconnect is a
|
|
922
|
+
// Phase 3b concern — for now it stays alive for the process lifetime.
|
|
923
|
+
let aiGatewayRuntimeReady: Promise<{
|
|
924
|
+
gateway: Context.Service.Shape<typeof AiGatewayTag>
|
|
925
|
+
runtimeConfig: Context.Service.Shape<typeof RuntimeConfigServiceTag>
|
|
926
|
+
runPromise: <A, E>(effect: Effect.Effect<A, E, never>) => Promise<A>
|
|
927
|
+
runFork: AiGatewayRunFork
|
|
928
|
+
}> | null = null
|
|
929
|
+
|
|
930
|
+
export function bindAiGatewayRuntime(params: {
|
|
931
|
+
gateway: Context.Service.Shape<typeof AiGatewayTag>
|
|
932
|
+
runtimeConfig: Context.Service.Shape<typeof RuntimeConfigServiceTag>
|
|
933
|
+
runPromise: <A, E>(effect: Effect.Effect<A, E, never>) => Promise<A>
|
|
934
|
+
runFork: AiGatewayRunFork
|
|
935
|
+
}): void {
|
|
936
|
+
aiGatewayRuntimeReady = Promise.resolve(params)
|
|
937
|
+
}
|
|
938
|
+
|
|
939
|
+
export function clearAiGatewayRuntime(): void {
|
|
940
|
+
aiGatewayRuntimeReady = null
|
|
941
|
+
}
|
|
942
|
+
|
|
943
|
+
async function getAiGatewayRuntime(): Promise<{
|
|
944
|
+
gateway: Context.Service.Shape<typeof AiGatewayTag>
|
|
945
|
+
runtimeConfig: Context.Service.Shape<typeof RuntimeConfigServiceTag>
|
|
946
|
+
runPromise: <A, E>(effect: Effect.Effect<A, E, never>) => Promise<A>
|
|
947
|
+
runFork: AiGatewayRunFork
|
|
948
|
+
}> {
|
|
949
|
+
if (!aiGatewayRuntimeReady) {
|
|
950
|
+
throw new Error('AI gateway runtime has not been initialized. Call createLotaRuntime() first.')
|
|
951
|
+
}
|
|
952
|
+
return aiGatewayRuntimeReady
|
|
953
|
+
}
|
|
954
|
+
|
|
955
|
+
function createAiGatewayLanguageModelMiddleware(modelId: string, providerId: string): LanguageModelMiddleware {
|
|
853
956
|
return {
|
|
854
957
|
specificationVersion: 'v3',
|
|
855
958
|
transformParams: ({ params, type }) =>
|
|
@@ -858,10 +961,12 @@ function createAiGatewayLanguageModelMiddleware(modelId: string): LanguageModelM
|
|
|
858
961
|
addAiGatewayReasoningRawChunks(normalizeAiGatewayChatProviderOptions(params, modelId), type),
|
|
859
962
|
),
|
|
860
963
|
),
|
|
861
|
-
wrapGenerate: ({
|
|
862
|
-
|
|
964
|
+
wrapGenerate: async ({ params }) => {
|
|
965
|
+
const { gateway, runtimeConfig, runPromise } = await getAiGatewayRuntime()
|
|
966
|
+
const model = resolveProviderModel(gateway.provider, modelId, providerId)
|
|
967
|
+
return runPromise(
|
|
863
968
|
withAiGatewayConcurrency(
|
|
864
|
-
executeGenerateAttemptPlan(modelId, params, doGenerate).pipe(
|
|
969
|
+
executeGenerateAttemptPlan(runtimeConfig, modelId, params, () => model.doGenerate(params)).pipe(
|
|
865
970
|
Effect.map(({ result }) => ({
|
|
866
971
|
...result,
|
|
867
972
|
content: injectAiGatewayChatReasoningContent(
|
|
@@ -870,12 +975,15 @@ function createAiGatewayLanguageModelMiddleware(modelId: string): LanguageModelM
|
|
|
870
975
|
),
|
|
871
976
|
})),
|
|
872
977
|
),
|
|
873
|
-
),
|
|
874
|
-
)
|
|
875
|
-
|
|
876
|
-
|
|
978
|
+
).pipe(Effect.provideService(AiGatewayTag, gateway)),
|
|
979
|
+
)
|
|
980
|
+
},
|
|
981
|
+
wrapStream: async ({ params }) => {
|
|
982
|
+
const { gateway, runtimeConfig, runPromise, runFork } = await getAiGatewayRuntime()
|
|
983
|
+
const model = resolveProviderModel(gateway.provider, modelId, providerId)
|
|
984
|
+
return runPromise(
|
|
877
985
|
withAiGatewayStreamConcurrency(
|
|
878
|
-
executeStreamAttemptPlan(modelId, params, doStream).pipe(
|
|
986
|
+
executeStreamAttemptPlan(runtimeConfig, modelId, params, () => model.doStream(params)).pipe(
|
|
879
987
|
Effect.map((attempt) => ({
|
|
880
988
|
...attempt,
|
|
881
989
|
result: isReasoningEnabled(params)
|
|
@@ -883,8 +991,12 @@ function createAiGatewayLanguageModelMiddleware(modelId: string): LanguageModelM
|
|
|
883
991
|
: attempt.result,
|
|
884
992
|
})),
|
|
885
993
|
),
|
|
886
|
-
|
|
887
|
-
|
|
994
|
+
runFork,
|
|
995
|
+
)
|
|
996
|
+
.pipe(Effect.map(({ result }) => result))
|
|
997
|
+
.pipe(Effect.provideService(AiGatewayTag, gateway)),
|
|
998
|
+
)
|
|
999
|
+
},
|
|
888
1000
|
}
|
|
889
1001
|
}
|
|
890
1002
|
|
|
@@ -921,36 +1033,42 @@ function withAiGatewayDevTools<TModel extends AiGatewayLanguageModel>(model: TMo
|
|
|
921
1033
|
return wrapLanguageModel({ model, middleware: devToolsMiddleware() }) as TModel
|
|
922
1034
|
}
|
|
923
1035
|
|
|
924
|
-
function
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
}
|
|
1036
|
+
function createAiGatewayLanguageModelPlaceholder(modelId: string, providerId: string): AiGatewayLanguageModel {
|
|
1037
|
+
const unreachable = (method: string) =>
|
|
1038
|
+
Promise.reject(
|
|
1039
|
+
new Error(
|
|
1040
|
+
`[ai-gateway] AiGateway language model ${modelId}.${method} was invoked without the gateway middleware; ` +
|
|
1041
|
+
'this call path should be fully handled by createAiGatewayLanguageModelMiddleware.',
|
|
1042
|
+
),
|
|
1043
|
+
)
|
|
1044
|
+
|
|
929
1045
|
return {
|
|
930
1046
|
specificationVersion: 'v3',
|
|
931
|
-
provider:
|
|
932
|
-
modelId
|
|
1047
|
+
provider: providerId,
|
|
1048
|
+
modelId,
|
|
933
1049
|
supportedUrls: {},
|
|
934
|
-
doGenerate: (
|
|
935
|
-
doStream: (
|
|
1050
|
+
doGenerate: () => unreachable('doGenerate'),
|
|
1051
|
+
doStream: () => unreachable('doStream'),
|
|
936
1052
|
}
|
|
937
1053
|
}
|
|
938
1054
|
|
|
939
|
-
function
|
|
1055
|
+
function createAiGatewayEmbeddingModelPlaceholder(modelId: string): AiGatewayEmbeddingModel {
|
|
940
1056
|
return {
|
|
941
1057
|
specificationVersion: 'v3',
|
|
942
1058
|
provider: OPENAI_EMBEDDING_PROVIDER_ID,
|
|
943
1059
|
modelId,
|
|
944
1060
|
maxEmbeddingsPerCall: OPENAI_EMBEDDING_MAX_PER_CALL,
|
|
945
1061
|
supportsParallelCalls: true,
|
|
946
|
-
doEmbed: (
|
|
1062
|
+
doEmbed: () =>
|
|
1063
|
+
Promise.reject(
|
|
1064
|
+
new Error(
|
|
1065
|
+
`[ai-gateway] AiGateway embedding model ${modelId}.doEmbed was invoked without the gateway middleware; ` +
|
|
1066
|
+
'this call path should be fully handled by aiGatewayEmbeddingModel middleware.',
|
|
1067
|
+
),
|
|
1068
|
+
),
|
|
947
1069
|
}
|
|
948
1070
|
}
|
|
949
1071
|
|
|
950
|
-
export function getAiGatewayProvider() {
|
|
951
|
-
return getAiGateway().provider
|
|
952
|
-
}
|
|
953
|
-
|
|
954
1072
|
export function aiGatewayModel(modelId: string) {
|
|
955
1073
|
if (isOpenRouterModel(modelId)) {
|
|
956
1074
|
return aiGatewayChatModel(modelId)
|
|
@@ -958,12 +1076,8 @@ export function aiGatewayModel(modelId: string) {
|
|
|
958
1076
|
|
|
959
1077
|
return withAiGatewayDevTools(
|
|
960
1078
|
wrapLanguageModel({
|
|
961
|
-
model:
|
|
962
|
-
|
|
963
|
-
providerId: OPENAI_RESPONSES_PROVIDER_ID,
|
|
964
|
-
resolve: () => getAiGatewayProvider()(modelId),
|
|
965
|
-
}),
|
|
966
|
-
middleware: createAiGatewayLanguageModelMiddleware(modelId),
|
|
1079
|
+
model: createAiGatewayLanguageModelPlaceholder(modelId, OPENAI_RESPONSES_PROVIDER_ID),
|
|
1080
|
+
middleware: createAiGatewayLanguageModelMiddleware(modelId, OPENAI_RESPONSES_PROVIDER_ID),
|
|
967
1081
|
}),
|
|
968
1082
|
)
|
|
969
1083
|
}
|
|
@@ -975,30 +1089,32 @@ export function aiGatewayOpenRouterResponseHealingModel(modelId: string) {
|
|
|
975
1089
|
export function aiGatewayChatModel(modelId: string) {
|
|
976
1090
|
return withAiGatewayDevTools(
|
|
977
1091
|
wrapLanguageModel({
|
|
978
|
-
model:
|
|
979
|
-
|
|
980
|
-
providerId: OPENAI_CHAT_PROVIDER_ID,
|
|
981
|
-
resolve: () => getAiGatewayProvider().chat(modelId),
|
|
982
|
-
}),
|
|
983
|
-
middleware: createAiGatewayLanguageModelMiddleware(modelId),
|
|
1092
|
+
model: createAiGatewayLanguageModelPlaceholder(modelId, OPENAI_CHAT_PROVIDER_ID),
|
|
1093
|
+
middleware: createAiGatewayLanguageModelMiddleware(modelId, OPENAI_CHAT_PROVIDER_ID),
|
|
984
1094
|
}),
|
|
985
1095
|
)
|
|
986
1096
|
}
|
|
987
1097
|
|
|
988
1098
|
export function aiGatewayEmbeddingModel(modelId: string) {
|
|
989
1099
|
return wrapEmbeddingModel({
|
|
990
|
-
model:
|
|
1100
|
+
model: createAiGatewayEmbeddingModelPlaceholder(modelId),
|
|
991
1101
|
middleware: {
|
|
992
1102
|
specificationVersion: 'v3',
|
|
993
|
-
wrapEmbed: ({
|
|
994
|
-
|
|
1103
|
+
wrapEmbed: async ({ params }) => {
|
|
1104
|
+
const { gateway, runPromise } = await getAiGatewayRuntime()
|
|
1105
|
+
const embeddingModel = gateway.provider.embeddingModel(modelId)
|
|
1106
|
+
return runPromise(
|
|
995
1107
|
withAiGatewayConcurrency(
|
|
996
1108
|
withAiGatewayResilience(
|
|
997
1109
|
'ai-gateway.embed',
|
|
998
|
-
Effect.tryPromise({
|
|
999
|
-
|
|
1000
|
-
|
|
1001
|
-
|
|
1110
|
+
Effect.tryPromise({
|
|
1111
|
+
try: () => embeddingModel.doEmbed(params),
|
|
1112
|
+
catch: (cause) => classifyAiGatewayError('ai-gateway.embed', cause),
|
|
1113
|
+
}),
|
|
1114
|
+
).pipe(Effect.withSpan('AiGateway.embed'), Effect.annotateSpans({ modelId })),
|
|
1115
|
+
).pipe(Effect.provideService(AiGatewayTag, gateway)),
|
|
1116
|
+
)
|
|
1117
|
+
},
|
|
1002
1118
|
},
|
|
1003
1119
|
})
|
|
1004
1120
|
}
|
package/src/ai-gateway/index.ts
CHANGED
|
@@ -1,2 +1,17 @@
|
|
|
1
|
-
export
|
|
1
|
+
export {
|
|
2
|
+
AiGatewayLive,
|
|
3
|
+
AiGatewayTag,
|
|
4
|
+
DEFAULT_AI_GATEWAY_URL,
|
|
5
|
+
aiGatewayChatModel,
|
|
6
|
+
aiGatewayEmbeddingModel,
|
|
7
|
+
aiGatewayModel,
|
|
8
|
+
aiGatewayOpenRouterResponseHealingModel,
|
|
9
|
+
bindAiGatewayRuntime,
|
|
10
|
+
extractAiGatewayChatReasoningDeltaText,
|
|
11
|
+
extractAiGatewayChatReasoningText,
|
|
12
|
+
injectAiGatewayChatReasoningContent,
|
|
13
|
+
injectAiGatewayChatReasoningStream,
|
|
14
|
+
normalizeAiGatewayChatProviderOptions,
|
|
15
|
+
normalizeAiGatewayUrl,
|
|
16
|
+
} from './ai-gateway'
|
|
2
17
|
export * from './cache-headers'
|