@effect-uai/core 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{AiError-CBuPHVKA.d.mts → AiError-csR8Bhxx.d.mts} +26 -4
- package/dist/{AiError-CBuPHVKA.d.mts.map → AiError-csR8Bhxx.d.mts.map} +1 -1
- package/dist/Audio-BfCTGnH3.d.mts +61 -0
- package/dist/Audio-BfCTGnH3.d.mts.map +1 -0
- package/dist/{Image-BZmKfIdq.d.mts → Image-DxyXqzAM.d.mts} +4 -4
- package/dist/{Image-BZmKfIdq.d.mts.map → Image-DxyXqzAM.d.mts.map} +1 -1
- package/dist/{Items-CB8Bo3FI.d.mts → Items-Hg5AsYxl.d.mts} +5 -5
- package/dist/{Items-CB8Bo3FI.d.mts.map → Items-Hg5AsYxl.d.mts.map} +1 -1
- package/dist/{StructuredFormat-BWq5Hd1O.d.mts → StructuredFormat-Cl41C56K.d.mts} +1 -1
- package/dist/{StructuredFormat-BWq5Hd1O.d.mts.map → StructuredFormat-Cl41C56K.d.mts.map} +1 -1
- package/dist/{Tool-DjVufH7i.d.mts → Tool-B8B5qVEy.d.mts} +2 -2
- package/dist/{Tool-DjVufH7i.d.mts.map → Tool-B8B5qVEy.d.mts.map} +1 -1
- package/dist/{Turn-OPaILVIB.d.mts → Turn-7geUcKsf.d.mts} +4 -4
- package/dist/{Turn-OPaILVIB.d.mts.map → Turn-7geUcKsf.d.mts.map} +1 -1
- package/dist/domain/AiError.d.mts +2 -2
- package/dist/domain/AiError.mjs +18 -2
- package/dist/domain/AiError.mjs.map +1 -1
- package/dist/domain/Audio.d.mts +2 -0
- package/dist/domain/Audio.mjs +14 -0
- package/dist/domain/Audio.mjs.map +1 -0
- package/dist/domain/Image.d.mts +1 -1
- package/dist/domain/Items.d.mts +1 -1
- package/dist/domain/Music.d.mts +116 -0
- package/dist/domain/Music.d.mts.map +1 -0
- package/dist/domain/Music.mjs +29 -0
- package/dist/domain/Music.mjs.map +1 -0
- package/dist/domain/Transcript.d.mts +95 -0
- package/dist/domain/Transcript.d.mts.map +1 -0
- package/dist/domain/Transcript.mjs +22 -0
- package/dist/domain/Transcript.mjs.map +1 -0
- package/dist/domain/Turn.d.mts +1 -1
- package/dist/embedding-model/Embedding.d.mts +1 -1
- package/dist/embedding-model/EmbeddingModel.d.mts +1 -1
- package/dist/index.d.mts +13 -7
- package/dist/index.mjs +7 -1
- package/dist/language-model/LanguageModel.d.mts +5 -5
- package/dist/loop/Loop.d.mts +2 -2
- package/dist/music-generator/MusicGenerator.d.mts +77 -0
- package/dist/music-generator/MusicGenerator.d.mts.map +1 -0
- package/dist/music-generator/MusicGenerator.mjs +51 -0
- package/dist/music-generator/MusicGenerator.mjs.map +1 -0
- package/dist/music-generator/MusicGenerator.test.d.mts +1 -0
- package/dist/music-generator/MusicGenerator.test.mjs +154 -0
- package/dist/music-generator/MusicGenerator.test.mjs.map +1 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.d.mts +96 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.d.mts.map +1 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.mjs +48 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.mjs.map +1 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.test.d.mts +1 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.test.mjs +112 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.test.mjs.map +1 -0
- package/dist/streaming/JSONL.d.mts +10 -3
- package/dist/streaming/JSONL.d.mts.map +1 -1
- package/dist/streaming/JSONL.mjs +12 -1
- package/dist/streaming/JSONL.mjs.map +1 -1
- package/dist/structured-format/StructuredFormat.d.mts +1 -1
- package/dist/testing/MockMusicGenerator.d.mts +39 -0
- package/dist/testing/MockMusicGenerator.d.mts.map +1 -0
- package/dist/testing/MockMusicGenerator.mjs +96 -0
- package/dist/testing/MockMusicGenerator.mjs.map +1 -0
- package/dist/testing/MockProvider.d.mts +2 -2
- package/dist/testing/MockSpeechSynthesizer.d.mts +37 -0
- package/dist/testing/MockSpeechSynthesizer.d.mts.map +1 -0
- package/dist/testing/MockSpeechSynthesizer.mjs +95 -0
- package/dist/testing/MockSpeechSynthesizer.mjs.map +1 -0
- package/dist/testing/MockTranscriber.d.mts +37 -0
- package/dist/testing/MockTranscriber.d.mts.map +1 -0
- package/dist/testing/MockTranscriber.mjs +77 -0
- package/dist/testing/MockTranscriber.mjs.map +1 -0
- package/dist/tool/HistoryCheck.d.mts +1 -1
- package/dist/tool/Outcome.d.mts +1 -1
- package/dist/tool/Resolvers.d.mts +1 -1
- package/dist/tool/Tool.d.mts +1 -1
- package/dist/tool/Toolkit.d.mts +2 -2
- package/dist/transcriber/Transcriber.d.mts +101 -0
- package/dist/transcriber/Transcriber.d.mts.map +1 -0
- package/dist/transcriber/Transcriber.mjs +49 -0
- package/dist/transcriber/Transcriber.mjs.map +1 -0
- package/dist/transcriber/Transcriber.test.d.mts +1 -0
- package/dist/transcriber/Transcriber.test.mjs +130 -0
- package/dist/transcriber/Transcriber.test.mjs.map +1 -0
- package/package.json +37 -1
- package/src/domain/AiError.ts +21 -0
- package/src/domain/Audio.ts +88 -0
- package/src/domain/Music.ts +121 -0
- package/src/domain/Transcript.ts +83 -0
- package/src/index.ts +6 -0
- package/src/music-generator/MusicGenerator.test.ts +170 -0
- package/src/music-generator/MusicGenerator.ts +123 -0
- package/src/speech-synthesizer/SpeechSynthesizer.test.ts +141 -0
- package/src/speech-synthesizer/SpeechSynthesizer.ts +131 -0
- package/src/streaming/JSONL.ts +12 -0
- package/src/testing/MockMusicGenerator.ts +170 -0
- package/src/testing/MockSpeechSynthesizer.ts +165 -0
- package/src/testing/MockTranscriber.ts +139 -0
- package/src/transcriber/Transcriber.test.ts +125 -0
- package/src/transcriber/Transcriber.ts +127 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { Context, Effect, Function, Stream } from "effect"
|
|
2
|
+
import * as AiError from "../domain/AiError.js"
|
|
3
|
+
import type { AudioChunk } from "../domain/Audio.js"
|
|
4
|
+
import type {
|
|
5
|
+
CommonGenerateMusicRequest,
|
|
6
|
+
CommonStreamGenerateMusicRequest,
|
|
7
|
+
MusicResult,
|
|
8
|
+
MusicSessionInput,
|
|
9
|
+
} from "../domain/Music.js"
|
|
10
|
+
|
|
11
|
+
export type {
|
|
12
|
+
CommonGenerateMusicRequest,
|
|
13
|
+
CommonStreamGenerateMusicRequest,
|
|
14
|
+
MusicResult,
|
|
15
|
+
MusicSessionInput,
|
|
16
|
+
WeightedPrompt,
|
|
17
|
+
} from "../domain/Music.js"
|
|
18
|
+
|
|
19
|
+
/**
 * Capability surface implemented by music-generation provider adapters.
 * Consumers should normally go through the top-level helpers
 * (`generate`, `streamGeneration`, `streamGenerationFrom`) rather than
 * reading the service out of context directly.
 */
export type MusicGeneratorService = {
  /**
   * One-shot. Prompt in, full audio bytes out. Universally supported.
   * Async/poll-based providers (Suno, Mureka) hide their poll loop
   * inside the adapter — caller still sees a single `Effect`.
   */
  readonly generate: (
    request: CommonGenerateMusicRequest,
  ) => Effect.Effect<MusicResult, AiError.AiError>
  /**
   * Prompt in, audio chunks streamed out. Providers without a native
   * chunked-output endpoint (Lyria 3 sync, Mureka, MiniMax, Stable
   * Audio) emulate this by calling `generate` and emitting a single
   * `AudioChunk` — first-class, no `Unsupported`.
   */
  readonly streamGeneration: (
    request: CommonStreamGenerateMusicRequest,
  ) => Stream.Stream<AudioChunk, AiError.AiError>
  /**
   * Bidirectional session: a `Stream` of prompt-or-config updates flows
   * in, a `Stream` of audio chunks flows out. The session WS / RPC is
   * acquired on first pull and released when the output stream is
   * finalized via `Stream.scoped`.
   *
   * Gated by the `MusicInteractiveSession` capability marker on the
   * top-level helper — providers without bidirectional support don't
   * ship the marker, so calls fail at `Effect.provide` with a type
   * error.
   */
  readonly streamGenerationFrom: <E, R>(
    input: Stream.Stream<MusicSessionInput, E, R>,
    request: CommonStreamGenerateMusicRequest,
  ) => Stream.Stream<AudioChunk, AiError.AiError | E, R>
}
|
|
53
|
+
|
|
54
|
+
/**
 * Context tag under which provider layers register their
 * `MusicGeneratorService` implementation.
 * NOTE(review): the tag id uses the `@betalyra/effect-uai` prefix while
 * the published package is `@effect-uai/core` — confirm this is intentional.
 */
export class MusicGenerator extends Context.Service<MusicGenerator, MusicGeneratorService>()(
  "@betalyra/effect-uai/MusicGenerator",
) {}
|
|
57
|
+
|
|
58
|
+
/**
 * Capability marker — provided by provider layers whose
 * `streamGenerationFrom` is wired up at the wire level. Currently only
 * Lyria RealTime (via the BidiGenerateMusic WebSocket) ships it.
 * Calling `streamGenerationFrom` while only a non-interactive Layer is
 * in scope fails at `Effect.provide` with a type error — the
 * requirement surfaces in the `R` channel, so there is nothing to
 * check at runtime.
 *
 * Phantom — the value is `void`; providers register with
 * `Layer.succeed(MusicInteractiveSession, undefined)`.
 */
export class MusicInteractiveSession extends Context.Service<MusicInteractiveSession, void>()(
  "@betalyra/effect-uai/capability/MusicInteractiveSession",
) {}
|
|
71
|
+
|
|
72
|
+
/** One-shot generation. */
|
|
73
|
+
export const generate = (
|
|
74
|
+
request: CommonGenerateMusicRequest,
|
|
75
|
+
): Effect.Effect<MusicResult, AiError.AiError, MusicGenerator> =>
|
|
76
|
+
Effect.flatMap(MusicGenerator.asEffect(), (s) => s.generate(request))
|
|
77
|
+
|
|
78
|
+
/** Prompt in, audio chunks out. */
|
|
79
|
+
export const streamGeneration = (
|
|
80
|
+
request: CommonStreamGenerateMusicRequest,
|
|
81
|
+
): Stream.Stream<AudioChunk, AiError.AiError, MusicGenerator> =>
|
|
82
|
+
Stream.unwrap(Effect.map(MusicGenerator.asEffect(), (s) => s.streamGeneration(request)))
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Bidirectional generation. Dual-arity: pipeable (data-last) and
|
|
86
|
+
* direct (data-first). Requires `MusicInteractiveSession` in R —
|
|
87
|
+
* providers without bidirectional support are a type error at provide
|
|
88
|
+
* time.
|
|
89
|
+
*
|
|
90
|
+
* @example
|
|
91
|
+
* ```ts
|
|
92
|
+
* const audio = Stream.fromIterable([
|
|
93
|
+
* Music.promptsInput([{ text: "minimal techno", weight: 1.0 }]),
|
|
94
|
+
* Music.configInput({ bpm: 124 }),
|
|
95
|
+
* ]).pipe(
|
|
96
|
+
* MusicGenerator.streamGenerationFrom({ model: "lyria-realtime-001", prompts: "" }),
|
|
97
|
+
* )
|
|
98
|
+
* ```
|
|
99
|
+
*/
|
|
100
|
+
export const streamGenerationFrom: {
|
|
101
|
+
(
|
|
102
|
+
request: CommonStreamGenerateMusicRequest,
|
|
103
|
+
): <E, R>(
|
|
104
|
+
input: Stream.Stream<MusicSessionInput, E, R>,
|
|
105
|
+
) => Stream.Stream<AudioChunk, AiError.AiError | E, R | MusicGenerator | MusicInteractiveSession>
|
|
106
|
+
<E, R>(
|
|
107
|
+
input: Stream.Stream<MusicSessionInput, E, R>,
|
|
108
|
+
request: CommonStreamGenerateMusicRequest,
|
|
109
|
+
): Stream.Stream<AudioChunk, AiError.AiError | E, R | MusicGenerator | MusicInteractiveSession>
|
|
110
|
+
} = Function.dual(
|
|
111
|
+
2,
|
|
112
|
+
<E, R>(
|
|
113
|
+
input: Stream.Stream<MusicSessionInput, E, R>,
|
|
114
|
+
request: CommonStreamGenerateMusicRequest,
|
|
115
|
+
) =>
|
|
116
|
+
Stream.unwrap(
|
|
117
|
+
Effect.gen(function* () {
|
|
118
|
+
const s = yield* MusicGenerator.asEffect()
|
|
119
|
+
yield* MusicInteractiveSession.asEffect()
|
|
120
|
+
return s.streamGenerationFrom(input, request)
|
|
121
|
+
}),
|
|
122
|
+
),
|
|
123
|
+
)
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import { Effect, Stream } from "effect"
|
|
2
|
+
import { describe, expect, expectTypeOf, it } from "vitest"
|
|
3
|
+
import type * as AiError from "../domain/AiError.js"
|
|
4
|
+
import type { AudioBlob, AudioChunk, AudioFormat } from "../domain/Audio.js"
|
|
5
|
+
import * as MockSpeechSynthesizer from "../testing/MockSpeechSynthesizer.js"
|
|
6
|
+
import * as SpeechSynthesizer from "./SpeechSynthesizer.js"
|
|
7
|
+
|
|
8
|
+
// Shared fixtures for the SpeechSynthesizer tests below.
// Raw 16-bit little-endian PCM at 24 kHz — a common TTS wire format.
const pcmFormat: AudioFormat = {
  container: "raw",
  encoding: "pcm_s16le",
  sampleRate: 24000,
}

// Tiny scripted synthesis result: four bytes, half a second of "audio".
const blob: AudioBlob = {
  format: pcmFormat,
  bytes: new Uint8Array([0xde, 0xad, 0xbe, 0xef]),
  durationSeconds: 0.5,
}

// Single-byte chunk whose payload doubles as an identity marker in asserts.
const chunk = (n: number): AudioChunk => ({ bytes: new Uint8Array([n]) })
|
|
21
|
+
|
|
22
|
+
describe("SpeechSynthesizer.synthesize", () => {
|
|
23
|
+
it("returns the scripted AudioBlob", async () => {
|
|
24
|
+
const mock = MockSpeechSynthesizer.layer({ blobs: [blob] })
|
|
25
|
+
const program = SpeechSynthesizer.synthesize({
|
|
26
|
+
text: "hi",
|
|
27
|
+
model: "mock-tts",
|
|
28
|
+
voiceId: "stock-voice",
|
|
29
|
+
})
|
|
30
|
+
const result = await Effect.runPromise(program.pipe(Effect.provide(mock.layer)))
|
|
31
|
+
expect(result.bytes).toEqual(blob.bytes)
|
|
32
|
+
expect(result.durationSeconds).toBe(0.5)
|
|
33
|
+
})
|
|
34
|
+
})
|
|
35
|
+
|
|
36
|
+
describe("SpeechSynthesizer.streamSynthesis", () => {
|
|
37
|
+
it("emits scripted chunks for full-text-in streaming", async () => {
|
|
38
|
+
const mock = MockSpeechSynthesizer.layer({
|
|
39
|
+
streamSynthesisChunks: [[chunk(1), chunk(2), chunk(3)]],
|
|
40
|
+
})
|
|
41
|
+
const program = Stream.runCollect(
|
|
42
|
+
SpeechSynthesizer.streamSynthesis({
|
|
43
|
+
text: "hi",
|
|
44
|
+
model: "mock-tts",
|
|
45
|
+
voiceId: "stock-voice",
|
|
46
|
+
}),
|
|
47
|
+
)
|
|
48
|
+
const out = await Effect.runPromise(program.pipe(Effect.provide(mock.layer)))
|
|
49
|
+
expect(out.map((c) => Array.from(c.bytes))).toEqual([[1], [2], [3]])
|
|
50
|
+
})
|
|
51
|
+
})
|
|
52
|
+
|
|
53
|
+
// Compile-time tests: `expectTypeOf` assertions are erased at runtime; what is
// being verified is the exact shape of the R channel produced by each helper.
describe("SpeechSynthesizer capability marker (compile-time)", () => {
  // Minimal incremental-synthesis request reused by the type-level cases.
  const ssfReq: SpeechSynthesizer.CommonStreamSynthesizeRequest = {
    model: "mock-tts",
    voiceId: "v",
  }

  it("requires `TtsIncrementalText` on the R channel of streamSynthesisFrom", () => {
    const tokens: Stream.Stream<string> = Stream.fromIterable(["a"])
    const audio = tokens.pipe(SpeechSynthesizer.streamSynthesisFrom(ssfReq))
    expectTypeOf(audio).toEqualTypeOf<
      Stream.Stream<
        AudioChunk,
        AiError.AiError,
        SpeechSynthesizer.SpeechSynthesizer | SpeechSynthesizer.TtsIncrementalText
      >
    >()
  })

  it("does NOT require `TtsIncrementalText` for sync `synthesize`", () => {
    const eff = SpeechSynthesizer.synthesize({ text: "hi", model: "m", voiceId: "v" })
    expectTypeOf(eff).toEqualTypeOf<
      Effect.Effect<AudioBlob, AiError.AiError, SpeechSynthesizer.SpeechSynthesizer>
    >()
  })

  it("does NOT require `TtsIncrementalText` for full-text `streamSynthesis`", () => {
    const audio = SpeechSynthesizer.streamSynthesis({ text: "hi", model: "m", voiceId: "v" })
    expectTypeOf(audio).toEqualTypeOf<
      Stream.Stream<AudioChunk, AiError.AiError, SpeechSynthesizer.SpeechSynthesizer>
    >()
  })

  it("a layer without the marker leaves `TtsIncrementalText` unsatisfied in R", () => {
    const noMarker = MockSpeechSynthesizer.layerWithoutIncremental({})
    const tokens: Stream.Stream<string> = Stream.fromIterable(["a"])
    const audio = tokens.pipe(SpeechSynthesizer.streamSynthesisFrom(ssfReq))
    const program = Stream.runDrain(audio).pipe(Effect.provide(noMarker.layer))
    // `SpeechSynthesizer` is provided by the layer; `TtsIncrementalText` is not,
    // so it remains in R — calling `Effect.runPromise(program)` would be a type
    // error because runPromise requires `R = never`.
    expectTypeOf(program).toEqualTypeOf<
      Effect.Effect<void, AiError.AiError, SpeechSynthesizer.TtsIncrementalText>
    >()
  })

  it("a full layer (with marker) clears R to never", () => {
    const fullMock = MockSpeechSynthesizer.layer({
      streamSynthesisFromChunks: [[]],
    })
    const tokens: Stream.Stream<string> = Stream.fromIterable(["a"])
    const audio = tokens.pipe(SpeechSynthesizer.streamSynthesisFrom(ssfReq))
    const program = Stream.runDrain(audio).pipe(Effect.provide(fullMock.layer))
    expectTypeOf(program).toEqualTypeOf<Effect.Effect<void, AiError.AiError, never>>()
  })
})
|
|
108
|
+
|
|
109
|
+
describe("SpeechSynthesizer.streamSynthesisFrom", () => {
  // Shadows the ssfReq in the compile-time suite; scoped to this describe.
  const ssfReq: SpeechSynthesizer.CommonStreamSynthesizeRequest = {
    model: "mock-tts",
    voiceId: "stock-voice",
  }

  it("pipes an LLM-style text stream into audio chunks", async () => {
    const mock = MockSpeechSynthesizer.layer({
      streamSynthesisFromChunks: [[chunk(10), chunk(20)]],
    })
    const tokens = Stream.fromIterable(["Hello, ", "world."])
    const audio = tokens.pipe(SpeechSynthesizer.streamSynthesisFrom(ssfReq))
    const out = await Effect.runPromise(Stream.runCollect(audio).pipe(Effect.provide(mock.layer)))
    expect(out.map((c) => Array.from(c.bytes))).toEqual([[10], [20]])
  })

  it("records the request on the streamSynthesisFrom call channel", async () => {
    const mock = MockSpeechSynthesizer.layer({
      streamSynthesisFromChunks: [[chunk(42)]],
    })
    const program = Effect.gen(function* () {
      yield* Stream.runDrain(
        Stream.fromIterable(["x"]).pipe(SpeechSynthesizer.streamSynthesisFrom(ssfReq)),
      )
      return yield* mock.recorder
    })
    const rec = await Effect.runPromise(program.pipe(Effect.provide(mock.layer)))
    // Exactly one call on the incremental channel; the other channels stay
    // untouched — proves the helper dispatched to streamSynthesisFrom only.
    expect(rec.streamSynthesisFromCalls.length).toBe(1)
    expect(rec.streamSynthesisFromCalls[0]!.voiceId).toBe("stock-voice")
    expect(rec.synthesizeCalls.length).toBe(0)
    expect(rec.streamSynthesisCalls.length).toBe(0)
  })
})
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import { Context, Effect, Function, Stream } from "effect"
|
|
2
|
+
import * as AiError from "../domain/AiError.js"
|
|
3
|
+
import type { AudioBlob, AudioChunk, AudioFormat } from "../domain/Audio.js"
|
|
4
|
+
|
|
5
|
+
/**
 * Cross-provider synthesis request. Provider-specific extensions
 * (ElevenLabs `stability` / `similarity_boost`, Cartesia `emotion`,
 * MiniMax `vol` / `pitch`, Azure SSML style tags) live on each
 * provider's typed request which extends this and narrows `model` and
 * `voiceId`.
 */
export type CommonSynthesizeRequest = {
  /** Full text of the utterance to synthesize. */
  readonly text: string
  /** Model identifier. Each provider narrows. */
  readonly model: string
  /**
   * Voice identifier. Per-provider request types narrow this to a
   * typed literal union of stock voices + `(string & {})` escape for
   * custom cloned voice IDs. Providers without custom-voice support
   * (OpenAI, Deepgram Aura, AWS Polly) narrow to the stock-only union.
   */
  readonly voiceId: string
  /** Desired output container/encoding; provider default when omitted. */
  readonly outputFormat?: AudioFormat
  /** Playback-speed multiplier — valid range is provider-specific; confirm per adapter. */
  readonly speed?: number
  /** Presumably a BCP-47 language tag (e.g. "en-US") — verify against each adapter. */
  readonly languageCode?: string
}
|
|
27
|
+
|
|
28
|
+
/**
 * Incremental-synthesis request — the text itself arrives separately as a
 * `Stream<string>`, so this is `CommonSynthesizeRequest` minus `text`.
 * Gated by the `TtsIncrementalText` capability marker; only providers
 * that ship the marker can be used.
 *
 * Multi-context features (Cartesia `context_id`, ElevenLabs `multi-
 * stream-input`) are NOT exposed here — one logical utterance per
 * call. Provider extensions can expose `forkContext` for that.
 */
export type CommonStreamSynthesizeRequest = Omit<CommonSynthesizeRequest, "text">
|
|
38
|
+
|
|
39
|
+
/**
 * Capability surface implemented by TTS provider adapters. Consumers
 * should normally go through the top-level helpers (`synthesize`,
 * `streamSynthesis`, `streamSynthesisFrom`) rather than reading the
 * service out of context directly.
 */
export type SpeechSynthesizerService = {
  /** One-shot. Full text in, full audio bytes out. Universally supported. */
  readonly synthesize: (
    request: CommonSynthesizeRequest,
  ) => Effect.Effect<AudioBlob, AiError.AiError>
  /**
   * Full text in, audio chunks streamed out (chunked HTTP). Universally
   * supported across providers that offer any streaming TTS at all.
   */
  readonly streamSynthesis: (
    request: CommonSynthesizeRequest,
  ) => Stream.Stream<AudioChunk, AiError.AiError>
  /**
   * Incremental text in (as a Stream), audio chunks streamed out. The
   * underlying WS connection is acquired on first pull and released
   * when the output stream is finalized via `Stream.scoped`.
   *
   * Gated by the `TtsIncrementalText` capability marker on the top-
   * level helper — providers without WS-style incremental input don't
   * ship the marker, so calls fail at `Effect.provide` with a type
   * error.
   */
  readonly streamSynthesisFrom: <E, R>(
    textIn: Stream.Stream<string, E, R>,
    request: CommonStreamSynthesizeRequest,
  ) => Stream.Stream<AudioChunk, AiError.AiError | E, R>
}
|
|
66
|
+
|
|
67
|
+
/**
 * Context tag under which provider layers register their
 * `SpeechSynthesizerService` implementation.
 */
export class SpeechSynthesizer extends Context.Service<
  SpeechSynthesizer,
  SpeechSynthesizerService
>()("@betalyra/effect-uai/SpeechSynthesizer") {}
|
|
71
|
+
|
|
72
|
+
/**
 * Capability marker — provided by provider layers whose
 * `streamSynthesisFrom` is wired up at the wire level. OpenAI, Azure
 * (wire), and AWS Polly non-Generative do not ship it. Calling
 * `streamSynthesisFrom` while only one of those Layers is in scope
 * fails at `Effect.provide` with a type error — the requirement
 * surfaces in the `R` channel, so there is nothing to check at runtime.
 *
 * Phantom — the value is `void`; providers register with
 * `Layer.succeed(TtsIncrementalText, undefined)`.
 */
export class TtsIncrementalText extends Context.Service<TtsIncrementalText, void>()(
  "@betalyra/effect-uai/capability/TtsIncrementalText",
) {}
|
|
85
|
+
|
|
86
|
+
/** One-shot synthesis. */
|
|
87
|
+
export const synthesize = (
|
|
88
|
+
request: CommonSynthesizeRequest,
|
|
89
|
+
): Effect.Effect<AudioBlob, AiError.AiError, SpeechSynthesizer> =>
|
|
90
|
+
Effect.flatMap(SpeechSynthesizer.asEffect(), (s) => s.synthesize(request))
|
|
91
|
+
|
|
92
|
+
/** Full text in, audio chunks out. */
|
|
93
|
+
export const streamSynthesis = (
|
|
94
|
+
request: CommonSynthesizeRequest,
|
|
95
|
+
): Stream.Stream<AudioChunk, AiError.AiError, SpeechSynthesizer> =>
|
|
96
|
+
Stream.unwrap(Effect.map(SpeechSynthesizer.asEffect(), (s) => s.streamSynthesis(request)))
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* Incremental synthesis. Dual-arity: pipeable (data-last) and direct
|
|
100
|
+
* (data-first). Requires `TtsIncrementalText` in R — providers without
|
|
101
|
+
* incremental-text-in support are a type error at provide time.
|
|
102
|
+
*
|
|
103
|
+
* @example
|
|
104
|
+
* ```ts
|
|
105
|
+
* const audio = LanguageModel.streamTurn(turnReq).pipe(
|
|
106
|
+
* Stream.filterMap(Turn.toTextDelta),
|
|
107
|
+
* SpeechSynthesizer.streamSynthesisFrom(synthReq),
|
|
108
|
+
* )
|
|
109
|
+
* ```
|
|
110
|
+
*/
|
|
111
|
+
export const streamSynthesisFrom: {
|
|
112
|
+
(
|
|
113
|
+
request: CommonStreamSynthesizeRequest,
|
|
114
|
+
): <E, R>(
|
|
115
|
+
textIn: Stream.Stream<string, E, R>,
|
|
116
|
+
) => Stream.Stream<AudioChunk, AiError.AiError | E, R | SpeechSynthesizer | TtsIncrementalText>
|
|
117
|
+
<E, R>(
|
|
118
|
+
textIn: Stream.Stream<string, E, R>,
|
|
119
|
+
request: CommonStreamSynthesizeRequest,
|
|
120
|
+
): Stream.Stream<AudioChunk, AiError.AiError | E, R | SpeechSynthesizer | TtsIncrementalText>
|
|
121
|
+
} = Function.dual(
|
|
122
|
+
2,
|
|
123
|
+
<E, R>(textIn: Stream.Stream<string, E, R>, request: CommonStreamSynthesizeRequest) =>
|
|
124
|
+
Stream.unwrap(
|
|
125
|
+
Effect.gen(function* () {
|
|
126
|
+
const s = yield* SpeechSynthesizer.asEffect()
|
|
127
|
+
yield* TtsIncrementalText.asEffect()
|
|
128
|
+
return s.streamSynthesisFrom(textIn, request)
|
|
129
|
+
}),
|
|
130
|
+
),
|
|
131
|
+
)
|
package/src/streaming/JSONL.ts
CHANGED
|
@@ -79,6 +79,18 @@ export const parse =
|
|
|
79
79
|
),
|
|
80
80
|
)
|
|
81
81
|
|
|
82
|
+
/**
|
|
83
|
+
* Best-effort parse of a single JSON frame. Returns the parsed value or
|
|
84
|
+
* `undefined` on malformed input. Realtime WS adapters use this to skip
|
|
85
|
+
* non-JSON or partially-received frames silently rather than fail the
|
|
86
|
+
* entire session over one bad frame.
|
|
87
|
+
*/
|
|
88
|
+
export const parseSafe = (raw: string) =>
|
|
89
|
+
Effect.try({
|
|
90
|
+
try: () => JSON.parse(raw) as unknown,
|
|
91
|
+
catch: () => undefined,
|
|
92
|
+
}).pipe(Effect.orElseSucceed(() => undefined))
|
|
93
|
+
|
|
82
94
|
const encoder = new TextEncoder()
|
|
83
95
|
|
|
84
96
|
/**
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
import { Effect, Layer, Ref, Stream } from "effect"
|
|
2
|
+
import * as AiError from "../domain/AiError.js"
|
|
3
|
+
import type { AudioChunk } from "../domain/Audio.js"
|
|
4
|
+
import type {
|
|
5
|
+
CommonGenerateMusicRequest,
|
|
6
|
+
CommonStreamGenerateMusicRequest,
|
|
7
|
+
MusicResult,
|
|
8
|
+
MusicSessionInput,
|
|
9
|
+
} from "../domain/Music.js"
|
|
10
|
+
import {
|
|
11
|
+
MusicGenerator,
|
|
12
|
+
MusicInteractiveSession,
|
|
13
|
+
type MusicGeneratorService,
|
|
14
|
+
} from "../music-generator/MusicGenerator.js"
|
|
15
|
+
|
|
16
|
+
/**
 * Snapshot of every request the mock has seen, one read-only list per
 * service method, in call order. Obtained via the `recorder` effect
 * returned alongside each layer constructor.
 */
export type MockMusicGeneratorRecorder = {
  readonly generateCalls: ReadonlyArray<CommonGenerateMusicRequest>
  readonly streamGenerationCalls: ReadonlyArray<CommonStreamGenerateMusicRequest>
  readonly streamGenerationFromCalls: ReadonlyArray<CommonStreamGenerateMusicRequest>
}
|
|
21
|
+
|
|
22
|
+
/**
 * Scripted responses for the mock. Each list is consumed in call order;
 * a call past the end of its list fails with `AiError.InvalidRequest`.
 * Omitted lists default to empty (so the first call already fails).
 */
export type MockMusicGeneratorScript = {
  /** One result per `generate` call, consumed in order. */
  readonly results?: ReadonlyArray<MusicResult>
  /** One chunk-list per `streamGeneration` call, consumed in order. */
  readonly streamGenerationChunks?: ReadonlyArray<ReadonlyArray<AudioChunk>>
  /** One chunk-list per `streamGenerationFrom` call, consumed in order. */
  readonly streamGenerationFromChunks?: ReadonlyArray<ReadonlyArray<AudioChunk>>
}
|
|
30
|
+
|
|
31
|
+
/**
 * Builds a scripted `MusicGeneratorService`. Each method keeps its own
 * cursor `Ref` into the corresponding script list, reports the incoming
 * request through `record`, and fails with `AiError.InvalidRequest`
 * once its script is exhausted. For the streaming methods the recording
 * and cursor advance happen inside `Stream.unwrap`, i.e. when the
 * returned stream is first pulled, not when the method is called.
 */
const makeService = (
  script: MockMusicGeneratorScript,
  record: {
    readonly generate: (req: CommonGenerateMusicRequest) => Effect.Effect<void>
    readonly streamGeneration: (req: CommonStreamGenerateMusicRequest) => Effect.Effect<void>
    readonly streamGenerationFrom: (req: CommonStreamGenerateMusicRequest) => Effect.Effect<void>
  },
) =>
  Effect.gen(function* () {
    // Independent cursors: exhausting one channel never affects the others.
    const gCursor = yield* Ref.make(0)
    const sgCursor = yield* Ref.make(0)
    const sgfCursor = yield* Ref.make(0)
    const service: MusicGeneratorService = {
      generate: (request) =>
        Effect.gen(function* () {
          yield* record.generate(request)
          const i = yield* Ref.getAndUpdate(gCursor, (n) => n + 1)
          const scripted = script.results ?? []
          if (i >= scripted.length) {
            return yield* Effect.fail(
              new AiError.InvalidRequest({
                provider: "mock",
                raw: `MockMusicGenerator exhausted: ${scripted.length} results scripted, but call ${i + 1} was made`,
              }),
            )
          }
          return scripted[i]!
        }),
      streamGeneration: (request) =>
        Stream.unwrap(
          Effect.gen(function* () {
            yield* record.streamGeneration(request)
            const i = yield* Ref.getAndUpdate(sgCursor, (n) => n + 1)
            const scripted = script.streamGenerationChunks ?? []
            if (i >= scripted.length) {
              return Stream.fail(
                new AiError.InvalidRequest({
                  provider: "mock",
                  raw: `MockMusicGenerator exhausted: ${scripted.length} streamGeneration lists scripted, but call ${i + 1} was made`,
                }),
              )
            }
            return Stream.fromIterable(scripted[i]!)
          }),
        ),
      streamGenerationFrom: <E, R>(
        input: Stream.Stream<MusicSessionInput, E, R>,
        request: CommonStreamGenerateMusicRequest,
      ): Stream.Stream<AudioChunk, AiError.AiError | E, R> =>
        Stream.unwrap(
          Effect.gen(function* () {
            yield* record.streamGenerationFrom(request)
            const i = yield* Ref.getAndUpdate(sgfCursor, (n) => n + 1)
            const scripted = script.streamGenerationFromChunks ?? []
            if (i >= scripted.length) {
              // Annotated so the failing stream unifies with the declared E/R.
              const exhausted: Stream.Stream<AudioChunk, AiError.AiError | E, R> = Stream.fail(
                new AiError.InvalidRequest({
                  provider: "mock",
                  raw: `MockMusicGenerator exhausted: ${scripted.length} streamGenerationFrom lists scripted, but call ${i + 1} was made`,
                }),
              )
              return exhausted
            }
            // Drain the input fully before emitting scripted audio chunks,
            // so consumers can assert on what session messages were pushed.
            return Stream.drain(input).pipe(Stream.concat(Stream.fromIterable(scripted[i]!)))
          }),
        ),
    }
    return service
  })
|
|
102
|
+
|
|
103
|
+
/**
|
|
104
|
+
* Layer providing the `MusicGenerator` service AND the
|
|
105
|
+
* `MusicInteractiveSession` capability marker. Use for the common case
|
|
106
|
+
* where code under test exercises `streamGenerationFrom`.
|
|
107
|
+
*/
|
|
108
|
+
export const layer = (
|
|
109
|
+
script: MockMusicGeneratorScript,
|
|
110
|
+
): {
|
|
111
|
+
readonly layer: Layer.Layer<MusicGenerator | MusicInteractiveSession>
|
|
112
|
+
readonly recorder: Effect.Effect<MockMusicGeneratorRecorder>
|
|
113
|
+
} => {
|
|
114
|
+
const gCalls = Ref.makeUnsafe<ReadonlyArray<CommonGenerateMusicRequest>>([])
|
|
115
|
+
const sgCalls = Ref.makeUnsafe<ReadonlyArray<CommonStreamGenerateMusicRequest>>([])
|
|
116
|
+
const sgfCalls = Ref.makeUnsafe<ReadonlyArray<CommonStreamGenerateMusicRequest>>([])
|
|
117
|
+
const generatorLayer = Layer.effect(
|
|
118
|
+
MusicGenerator,
|
|
119
|
+
makeService(script, {
|
|
120
|
+
generate: (req) => Ref.update(gCalls, (xs) => [...xs, req]),
|
|
121
|
+
streamGeneration: (req) => Ref.update(sgCalls, (xs) => [...xs, req]),
|
|
122
|
+
streamGenerationFrom: (req) => Ref.update(sgfCalls, (xs) => [...xs, req]),
|
|
123
|
+
}),
|
|
124
|
+
)
|
|
125
|
+
const live = Layer.merge(generatorLayer, Layer.succeed(MusicInteractiveSession, undefined))
|
|
126
|
+
return {
|
|
127
|
+
layer: live,
|
|
128
|
+
recorder: Effect.gen(function* () {
|
|
129
|
+
const generateCalls = yield* Ref.get(gCalls)
|
|
130
|
+
const streamGenerationCalls = yield* Ref.get(sgCalls)
|
|
131
|
+
const streamGenerationFromCalls = yield* Ref.get(sgfCalls)
|
|
132
|
+
return { generateCalls, streamGenerationCalls, streamGenerationFromCalls }
|
|
133
|
+
}),
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
/**
 * Variant that omits the `MusicInteractiveSession` marker — simulates a
 * provider without bidirectional support (Lyria 3 sync, ElevenLabs,
 * Mureka, MiniMax, Stable Audio, Suno). Calls to
 * `streamGenerationFrom` in code under test should be a compile-time
 * error against this Layer alone.
 */
export const layerWithoutInteractive = (
  script: MockMusicGeneratorScript,
): {
  readonly layer: Layer.Layer<MusicGenerator>
  readonly recorder: Effect.Effect<MockMusicGeneratorRecorder>
} => {
  // Call logs live outside the layer so `recorder` can read them even
  // after the layer's scope is torn down.
  const gCalls = Ref.makeUnsafe<ReadonlyArray<CommonGenerateMusicRequest>>([])
  const sgCalls = Ref.makeUnsafe<ReadonlyArray<CommonStreamGenerateMusicRequest>>([])
  const sgfCalls = Ref.makeUnsafe<ReadonlyArray<CommonStreamGenerateMusicRequest>>([])
  const live = Layer.effect(
    MusicGenerator,
    makeService(script, {
      generate: (req) => Ref.update(gCalls, (xs) => [...xs, req]),
      streamGeneration: (req) => Ref.update(sgCalls, (xs) => [...xs, req]),
      streamGenerationFrom: (req) => Ref.update(sgfCalls, (xs) => [...xs, req]),
    }),
  )
  return {
    layer: live,
    recorder: Effect.gen(function* () {
      const generateCalls = yield* Ref.get(gCalls)
      const streamGenerationCalls = yield* Ref.get(sgCalls)
      const streamGenerationFromCalls = yield* Ref.get(sgfCalls)
      return { generateCalls, streamGenerationCalls, streamGenerationFromCalls }
    }),
  }
}
|