@effect-uai/core 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{AiError-CBuPHVKA.d.mts → AiError-CAX_48RU.d.mts} +27 -5
- package/dist/{AiError-CBuPHVKA.d.mts.map → AiError-CAX_48RU.d.mts.map} +1 -1
- package/dist/Audio-BfCTGnH3.d.mts +61 -0
- package/dist/Audio-BfCTGnH3.d.mts.map +1 -0
- package/dist/{Image-BZmKfIdq.d.mts → Image-HNmMpMTh.d.mts} +1 -1
- package/dist/{Image-BZmKfIdq.d.mts.map → Image-HNmMpMTh.d.mts.map} +1 -1
- package/dist/{Items-CB8Bo3FI.d.mts → Items-DqbaJoz7.d.mts} +5 -5
- package/dist/{Items-CB8Bo3FI.d.mts.map → Items-DqbaJoz7.d.mts.map} +1 -1
- package/dist/{StructuredFormat-BWq5Hd1O.d.mts → StructuredFormat-BbN4dosH.d.mts} +11 -4
- package/dist/StructuredFormat-BbN4dosH.d.mts.map +1 -0
- package/dist/{Tool-DjVufH7i.d.mts → Tool-Y0__Py1H.d.mts} +20 -4
- package/dist/Tool-Y0__Py1H.d.mts.map +1 -0
- package/dist/Turn-ChbL2foc.d.mts +388 -0
- package/dist/Turn-ChbL2foc.d.mts.map +1 -0
- package/dist/domain/AiError.d.mts +2 -2
- package/dist/domain/AiError.mjs +19 -3
- package/dist/domain/AiError.mjs.map +1 -1
- package/dist/domain/Audio.d.mts +2 -0
- package/dist/domain/Audio.mjs +14 -0
- package/dist/domain/Audio.mjs.map +1 -0
- package/dist/domain/Image.d.mts +1 -1
- package/dist/domain/Items.d.mts +1 -1
- package/dist/domain/Items.mjs +1 -1
- package/dist/domain/Items.mjs.map +1 -1
- package/dist/domain/Music.d.mts +116 -0
- package/dist/domain/Music.d.mts.map +1 -0
- package/dist/domain/Music.mjs +29 -0
- package/dist/domain/Music.mjs.map +1 -0
- package/dist/domain/Transcript.d.mts +95 -0
- package/dist/domain/Transcript.d.mts.map +1 -0
- package/dist/domain/Transcript.mjs +22 -0
- package/dist/domain/Transcript.mjs.map +1 -0
- package/dist/domain/Turn.d.mts +2 -2
- package/dist/domain/Turn.mjs +22 -4
- package/dist/domain/Turn.mjs.map +1 -1
- package/dist/domain/Turn.test.d.mts +1 -0
- package/dist/domain/Turn.test.mjs +136 -0
- package/dist/domain/Turn.test.mjs.map +1 -0
- package/dist/embedding-model/Embedding.d.mts +15 -3
- package/dist/embedding-model/Embedding.d.mts.map +1 -1
- package/dist/embedding-model/Embedding.mjs.map +1 -1
- package/dist/embedding-model/EmbeddingModel.d.mts +33 -17
- package/dist/embedding-model/EmbeddingModel.d.mts.map +1 -1
- package/dist/embedding-model/EmbeddingModel.mjs.map +1 -1
- package/dist/embedding-model/EmbeddingModel.test.d.mts +1 -0
- package/dist/embedding-model/EmbeddingModel.test.mjs +59 -0
- package/dist/embedding-model/EmbeddingModel.test.mjs.map +1 -0
- package/dist/index.d.mts +13 -7
- package/dist/index.mjs +7 -1
- package/dist/language-model/LanguageModel.d.mts +30 -8
- package/dist/language-model/LanguageModel.d.mts.map +1 -1
- package/dist/language-model/LanguageModel.mjs +33 -3
- package/dist/language-model/LanguageModel.mjs.map +1 -1
- package/dist/language-model/LanguageModel.test.d.mts +1 -0
- package/dist/language-model/LanguageModel.test.mjs +143 -0
- package/dist/language-model/LanguageModel.test.mjs.map +1 -0
- package/dist/loop/Loop.d.mts +94 -11
- package/dist/loop/Loop.d.mts.map +1 -1
- package/dist/loop/Loop.mjs +92 -26
- package/dist/loop/Loop.mjs.map +1 -1
- package/dist/loop/Loop.test.mjs +171 -3
- package/dist/loop/Loop.test.mjs.map +1 -1
- package/dist/music-generator/MusicGenerator.d.mts +77 -0
- package/dist/music-generator/MusicGenerator.d.mts.map +1 -0
- package/dist/music-generator/MusicGenerator.mjs +51 -0
- package/dist/music-generator/MusicGenerator.mjs.map +1 -0
- package/dist/music-generator/MusicGenerator.test.d.mts +1 -0
- package/dist/music-generator/MusicGenerator.test.mjs +154 -0
- package/dist/music-generator/MusicGenerator.test.mjs.map +1 -0
- package/dist/observability/Metrics.d.mts +1 -1
- package/dist/observability/Metrics.mjs +1 -1
- package/dist/observability/Metrics.mjs.map +1 -1
- package/dist/speech-synthesizer/SpeechSynthesizer.d.mts +96 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.d.mts.map +1 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.mjs +48 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.mjs.map +1 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.test.d.mts +1 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.test.mjs +112 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.test.mjs.map +1 -0
- package/dist/streaming/JSONL.d.mts +10 -3
- package/dist/streaming/JSONL.d.mts.map +1 -1
- package/dist/streaming/JSONL.mjs +15 -9
- package/dist/streaming/JSONL.mjs.map +1 -1
- package/dist/structured-format/StructuredFormat.d.mts +2 -2
- package/dist/structured-format/StructuredFormat.mjs +9 -1
- package/dist/structured-format/StructuredFormat.mjs.map +1 -1
- package/dist/structured-format/StructuredFormat.test.d.mts +1 -0
- package/dist/structured-format/StructuredFormat.test.mjs +70 -0
- package/dist/structured-format/StructuredFormat.test.mjs.map +1 -0
- package/dist/testing/MockMusicGenerator.d.mts +39 -0
- package/dist/testing/MockMusicGenerator.d.mts.map +1 -0
- package/dist/testing/MockMusicGenerator.mjs +96 -0
- package/dist/testing/MockMusicGenerator.mjs.map +1 -0
- package/dist/testing/MockProvider.d.mts +23 -18
- package/dist/testing/MockProvider.d.mts.map +1 -1
- package/dist/testing/MockProvider.mjs +56 -72
- package/dist/testing/MockProvider.mjs.map +1 -1
- package/dist/testing/MockSpeechSynthesizer.d.mts +37 -0
- package/dist/testing/MockSpeechSynthesizer.d.mts.map +1 -0
- package/dist/testing/MockSpeechSynthesizer.mjs +95 -0
- package/dist/testing/MockSpeechSynthesizer.mjs.map +1 -0
- package/dist/testing/MockTranscriber.d.mts +37 -0
- package/dist/testing/MockTranscriber.d.mts.map +1 -0
- package/dist/testing/MockTranscriber.mjs +77 -0
- package/dist/testing/MockTranscriber.mjs.map +1 -0
- package/dist/tool/HistoryCheck.d.mts +1 -1
- package/dist/tool/Outcome.d.mts +1 -1
- package/dist/tool/Resolvers.d.mts +65 -8
- package/dist/tool/Resolvers.d.mts.map +1 -1
- package/dist/tool/Resolvers.mjs +8 -12
- package/dist/tool/Resolvers.mjs.map +1 -1
- package/dist/tool/Resolvers.test.mjs +6 -5
- package/dist/tool/Resolvers.test.mjs.map +1 -1
- package/dist/tool/Tool.d.mts +2 -2
- package/dist/tool/Tool.mjs +18 -1
- package/dist/tool/Tool.mjs.map +1 -1
- package/dist/tool/Tool.test.d.mts +1 -0
- package/dist/tool/Tool.test.mjs +66 -0
- package/dist/tool/Tool.test.mjs.map +1 -0
- package/dist/tool/Toolkit.d.mts +4 -6
- package/dist/tool/Toolkit.d.mts.map +1 -1
- package/dist/tool/Toolkit.mjs +14 -43
- package/dist/tool/Toolkit.mjs.map +1 -1
- package/dist/transcriber/Transcriber.d.mts +101 -0
- package/dist/transcriber/Transcriber.d.mts.map +1 -0
- package/dist/transcriber/Transcriber.mjs +49 -0
- package/dist/transcriber/Transcriber.mjs.map +1 -0
- package/dist/transcriber/Transcriber.test.d.mts +1 -0
- package/dist/transcriber/Transcriber.test.mjs +130 -0
- package/dist/transcriber/Transcriber.test.mjs.map +1 -0
- package/package.json +37 -1
- package/src/domain/AiError.ts +22 -1
- package/src/domain/Audio.ts +88 -0
- package/src/domain/Items.ts +1 -1
- package/src/domain/Music.ts +121 -0
- package/src/domain/Transcript.ts +83 -0
- package/src/domain/Turn.test.ts +141 -0
- package/src/domain/Turn.ts +50 -43
- package/src/embedding-model/Embedding.ts +23 -0
- package/src/embedding-model/EmbeddingModel.test.ts +92 -0
- package/src/embedding-model/EmbeddingModel.ts +30 -20
- package/src/index.ts +6 -0
- package/src/language-model/LanguageModel.test.ts +170 -0
- package/src/language-model/LanguageModel.ts +64 -1
- package/src/loop/Loop.test.ts +256 -3
- package/src/loop/Loop.ts +225 -49
- package/src/music-generator/MusicGenerator.test.ts +170 -0
- package/src/music-generator/MusicGenerator.ts +123 -0
- package/src/observability/Metrics.ts +1 -1
- package/src/speech-synthesizer/SpeechSynthesizer.test.ts +141 -0
- package/src/speech-synthesizer/SpeechSynthesizer.ts +131 -0
- package/src/streaming/JSONL.ts +16 -13
- package/src/structured-format/StructuredFormat.test.ts +105 -0
- package/src/structured-format/StructuredFormat.ts +14 -1
- package/src/testing/MockMusicGenerator.ts +168 -0
- package/src/testing/MockProvider.ts +126 -105
- package/src/testing/MockSpeechSynthesizer.ts +163 -0
- package/src/testing/MockTranscriber.ts +137 -0
- package/src/tool/Resolvers.test.ts +8 -5
- package/src/tool/Resolvers.ts +17 -19
- package/src/tool/Tool.test.ts +105 -0
- package/src/tool/Tool.ts +20 -0
- package/src/tool/Toolkit.ts +49 -50
- package/src/transcriber/Transcriber.test.ts +125 -0
- package/src/transcriber/Transcriber.ts +127 -0
- package/dist/StructuredFormat-BWq5Hd1O.d.mts.map +0 -1
- package/dist/Tool-DjVufH7i.d.mts.map +0 -1
- package/dist/Turn-OPaILVIB.d.mts +0 -194
- package/dist/Turn-OPaILVIB.d.mts.map +0 -1
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { Context, Effect, Function, Stream } from "effect"
|
|
2
|
+
import * as AiError from "../domain/AiError.js"
|
|
3
|
+
import type { AudioChunk } from "../domain/Audio.js"
|
|
4
|
+
import type {
|
|
5
|
+
CommonGenerateMusicRequest,
|
|
6
|
+
CommonStreamGenerateMusicRequest,
|
|
7
|
+
MusicResult,
|
|
8
|
+
MusicSessionInput,
|
|
9
|
+
} from "../domain/Music.js"
|
|
10
|
+
|
|
11
|
+
export type {
|
|
12
|
+
CommonGenerateMusicRequest,
|
|
13
|
+
CommonStreamGenerateMusicRequest,
|
|
14
|
+
MusicResult,
|
|
15
|
+
MusicSessionInput,
|
|
16
|
+
WeightedPrompt,
|
|
17
|
+
} from "../domain/Music.js"
|
|
18
|
+
|
|
19
|
+
/**
 * Provider-implemented surface for music generation. Adapter layers
 * implement these three operations; application code reaches them via
 * the module-level helpers (`generate`, `streamGeneration`,
 * `streamGenerationFrom`) which resolve this service from the Effect
 * context.
 */
export type MusicGeneratorService = {
  /**
   * One-shot. Prompt in, full audio bytes out. Universally supported.
   * Async/poll-based providers (Suno, Mureka) hide their poll loop
   * inside the adapter — caller still sees a single `Effect`.
   */
  readonly generate: (
    request: CommonGenerateMusicRequest,
  ) => Effect.Effect<MusicResult, AiError.AiError>
  /**
   * Prompt in, audio chunks streamed out. Providers without a native
   * chunked-output endpoint (Lyria 3 sync, Mureka, MiniMax, Stable
   * Audio) emulate this by calling `generate` and emitting a single
   * `AudioChunk` — first-class, no `Unsupported`.
   */
  readonly streamGeneration: (
    request: CommonStreamGenerateMusicRequest,
  ) => Stream.Stream<AudioChunk, AiError.AiError>
  /**
   * Bidirectional session: a `Stream` of prompt-or-config updates flows
   * in, a `Stream` of audio chunks flows out. The session WS / RPC is
   * acquired on first pull and released when the output stream is
   * finalized via `Stream.scoped`.
   *
   * Gated by the `MusicInteractiveSession` capability marker on the
   * top-level helper — providers without bidirectional support don't
   * ship the marker, so calls fail at `Effect.provide` with a type
   * error.
   */
  readonly streamGenerationFrom: <E, R>(
    input: Stream.Stream<MusicSessionInput, E, R>,
    request: CommonStreamGenerateMusicRequest,
  ) => Stream.Stream<AudioChunk, AiError.AiError | E, R>
}
|
|
53
|
+
|
|
54
|
+
/**
 * Context tag for the music-generation service. Provider layers supply
 * a `MusicGeneratorService` implementation under this tag.
 */
export class MusicGenerator extends Context.Service<MusicGenerator, MusicGeneratorService>()(
  "@betalyra/effect-uai/MusicGenerator",
) {}

/**
 * Capability marker — provided by provider layers whose
 * `streamGenerationFrom` is wired up at the wire level. Currently only
 * Lyria RealTime (via the BidiGenerateMusic WebSocket) ships it.
 * Calling `streamGenerationFrom` while only a non-interactive Layer is
 * in scope fails at `Effect.provide` with a type error.
 *
 * Phantom — the value is `void`; providers register with
 * `Layer.succeed(MusicInteractiveSession, undefined)`.
 */
export class MusicInteractiveSession extends Context.Service<MusicInteractiveSession, void>()(
  "@betalyra/effect-uai/capability/MusicInteractiveSession",
) {}
|
|
71
|
+
|
|
72
|
+
/** One-shot generation. */
|
|
73
|
+
export const generate = (
|
|
74
|
+
request: CommonGenerateMusicRequest,
|
|
75
|
+
): Effect.Effect<MusicResult, AiError.AiError, MusicGenerator> =>
|
|
76
|
+
Effect.flatMap(MusicGenerator.asEffect(), (s) => s.generate(request))
|
|
77
|
+
|
|
78
|
+
/** Prompt in, audio chunks out. */
|
|
79
|
+
export const streamGeneration = (
|
|
80
|
+
request: CommonStreamGenerateMusicRequest,
|
|
81
|
+
): Stream.Stream<AudioChunk, AiError.AiError, MusicGenerator> =>
|
|
82
|
+
Stream.unwrap(Effect.map(MusicGenerator.asEffect(), (s) => s.streamGeneration(request)))
|
|
83
|
+
|
|
84
|
+
/**
|
|
85
|
+
* Bidirectional generation. Dual-arity: pipeable (data-last) and
|
|
86
|
+
* direct (data-first). Requires `MusicInteractiveSession` in R —
|
|
87
|
+
* providers without bidirectional support are a type error at provide
|
|
88
|
+
* time.
|
|
89
|
+
*
|
|
90
|
+
* @example
|
|
91
|
+
* ```ts
|
|
92
|
+
* const audio = Stream.fromIterable([
|
|
93
|
+
* Music.promptsInput([{ text: "minimal techno", weight: 1.0 }]),
|
|
94
|
+
* Music.configInput({ bpm: 124 }),
|
|
95
|
+
* ]).pipe(
|
|
96
|
+
* MusicGenerator.streamGenerationFrom({ model: "lyria-realtime-001", prompts: "" }),
|
|
97
|
+
* )
|
|
98
|
+
* ```
|
|
99
|
+
*/
|
|
100
|
+
export const streamGenerationFrom: {
|
|
101
|
+
(
|
|
102
|
+
request: CommonStreamGenerateMusicRequest,
|
|
103
|
+
): <E, R>(
|
|
104
|
+
input: Stream.Stream<MusicSessionInput, E, R>,
|
|
105
|
+
) => Stream.Stream<AudioChunk, AiError.AiError | E, R | MusicGenerator | MusicInteractiveSession>
|
|
106
|
+
<E, R>(
|
|
107
|
+
input: Stream.Stream<MusicSessionInput, E, R>,
|
|
108
|
+
request: CommonStreamGenerateMusicRequest,
|
|
109
|
+
): Stream.Stream<AudioChunk, AiError.AiError | E, R | MusicGenerator | MusicInteractiveSession>
|
|
110
|
+
} = Function.dual(
|
|
111
|
+
2,
|
|
112
|
+
<E, R>(
|
|
113
|
+
input: Stream.Stream<MusicSessionInput, E, R>,
|
|
114
|
+
request: CommonStreamGenerateMusicRequest,
|
|
115
|
+
) =>
|
|
116
|
+
Stream.unwrap(
|
|
117
|
+
Effect.gen(function* () {
|
|
118
|
+
const s = yield* MusicGenerator.asEffect()
|
|
119
|
+
yield* MusicInteractiveSession.asEffect()
|
|
120
|
+
return s.streamGenerationFrom(input, request)
|
|
121
|
+
}),
|
|
122
|
+
),
|
|
123
|
+
)
|
|
@@ -52,7 +52,7 @@ export type RatePoint<A> = {
|
|
|
52
52
|
* The weight is the unit you care about - bytes, tokens, error count, etc.
|
|
53
53
|
* For tokens-per-second on `TurnEvent`, pass:
|
|
54
54
|
*
|
|
55
|
-
* `(d) => d.
|
|
55
|
+
* `(d) => d._tag === "TextDelta" ? countTokens(d.text) : 0`
|
|
56
56
|
*
|
|
57
57
|
* Use any tokenizer you like; the library does not ship one.
|
|
58
58
|
*/
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import { Effect, Stream } from "effect"
|
|
2
|
+
import { describe, expect, expectTypeOf, it } from "vitest"
|
|
3
|
+
import type * as AiError from "../domain/AiError.js"
|
|
4
|
+
import type { AudioBlob, AudioChunk, AudioFormat } from "../domain/Audio.js"
|
|
5
|
+
import * as MockSpeechSynthesizer from "../testing/MockSpeechSynthesizer.js"
|
|
6
|
+
import * as SpeechSynthesizer from "./SpeechSynthesizer.js"
|
|
7
|
+
|
|
8
|
+
// Shared fixtures: a raw-PCM format, one scripted result blob, and a
// helper that builds a one-byte AudioChunk for easy byte-level asserts.
const pcmFormat: AudioFormat = {
  container: "raw",
  encoding: "pcm_s16le",
  sampleRate: 24000,
}

const blob: AudioBlob = {
  format: pcmFormat,
  bytes: new Uint8Array([0xde, 0xad, 0xbe, 0xef]),
  durationSeconds: 0.5,
}

const chunk = (n: number): AudioChunk => ({ bytes: new Uint8Array([n]) })

describe("SpeechSynthesizer.synthesize", () => {
  it("returns the scripted AudioBlob", async () => {
    const mock = MockSpeechSynthesizer.layer({ blobs: [blob] })
    const program = SpeechSynthesizer.synthesize({
      text: "hi",
      model: "mock-tts",
      voiceId: "stock-voice",
    })
    const result = await Effect.runPromise(program.pipe(Effect.provide(mock.layer)))
    expect(result.bytes).toEqual(blob.bytes)
    expect(result.durationSeconds).toBe(0.5)
  })
})

describe("SpeechSynthesizer.streamSynthesis", () => {
  it("emits scripted chunks for full-text-in streaming", async () => {
    const mock = MockSpeechSynthesizer.layer({
      streamSynthesisChunks: [[chunk(1), chunk(2), chunk(3)]],
    })
    const program = Stream.runCollect(
      SpeechSynthesizer.streamSynthesis({
        text: "hi",
        model: "mock-tts",
        voiceId: "stock-voice",
      }),
    )
    const out = await Effect.runPromise(program.pipe(Effect.provide(mock.layer)))
    expect(out.map((c) => Array.from(c.bytes))).toEqual([[1], [2], [3]])
  })
})

// These tests assert the *types* only (expectTypeOf) — they verify the
// capability-marker gating that happens entirely at compile time.
describe("SpeechSynthesizer capability marker (compile-time)", () => {
  const ssfReq: SpeechSynthesizer.CommonStreamSynthesizeRequest = {
    model: "mock-tts",
    voiceId: "v",
  }

  it("requires `TtsIncrementalText` on the R channel of streamSynthesisFrom", () => {
    const tokens: Stream.Stream<string> = Stream.fromIterable(["a"])
    const audio = tokens.pipe(SpeechSynthesizer.streamSynthesisFrom(ssfReq))
    expectTypeOf(audio).toEqualTypeOf<
      Stream.Stream<
        AudioChunk,
        AiError.AiError,
        SpeechSynthesizer.SpeechSynthesizer | SpeechSynthesizer.TtsIncrementalText
      >
    >()
  })

  it("does NOT require `TtsIncrementalText` for sync `synthesize`", () => {
    const eff = SpeechSynthesizer.synthesize({ text: "hi", model: "m", voiceId: "v" })
    expectTypeOf(eff).toEqualTypeOf<
      Effect.Effect<AudioBlob, AiError.AiError, SpeechSynthesizer.SpeechSynthesizer>
    >()
  })

  it("does NOT require `TtsIncrementalText` for full-text `streamSynthesis`", () => {
    const audio = SpeechSynthesizer.streamSynthesis({ text: "hi", model: "m", voiceId: "v" })
    expectTypeOf(audio).toEqualTypeOf<
      Stream.Stream<AudioChunk, AiError.AiError, SpeechSynthesizer.SpeechSynthesizer>
    >()
  })

  it("a layer without the marker leaves `TtsIncrementalText` unsatisfied in R", () => {
    const noMarker = MockSpeechSynthesizer.layerWithoutIncremental({})
    const tokens: Stream.Stream<string> = Stream.fromIterable(["a"])
    const audio = tokens.pipe(SpeechSynthesizer.streamSynthesisFrom(ssfReq))
    const program = Stream.runDrain(audio).pipe(Effect.provide(noMarker.layer))
    // `SpeechSynthesizer` is provided by the layer; `TtsIncrementalText` is not,
    // so it remains in R — calling `Effect.runPromise(program)` would be a type
    // error because runPromise requires `R = never`.
    expectTypeOf(program).toEqualTypeOf<
      Effect.Effect<void, AiError.AiError, SpeechSynthesizer.TtsIncrementalText>
    >()
  })

  it("a full layer (with marker) clears R to never", () => {
    const fullMock = MockSpeechSynthesizer.layer({
      streamSynthesisFromChunks: [[]],
    })
    const tokens: Stream.Stream<string> = Stream.fromIterable(["a"])
    const audio = tokens.pipe(SpeechSynthesizer.streamSynthesisFrom(ssfReq))
    const program = Stream.runDrain(audio).pipe(Effect.provide(fullMock.layer))
    expectTypeOf(program).toEqualTypeOf<Effect.Effect<void, AiError.AiError, never>>()
  })
})

describe("SpeechSynthesizer.streamSynthesisFrom", () => {
  const ssfReq: SpeechSynthesizer.CommonStreamSynthesizeRequest = {
    model: "mock-tts",
    voiceId: "stock-voice",
  }

  it("pipes an LLM-style text stream into audio chunks", async () => {
    const mock = MockSpeechSynthesizer.layer({
      streamSynthesisFromChunks: [[chunk(10), chunk(20)]],
    })
    const tokens = Stream.fromIterable(["Hello, ", "world."])
    const audio = tokens.pipe(SpeechSynthesizer.streamSynthesisFrom(ssfReq))
    const out = await Effect.runPromise(Stream.runCollect(audio).pipe(Effect.provide(mock.layer)))
    expect(out.map((c) => Array.from(c.bytes))).toEqual([[10], [20]])
  })

  it("records the request on the streamSynthesisFrom call channel", async () => {
    const mock = MockSpeechSynthesizer.layer({
      streamSynthesisFromChunks: [[chunk(42)]],
    })
    const program = Effect.gen(function* () {
      yield* Stream.runDrain(
        Stream.fromIterable(["x"]).pipe(SpeechSynthesizer.streamSynthesisFrom(ssfReq)),
      )
      return yield* mock.recorder
    })
    const rec = await Effect.runPromise(program.pipe(Effect.provide(mock.layer)))
    // The mock routes each API onto its own recorder channel; only the
    // streamSynthesisFrom channel should have been touched.
    expect(rec.streamSynthesisFromCalls.length).toBe(1)
    expect(rec.streamSynthesisFromCalls[0]!.voiceId).toBe("stock-voice")
    expect(rec.synthesizeCalls.length).toBe(0)
    expect(rec.streamSynthesisCalls.length).toBe(0)
  })
})
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import { Context, Effect, Function, Stream } from "effect"
|
|
2
|
+
import * as AiError from "../domain/AiError.js"
|
|
3
|
+
import type { AudioBlob, AudioChunk, AudioFormat } from "../domain/Audio.js"
|
|
4
|
+
|
|
5
|
+
/**
 * Cross-provider synthesis request. Provider-specific extensions
 * (ElevenLabs `stability` / `similarity_boost`, Cartesia `emotion`,
 * MiniMax `vol` / `pitch`, Azure SSML style tags) live on each
 * provider's typed request which extends this and narrows `model` and
 * `voiceId`.
 */
export type CommonSynthesizeRequest = {
  /** Full text of the utterance to synthesize. */
  readonly text: string
  /** Model identifier. Each provider narrows. */
  readonly model: string
  /**
   * Voice identifier. Per-provider request types narrow this to a
   * typed literal union of stock voices + `(string & {})` escape for
   * custom cloned voice IDs. Providers without custom-voice support
   * (OpenAI, Deepgram Aura, AWS Polly) narrow to the stock-only union.
   */
  readonly voiceId: string
  /** Desired output container/encoding; provider default when omitted. */
  readonly outputFormat?: AudioFormat
  // NOTE(review): scale/range of `speed` (e.g. 1.0 = normal) is not
  // established here — confirm against the provider adapters.
  readonly speed?: number
  /** Language hint for the synthesized speech — presumably a BCP-47 tag; verify against adapters. */
  readonly languageCode?: string
}

/**
 * Incremental-synthesis request — text arrives as `Stream<string>`.
 * Gated by the `TtsIncrementalText` capability marker; only providers
 * that ship the marker can be used.
 *
 * Multi-context features (Cartesia `context_id`, ElevenLabs `multi-
 * stream-input`) are NOT exposed here — one logical utterance per
 * call. Provider extensions can expose `forkContext` for that.
 */
export type CommonStreamSynthesizeRequest = Omit<CommonSynthesizeRequest, "text">
|
|
38
|
+
|
|
39
|
+
/**
 * Provider-implemented surface for text-to-speech. Adapter layers
 * implement these operations; application code goes through the
 * module-level helpers (`synthesize`, `streamSynthesis`,
 * `streamSynthesisFrom`) which resolve this service from the context.
 */
export type SpeechSynthesizerService = {
  /** One-shot. Full text in, full audio bytes out. Universally supported. */
  readonly synthesize: (
    request: CommonSynthesizeRequest,
  ) => Effect.Effect<AudioBlob, AiError.AiError>
  /**
   * Full text in, audio chunks streamed out (chunked HTTP). Universally
   * supported across providers that offer any streaming TTS at all.
   */
  readonly streamSynthesis: (
    request: CommonSynthesizeRequest,
  ) => Stream.Stream<AudioChunk, AiError.AiError>
  /**
   * Incremental text in (as a Stream), audio chunks streamed out. The
   * underlying WS connection is acquired on first pull and released
   * when the output stream is finalized via `Stream.scoped`.
   *
   * Gated by the `TtsIncrementalText` capability marker on the top-
   * level helper — providers without WS-style incremental input don't
   * ship the marker, so calls fail at `Effect.provide` with a type
   * error.
   */
  readonly streamSynthesisFrom: <E, R>(
    textIn: Stream.Stream<string, E, R>,
    request: CommonStreamSynthesizeRequest,
  ) => Stream.Stream<AudioChunk, AiError.AiError | E, R>
}
|
|
66
|
+
|
|
67
|
+
/**
 * Context tag for the text-to-speech service. Provider layers supply a
 * `SpeechSynthesizerService` implementation under this tag.
 */
export class SpeechSynthesizer extends Context.Service<
  SpeechSynthesizer,
  SpeechSynthesizerService
>()("@betalyra/effect-uai/SpeechSynthesizer") {}

/**
 * Capability marker — provided by provider layers whose
 * `streamSynthesisFrom` is wired up at the wire level. OpenAI, Azure
 * (wire), and AWS Polly non-Generative do not ship it. Calling
 * `streamSynthesisFrom` while only one of those Layers is in scope
 * fails at `Effect.provide` with a type error.
 *
 * Phantom — the value is `void`; providers register with
 * `Layer.succeed(TtsIncrementalText, undefined)`.
 */
export class TtsIncrementalText extends Context.Service<TtsIncrementalText, void>()(
  "@betalyra/effect-uai/capability/TtsIncrementalText",
) {}
|
|
85
|
+
|
|
86
|
+
/** One-shot synthesis. */
|
|
87
|
+
export const synthesize = (
|
|
88
|
+
request: CommonSynthesizeRequest,
|
|
89
|
+
): Effect.Effect<AudioBlob, AiError.AiError, SpeechSynthesizer> =>
|
|
90
|
+
Effect.flatMap(SpeechSynthesizer.asEffect(), (s) => s.synthesize(request))
|
|
91
|
+
|
|
92
|
+
/** Full text in, audio chunks out. */
|
|
93
|
+
export const streamSynthesis = (
|
|
94
|
+
request: CommonSynthesizeRequest,
|
|
95
|
+
): Stream.Stream<AudioChunk, AiError.AiError, SpeechSynthesizer> =>
|
|
96
|
+
Stream.unwrap(Effect.map(SpeechSynthesizer.asEffect(), (s) => s.streamSynthesis(request)))
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* Incremental synthesis. Dual-arity: pipeable (data-last) and direct
|
|
100
|
+
* (data-first). Requires `TtsIncrementalText` in R — providers without
|
|
101
|
+
* incremental-text-in support are a type error at provide time.
|
|
102
|
+
*
|
|
103
|
+
* @example
|
|
104
|
+
* ```ts
|
|
105
|
+
* const audio = LanguageModel.streamTurn(turnReq).pipe(
|
|
106
|
+
* Stream.filterMap(Turn.toTextDelta),
|
|
107
|
+
* SpeechSynthesizer.streamSynthesisFrom(synthReq),
|
|
108
|
+
* )
|
|
109
|
+
* ```
|
|
110
|
+
*/
|
|
111
|
+
export const streamSynthesisFrom: {
|
|
112
|
+
(
|
|
113
|
+
request: CommonStreamSynthesizeRequest,
|
|
114
|
+
): <E, R>(
|
|
115
|
+
textIn: Stream.Stream<string, E, R>,
|
|
116
|
+
) => Stream.Stream<AudioChunk, AiError.AiError | E, R | SpeechSynthesizer | TtsIncrementalText>
|
|
117
|
+
<E, R>(
|
|
118
|
+
textIn: Stream.Stream<string, E, R>,
|
|
119
|
+
request: CommonStreamSynthesizeRequest,
|
|
120
|
+
): Stream.Stream<AudioChunk, AiError.AiError | E, R | SpeechSynthesizer | TtsIncrementalText>
|
|
121
|
+
} = Function.dual(
|
|
122
|
+
2,
|
|
123
|
+
<E, R>(textIn: Stream.Stream<string, E, R>, request: CommonStreamSynthesizeRequest) =>
|
|
124
|
+
Stream.unwrap(
|
|
125
|
+
Effect.gen(function* () {
|
|
126
|
+
const s = yield* SpeechSynthesizer.asEffect()
|
|
127
|
+
yield* TtsIncrementalText.asEffect()
|
|
128
|
+
return s.streamSynthesisFrom(textIn, request)
|
|
129
|
+
}),
|
|
130
|
+
),
|
|
131
|
+
)
|
package/src/streaming/JSONL.ts
CHANGED
|
@@ -61,23 +61,26 @@ export const fromBytes = <E, R>(
|
|
|
61
61
|
* decode errors both surface as a `JsonParseError` so callers can `catchTag`
|
|
62
62
|
* uniformly.
|
|
63
63
|
*/
|
|
64
|
-
export const parse =
|
|
65
|
-
|
|
66
|
-
<E, R>(self: Stream.Stream<string, E, R>): Stream.Stream<A, JsonParseError | E, R> =>
|
|
64
|
+
export const parse = <A, I>(schema: Schema.Codec<A, I>) => {
|
|
65
|
+
const decode = Schema.decodeUnknownEffect(Schema.fromJsonString(schema))
|
|
66
|
+
return <E, R>(self: Stream.Stream<string, E, R>): Stream.Stream<A, JsonParseError | E, R> =>
|
|
67
67
|
self.pipe(
|
|
68
68
|
Stream.mapEffect((line) =>
|
|
69
|
-
Effect.
|
|
70
|
-
try: () => JSON.parse(line) as unknown,
|
|
71
|
-
catch: (cause) => new JsonParseError({ line, cause }),
|
|
72
|
-
}).pipe(
|
|
73
|
-
Effect.flatMap((value) =>
|
|
74
|
-
Schema.decodeUnknownEffect(schema)(value).pipe(
|
|
75
|
-
Effect.mapError((cause) => new JsonParseError({ line, cause })),
|
|
76
|
-
),
|
|
77
|
-
),
|
|
78
|
-
),
|
|
69
|
+
decode(line).pipe(Effect.mapError((cause) => new JsonParseError({ line, cause }))),
|
|
79
70
|
),
|
|
80
71
|
)
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
// Shared JSON-string → unknown decoder, hoisted so every `parseSafe`
// call reuses the same decoder instead of rebuilding it per frame.
const decodeUnknownFromJson = Schema.decodeUnknownEffect(Schema.fromJsonString(Schema.Unknown))

/**
 * Best-effort parse of a single JSON frame. Returns the parsed value or
 * `undefined` on malformed input. Realtime WS adapters use this to skip
 * non-JSON or partially-received frames silently rather than fail the
 * entire session over one bad frame.
 */
export const parseSafe = (raw: string): Effect.Effect<unknown> =>
  decodeUnknownFromJson(raw).pipe(Effect.orElseSucceed(() => undefined))
|
|
81
84
|
|
|
82
85
|
const encoder = new TextEncoder()
|
|
83
86
|
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import { Effect, Exit, Filter, Result, Schema, Stream } from "effect"
|
|
2
|
+
import { describe, expect, it } from "vitest"
|
|
3
|
+
import {
|
|
4
|
+
decodeJsonLines,
|
|
5
|
+
decodeJsonLinesRecoverable,
|
|
6
|
+
fromEffectSchema,
|
|
7
|
+
JsonParseError,
|
|
8
|
+
StructuredDecodeError,
|
|
9
|
+
} from "./StructuredFormat.js"
|
|
10
|
+
|
|
11
|
+
// Fixtures: a two-field schema, its StructuredFormat wrapper, and small
// helpers to build a string stream and collect a stream into an array.
const Item = Schema.Struct({ id: Schema.Number, name: Schema.String })
type Item = typeof Item.Type
const itemFormat = fromEffectSchema(Item, { name: "Item" })

const linesOf = (...xs: ReadonlyArray<string>): Stream.Stream<string> => Stream.fromIterable(xs)

const collect = <A, E>(s: Stream.Stream<A, E>) =>
  Effect.runPromise(Stream.runCollect(s).pipe(Effect.map((c) => Array.from(c))))

describe("decodeJsonLinesRecoverable", () => {
  it("yields a Success for each well-formed line", async () => {
    const out = await collect(
      linesOf('{"id":1,"name":"a"}', '{"id":2,"name":"b"}').pipe(
        decodeJsonLinesRecoverable(itemFormat),
      ),
    )

    expect(out).toHaveLength(2)
    expect(Result.isSuccess(out[0]!)).toBe(true)
    expect(Result.isSuccess(out[1]!)).toBe(true)
    if (Result.isSuccess(out[0]!) && Result.isSuccess(out[1]!)) {
      expect(out[0]!.success).toEqual<Item>({ id: 1, name: "a" })
      expect(out[1]!.success).toEqual<Item>({ id: 2, name: "b" })
    }
  })

  it("yields a Failure for a malformed JSON line WITHOUT aborting the stream", async () => {
    const out = await collect(
      linesOf('{"id":1,"name":"a"}', "not json at all", '{"id":3,"name":"c"}').pipe(
        decodeJsonLinesRecoverable(itemFormat),
      ),
    )

    expect(out).toHaveLength(3)
    expect(Result.isSuccess(out[0]!)).toBe(true)
    expect(Result.isFailure(out[1]!)).toBe(true)
    expect(Result.isSuccess(out[2]!)).toBe(true)
    if (Result.isFailure(out[1]!)) {
      expect(out[1]!.failure).toBeInstanceOf(JsonParseError)
    }
  })

  it("yields a Failure for a schema-invalid line without aborting", async () => {
    const out = await collect(
      linesOf(
        '{"id":1,"name":"a"}',
        '{"id":"not-a-number","name":"b"}', // schema fail
        '{"id":3,"name":"c"}',
      ).pipe(decodeJsonLinesRecoverable(itemFormat)),
    )

    expect(out).toHaveLength(3)
    expect(Result.isSuccess(out[0]!)).toBe(true)
    expect(Result.isFailure(out[1]!)).toBe(true)
    expect(Result.isSuccess(out[2]!)).toBe(true)
    if (Result.isFailure(out[1]!)) {
      // Valid JSON that fails the schema is a distinct error type.
      expect(out[1]!.failure).toBeInstanceOf(StructuredDecodeError)
    }
  })

  it("propagates upstream errors normally (only DECODE failures are lifted into Result)", async () => {
    const boom = new Error("upstream broke")
    const stream = Stream.concat(linesOf('{"id":1,"name":"a"}'), Stream.fail(boom)).pipe(
      decodeJsonLinesRecoverable(itemFormat),
    )

    const exit = await Effect.runPromise(Effect.exit(Stream.runCollect(stream)))
    expect(Exit.isFailure(exit)).toBe(true)
  })

  it("composes with filter-success / log-and-continue", async () => {
    const out = await collect(
      linesOf('{"id":1,"name":"a"}', "garbage", '{"id":2,"name":"b"}').pipe(
        decodeJsonLinesRecoverable(itemFormat),
        Stream.filterMap(Filter.fromPredicateOption(Result.getSuccess)),
      ),
    )

    expect(out).toEqual<Array<Item>>([
      { id: 1, name: "a" },
      { id: 2, name: "b" },
    ])
  })
})

describe("decodeJsonLines (fail-fast, sanity)", () => {
  it("aborts the stream on the first bad line", async () => {
    const stream = linesOf('{"id":1,"name":"a"}', "garbage", '{"id":3,"name":"c"}').pipe(
      decodeJsonLines(itemFormat),
    )

    const exit = await Effect.runPromise(Effect.exit(Stream.runCollect(stream)))
    expect(Exit.isFailure(exit)).toBe(true)
  })
})
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { StandardJSONSchemaV1, StandardSchemaV1 } from "@standard-schema/spec"
|
|
2
|
-
import { Data, Effect, Match, Schema, Stream, pipe } from "effect"
|
|
2
|
+
import { Data, Effect, Match, Result, Schema, Stream, pipe } from "effect"
|
|
3
3
|
|
|
4
4
|
// ---------------------------------------------------------------------------
|
|
5
5
|
// Types
|
|
@@ -158,3 +158,16 @@ export const decodeJsonLines =
|
|
|
158
158
|
self: Stream.Stream<string, E, R>,
|
|
159
159
|
): Stream.Stream<A, E | JsonParseError | StructuredDecodeError, R> =>
|
|
160
160
|
self.pipe(Stream.mapEffect(parseJson(format)))
|
|
161
|
+
|
|
162
|
+
/**
|
|
163
|
+
* Like {@link decodeJsonLines}, but each line yields a `Result` instead of
|
|
164
|
+
* failing the stream. Use when one bad line shouldn't abort the rest —
|
|
165
|
+
* log-and-continue, or partial-recovery with a corrective re-prompt.
|
|
166
|
+
* Upstream errors (the input stream's own `E`) still propagate normally.
|
|
167
|
+
*/
|
|
168
|
+
export const decodeJsonLinesRecoverable =
|
|
169
|
+
<A>(format: StructuredFormat<A>) =>
|
|
170
|
+
<E, R>(
|
|
171
|
+
self: Stream.Stream<string, E, R>,
|
|
172
|
+
): Stream.Stream<Result.Result<A, JsonParseError | StructuredDecodeError>, E, R> =>
|
|
173
|
+
self.pipe(Stream.mapEffect((line) => Effect.result(parseJson(format)(line))))
|