@effect-uai/core 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{AiError-CBuPHVKA.d.mts → AiError-CAX_48RU.d.mts} +27 -5
- package/dist/{AiError-CBuPHVKA.d.mts.map → AiError-CAX_48RU.d.mts.map} +1 -1
- package/dist/Audio-BfCTGnH3.d.mts +61 -0
- package/dist/Audio-BfCTGnH3.d.mts.map +1 -0
- package/dist/{Image-BZmKfIdq.d.mts → Image-HNmMpMTh.d.mts} +1 -1
- package/dist/{Image-BZmKfIdq.d.mts.map → Image-HNmMpMTh.d.mts.map} +1 -1
- package/dist/{Items-CB8Bo3FI.d.mts → Items-DqbaJoz7.d.mts} +5 -5
- package/dist/{Items-CB8Bo3FI.d.mts.map → Items-DqbaJoz7.d.mts.map} +1 -1
- package/dist/{StructuredFormat-BWq5Hd1O.d.mts → StructuredFormat-BbN4dosH.d.mts} +11 -4
- package/dist/StructuredFormat-BbN4dosH.d.mts.map +1 -0
- package/dist/{Tool-DjVufH7i.d.mts → Tool-Y0__Py1H.d.mts} +20 -4
- package/dist/Tool-Y0__Py1H.d.mts.map +1 -0
- package/dist/Turn-ChbL2foc.d.mts +388 -0
- package/dist/Turn-ChbL2foc.d.mts.map +1 -0
- package/dist/domain/AiError.d.mts +2 -2
- package/dist/domain/AiError.mjs +19 -3
- package/dist/domain/AiError.mjs.map +1 -1
- package/dist/domain/Audio.d.mts +2 -0
- package/dist/domain/Audio.mjs +14 -0
- package/dist/domain/Audio.mjs.map +1 -0
- package/dist/domain/Image.d.mts +1 -1
- package/dist/domain/Items.d.mts +1 -1
- package/dist/domain/Items.mjs +1 -1
- package/dist/domain/Items.mjs.map +1 -1
- package/dist/domain/Music.d.mts +116 -0
- package/dist/domain/Music.d.mts.map +1 -0
- package/dist/domain/Music.mjs +29 -0
- package/dist/domain/Music.mjs.map +1 -0
- package/dist/domain/Transcript.d.mts +95 -0
- package/dist/domain/Transcript.d.mts.map +1 -0
- package/dist/domain/Transcript.mjs +22 -0
- package/dist/domain/Transcript.mjs.map +1 -0
- package/dist/domain/Turn.d.mts +2 -2
- package/dist/domain/Turn.mjs +22 -4
- package/dist/domain/Turn.mjs.map +1 -1
- package/dist/domain/Turn.test.d.mts +1 -0
- package/dist/domain/Turn.test.mjs +136 -0
- package/dist/domain/Turn.test.mjs.map +1 -0
- package/dist/embedding-model/Embedding.d.mts +15 -3
- package/dist/embedding-model/Embedding.d.mts.map +1 -1
- package/dist/embedding-model/Embedding.mjs.map +1 -1
- package/dist/embedding-model/EmbeddingModel.d.mts +33 -17
- package/dist/embedding-model/EmbeddingModel.d.mts.map +1 -1
- package/dist/embedding-model/EmbeddingModel.mjs.map +1 -1
- package/dist/embedding-model/EmbeddingModel.test.d.mts +1 -0
- package/dist/embedding-model/EmbeddingModel.test.mjs +59 -0
- package/dist/embedding-model/EmbeddingModel.test.mjs.map +1 -0
- package/dist/index.d.mts +13 -7
- package/dist/index.mjs +7 -1
- package/dist/language-model/LanguageModel.d.mts +30 -8
- package/dist/language-model/LanguageModel.d.mts.map +1 -1
- package/dist/language-model/LanguageModel.mjs +33 -3
- package/dist/language-model/LanguageModel.mjs.map +1 -1
- package/dist/language-model/LanguageModel.test.d.mts +1 -0
- package/dist/language-model/LanguageModel.test.mjs +143 -0
- package/dist/language-model/LanguageModel.test.mjs.map +1 -0
- package/dist/loop/Loop.d.mts +94 -11
- package/dist/loop/Loop.d.mts.map +1 -1
- package/dist/loop/Loop.mjs +92 -26
- package/dist/loop/Loop.mjs.map +1 -1
- package/dist/loop/Loop.test.mjs +171 -3
- package/dist/loop/Loop.test.mjs.map +1 -1
- package/dist/music-generator/MusicGenerator.d.mts +77 -0
- package/dist/music-generator/MusicGenerator.d.mts.map +1 -0
- package/dist/music-generator/MusicGenerator.mjs +51 -0
- package/dist/music-generator/MusicGenerator.mjs.map +1 -0
- package/dist/music-generator/MusicGenerator.test.d.mts +1 -0
- package/dist/music-generator/MusicGenerator.test.mjs +154 -0
- package/dist/music-generator/MusicGenerator.test.mjs.map +1 -0
- package/dist/observability/Metrics.d.mts +1 -1
- package/dist/observability/Metrics.mjs +1 -1
- package/dist/observability/Metrics.mjs.map +1 -1
- package/dist/speech-synthesizer/SpeechSynthesizer.d.mts +96 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.d.mts.map +1 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.mjs +48 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.mjs.map +1 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.test.d.mts +1 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.test.mjs +112 -0
- package/dist/speech-synthesizer/SpeechSynthesizer.test.mjs.map +1 -0
- package/dist/streaming/JSONL.d.mts +10 -3
- package/dist/streaming/JSONL.d.mts.map +1 -1
- package/dist/streaming/JSONL.mjs +15 -9
- package/dist/streaming/JSONL.mjs.map +1 -1
- package/dist/structured-format/StructuredFormat.d.mts +2 -2
- package/dist/structured-format/StructuredFormat.mjs +9 -1
- package/dist/structured-format/StructuredFormat.mjs.map +1 -1
- package/dist/structured-format/StructuredFormat.test.d.mts +1 -0
- package/dist/structured-format/StructuredFormat.test.mjs +70 -0
- package/dist/structured-format/StructuredFormat.test.mjs.map +1 -0
- package/dist/testing/MockMusicGenerator.d.mts +39 -0
- package/dist/testing/MockMusicGenerator.d.mts.map +1 -0
- package/dist/testing/MockMusicGenerator.mjs +96 -0
- package/dist/testing/MockMusicGenerator.mjs.map +1 -0
- package/dist/testing/MockProvider.d.mts +23 -18
- package/dist/testing/MockProvider.d.mts.map +1 -1
- package/dist/testing/MockProvider.mjs +56 -72
- package/dist/testing/MockProvider.mjs.map +1 -1
- package/dist/testing/MockSpeechSynthesizer.d.mts +37 -0
- package/dist/testing/MockSpeechSynthesizer.d.mts.map +1 -0
- package/dist/testing/MockSpeechSynthesizer.mjs +95 -0
- package/dist/testing/MockSpeechSynthesizer.mjs.map +1 -0
- package/dist/testing/MockTranscriber.d.mts +37 -0
- package/dist/testing/MockTranscriber.d.mts.map +1 -0
- package/dist/testing/MockTranscriber.mjs +77 -0
- package/dist/testing/MockTranscriber.mjs.map +1 -0
- package/dist/tool/HistoryCheck.d.mts +1 -1
- package/dist/tool/Outcome.d.mts +1 -1
- package/dist/tool/Resolvers.d.mts +65 -8
- package/dist/tool/Resolvers.d.mts.map +1 -1
- package/dist/tool/Resolvers.mjs +8 -12
- package/dist/tool/Resolvers.mjs.map +1 -1
- package/dist/tool/Resolvers.test.mjs +6 -5
- package/dist/tool/Resolvers.test.mjs.map +1 -1
- package/dist/tool/Tool.d.mts +2 -2
- package/dist/tool/Tool.mjs +18 -1
- package/dist/tool/Tool.mjs.map +1 -1
- package/dist/tool/Tool.test.d.mts +1 -0
- package/dist/tool/Tool.test.mjs +66 -0
- package/dist/tool/Tool.test.mjs.map +1 -0
- package/dist/tool/Toolkit.d.mts +4 -6
- package/dist/tool/Toolkit.d.mts.map +1 -1
- package/dist/tool/Toolkit.mjs +14 -43
- package/dist/tool/Toolkit.mjs.map +1 -1
- package/dist/transcriber/Transcriber.d.mts +101 -0
- package/dist/transcriber/Transcriber.d.mts.map +1 -0
- package/dist/transcriber/Transcriber.mjs +49 -0
- package/dist/transcriber/Transcriber.mjs.map +1 -0
- package/dist/transcriber/Transcriber.test.d.mts +1 -0
- package/dist/transcriber/Transcriber.test.mjs +130 -0
- package/dist/transcriber/Transcriber.test.mjs.map +1 -0
- package/package.json +37 -1
- package/src/domain/AiError.ts +22 -1
- package/src/domain/Audio.ts +88 -0
- package/src/domain/Items.ts +1 -1
- package/src/domain/Music.ts +121 -0
- package/src/domain/Transcript.ts +83 -0
- package/src/domain/Turn.test.ts +141 -0
- package/src/domain/Turn.ts +50 -43
- package/src/embedding-model/Embedding.ts +23 -0
- package/src/embedding-model/EmbeddingModel.test.ts +92 -0
- package/src/embedding-model/EmbeddingModel.ts +30 -20
- package/src/index.ts +6 -0
- package/src/language-model/LanguageModel.test.ts +170 -0
- package/src/language-model/LanguageModel.ts +64 -1
- package/src/loop/Loop.test.ts +256 -3
- package/src/loop/Loop.ts +225 -49
- package/src/music-generator/MusicGenerator.test.ts +170 -0
- package/src/music-generator/MusicGenerator.ts +123 -0
- package/src/observability/Metrics.ts +1 -1
- package/src/speech-synthesizer/SpeechSynthesizer.test.ts +141 -0
- package/src/speech-synthesizer/SpeechSynthesizer.ts +131 -0
- package/src/streaming/JSONL.ts +16 -13
- package/src/structured-format/StructuredFormat.test.ts +105 -0
- package/src/structured-format/StructuredFormat.ts +14 -1
- package/src/testing/MockMusicGenerator.ts +168 -0
- package/src/testing/MockProvider.ts +126 -105
- package/src/testing/MockSpeechSynthesizer.ts +163 -0
- package/src/testing/MockTranscriber.ts +137 -0
- package/src/tool/Resolvers.test.ts +8 -5
- package/src/tool/Resolvers.ts +17 -19
- package/src/tool/Tool.test.ts +105 -0
- package/src/tool/Tool.ts +20 -0
- package/src/tool/Toolkit.ts +49 -50
- package/src/transcriber/Transcriber.test.ts +125 -0
- package/src/transcriber/Transcriber.ts +127 -0
- package/dist/StructuredFormat-BWq5Hd1O.d.mts.map +0 -1
- package/dist/Tool-DjVufH7i.d.mts.map +0 -1
- package/dist/Turn-OPaILVIB.d.mts +0 -194
- package/dist/Turn-OPaILVIB.d.mts.map +0 -1
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { n as __exportAll } from "../chunk-uyGKjUfl.mjs";
|
|
2
|
+
import { Context, Effect, Function, Stream } from "effect";
|
|
3
|
+
//#region src/transcriber/Transcriber.ts
|
|
4
|
+
var Transcriber_exports = /* @__PURE__ */ __exportAll({
|
|
5
|
+
SttStreaming: () => SttStreaming,
|
|
6
|
+
Transcriber: () => Transcriber,
|
|
7
|
+
streamTranscriptionFrom: () => streamTranscriptionFrom,
|
|
8
|
+
transcribe: () => transcribe
|
|
9
|
+
});
|
|
10
|
+
var Transcriber = class extends Context.Service()("@betalyra/effect-uai/Transcriber") {};
|
|
11
|
+
/**
|
|
12
|
+
* Capability marker — provided by provider layers whose
|
|
13
|
+
* `streamTranscriptionFrom` is wired up at the wire level. Azure does
|
|
14
|
+
* not ship it (streaming-STT is SDK-internal). Calling
|
|
15
|
+
* `streamTranscriptionFrom` while only Azure's Layer is in scope fails
|
|
16
|
+
* at `Effect.provide` with a type error, not at runtime.
|
|
17
|
+
*
|
|
18
|
+
* Phantom — the value is `void`; providers register with
|
|
19
|
+
* `Layer.succeed(SttStreaming, undefined)`.
|
|
20
|
+
*/
|
|
21
|
+
var SttStreaming = class extends Context.Service()("@betalyra/effect-uai/capability/SttStreaming") {};
|
|
22
|
+
/** One-shot transcription. */
|
|
23
|
+
const transcribe = (request) => Effect.flatMap(Transcriber.asEffect(), (t) => t.transcribe(request));
|
|
24
|
+
/**
|
|
25
|
+
* Live transcription. Dual-arity: pipeable (data-last) and direct
|
|
26
|
+
* (data-first). Requires `SttStreaming` in R — providers without
|
|
27
|
+
* streaming support are a type error at provide time.
|
|
28
|
+
*
|
|
29
|
+
* @example
|
|
30
|
+
* ```ts
|
|
31
|
+
* // Pipeable — composes with other Stream operators
|
|
32
|
+
* mic.frames.pipe(
|
|
33
|
+
* Transcriber.streamTranscriptionFrom(req),
|
|
34
|
+
* Stream.filter((e) => e._tag === "final"),
|
|
35
|
+
* )
|
|
36
|
+
*
|
|
37
|
+
* // Direct
|
|
38
|
+
* Transcriber.streamTranscriptionFrom(mic.frames, req)
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
const streamTranscriptionFrom = Function.dual(2, (audioIn, request) => Stream.unwrap(Effect.gen(function* () {
|
|
42
|
+
const t = yield* Transcriber.asEffect();
|
|
43
|
+
yield* SttStreaming.asEffect();
|
|
44
|
+
return t.streamTranscriptionFrom(audioIn, request);
|
|
45
|
+
})));
|
|
46
|
+
//#endregion
|
|
47
|
+
export { SttStreaming, Transcriber, streamTranscriptionFrom, Transcriber_exports as t, transcribe };
|
|
48
|
+
|
|
49
|
+
//# sourceMappingURL=Transcriber.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Transcriber.mjs","names":[],"sources":["../../src/transcriber/Transcriber.ts"],"sourcesContent":["import { Context, Effect, Function, Stream } from \"effect\"\nimport * as AiError from \"../domain/AiError.js\"\nimport type { AudioFormat, AudioSource } from \"../domain/Audio.js\"\nimport type { TranscriptEvent, TranscriptResult } from \"../domain/Transcript.js\"\n\n/**\n * Cross-provider sync transcription request. Provider-specific\n * extensions (Deepgram `keyterm[]`, ElevenLabs `diarize`, Google\n * `adaptation`, …) live on each provider's typed request which extends\n * this and narrows `model`.\n */\nexport type CommonTranscribeRequest = {\n readonly audio: AudioSource\n /** Model identifier. Each provider narrows to its typed literal union. */\n readonly model: string\n /** ISO-639-1 / BCP-47. Omit for autodetection (where supported). */\n readonly language?: string\n /**\n * Vocab biasing. Single-string covers OpenAI/Whisper-style prompts;\n * `terms[]` covers Deepgram `keyterm`, Google adaptation phrases, AWS\n * `vocabularyName`. Providers ignore what they don't support.\n */\n readonly prompt?: string | { readonly terms: ReadonlyArray<string> }\n readonly diarization?: boolean\n readonly wordTimestamps?: boolean\n}\n\n/**\n * Streaming-transcription request. `inputFormat` declares what the\n * bytes in the input stream will look like — providers reject\n * mismatches at stream startup with `AiError.InvalidRequest`.\n */\nexport type CommonStreamTranscribeRequest = Omit<CommonTranscribeRequest, \"audio\"> & {\n readonly inputFormat: AudioFormat\n readonly interimResults?: boolean\n readonly vadEvents?: boolean\n}\n\nexport type TranscriberService = {\n /**\n * One-shot transcription. 
Universal — AWS Transcribe (which has no\n * native sync endpoint) emulates this by draining a streaming session\n * internally.\n */\n readonly transcribe: (\n request: CommonTranscribeRequest,\n ) => Effect.Effect<TranscriptResult, AiError.AiError>\n /**\n * Live transcription as a Stream transformer. Consumes audio bytes\n * from `audioIn`; emits `TranscriptEvent`s as they arrive. The\n * underlying WS / gRPC connection is acquired on first pull and\n * released when the output stream is finalized (success, failure, or\n * interruption) via `Stream.scoped` — no explicit Scope handling at\n * the call site.\n *\n * Gated by the `SttStreaming` capability marker on the top-level\n * helper — providers without streaming-STT support don't ship the\n * marker, so calls fail at `Effect.provide` with a type error.\n */\n readonly streamTranscriptionFrom: <E, R>(\n audioIn: Stream.Stream<Uint8Array, E, R>,\n request: CommonStreamTranscribeRequest,\n ) => Stream.Stream<TranscriptEvent, AiError.AiError | E, R>\n}\n\nexport class Transcriber extends Context.Service<Transcriber, TranscriberService>()(\n \"@betalyra/effect-uai/Transcriber\",\n) {}\n\n/**\n * Capability marker — provided by provider layers whose\n * `streamTranscriptionFrom` is wired up at the wire level. Azure does\n * not ship it (streaming-STT is SDK-internal). Calling\n * `streamTranscriptionFrom` while only Azure's Layer is in scope fails\n * at `Effect.provide` with a type error, not at runtime.\n *\n * Phantom — the value is `void`; providers register with\n * `Layer.succeed(SttStreaming, undefined)`.\n */\nexport class SttStreaming extends Context.Service<SttStreaming, void>()(\n \"@betalyra/effect-uai/capability/SttStreaming\",\n) {}\n\n/** One-shot transcription. */\nexport const transcribe = (\n request: CommonTranscribeRequest,\n): Effect.Effect<TranscriptResult, AiError.AiError, Transcriber> =>\n Effect.flatMap(Transcriber.asEffect(), (t) => t.transcribe(request))\n\n/**\n * Live transcription. 
Dual-arity: pipeable (data-last) and direct\n * (data-first). Requires `SttStreaming` in R — providers without\n * streaming support are a type error at provide time.\n *\n * @example\n * ```ts\n * // Pipeable — composes with other Stream operators\n * mic.frames.pipe(\n * Transcriber.streamTranscriptionFrom(req),\n * Stream.filter((e) => e._tag === \"final\"),\n * )\n *\n * // Direct\n * Transcriber.streamTranscriptionFrom(mic.frames, req)\n * ```\n */\nexport const streamTranscriptionFrom: {\n (\n request: CommonStreamTranscribeRequest,\n ): <E, R>(\n audioIn: Stream.Stream<Uint8Array, E, R>,\n ) => Stream.Stream<TranscriptEvent, AiError.AiError | E, R | Transcriber | SttStreaming>\n <E, R>(\n audioIn: Stream.Stream<Uint8Array, E, R>,\n request: CommonStreamTranscribeRequest,\n ): Stream.Stream<TranscriptEvent, AiError.AiError | E, R | Transcriber | SttStreaming>\n} = Function.dual(\n 2,\n <E, R>(audioIn: Stream.Stream<Uint8Array, E, R>, request: CommonStreamTranscribeRequest) =>\n Stream.unwrap(\n Effect.gen(function* () {\n const t = yield* Transcriber.asEffect()\n yield* SttStreaming.asEffect()\n return t.streamTranscriptionFrom(audioIn, request)\n }),\n ),\n)\n"],"mappings":";;;;;;;;;AAiEA,IAAa,cAAb,cAAiC,QAAQ,SAA0C,CACjF,mCACD,CAAC;;;;;;;;;;;AAYF,IAAa,eAAb,cAAkC,QAAQ,SAA6B,CACrE,+CACD,CAAC;;AAGF,MAAa,cACX,YAEA,OAAO,QAAQ,YAAY,UAAU,GAAG,MAAM,EAAE,WAAW,QAAQ,CAAC;;;;;;;;;;;;;;;;;;AAmBtE,MAAa,0BAUT,SAAS,KACX,IACO,SAA0C,YAC/C,OAAO,OACL,OAAO,IAAI,aAAa;CACtB,MAAM,IAAI,OAAO,YAAY,UAAU;AACvC,QAAO,aAAa,UAAU;AAC9B,QAAO,EAAE,wBAAwB,SAAS,QAAQ;EAClD,CACH,CACJ"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { };
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import { streamTranscriptionFrom, transcribe } from "./Transcriber.mjs";
|
|
2
|
+
import { i as it, n as globalExpect, r as describe, t as import_dist } from "../dist-DV5ISja1.mjs";
|
|
3
|
+
import { layer, layerSyncOnly } from "../testing/MockTranscriber.mjs";
|
|
4
|
+
import { Effect, Stream } from "effect";
|
|
5
|
+
//#region src/transcriber/Transcriber.test.ts
|
|
6
|
+
describe("Transcriber.transcribe", () => {
|
|
7
|
+
it("returns the scripted TranscriptResult", async () => {
|
|
8
|
+
const mock = layer({ transcripts: [{
|
|
9
|
+
text: "hello world",
|
|
10
|
+
durationSeconds: 1.23
|
|
11
|
+
}] });
|
|
12
|
+
const program = transcribe({
|
|
13
|
+
audio: {
|
|
14
|
+
_tag: "bytes",
|
|
15
|
+
bytes: new Uint8Array([0]),
|
|
16
|
+
mimeType: "audio/wav"
|
|
17
|
+
},
|
|
18
|
+
model: "mock-stt"
|
|
19
|
+
});
|
|
20
|
+
const result = await Effect.runPromise(program.pipe(Effect.provide(mock.layer)));
|
|
21
|
+
globalExpect(result.text).toBe("hello world");
|
|
22
|
+
globalExpect(result.durationSeconds).toBe(1.23);
|
|
23
|
+
});
|
|
24
|
+
it("records each transcribe call", async () => {
|
|
25
|
+
const mock = layer({ transcripts: [{ text: "a" }, { text: "b" }] });
|
|
26
|
+
const program = Effect.gen(function* () {
|
|
27
|
+
yield* transcribe({
|
|
28
|
+
audio: {
|
|
29
|
+
_tag: "bytes",
|
|
30
|
+
bytes: new Uint8Array([1]),
|
|
31
|
+
mimeType: "audio/wav"
|
|
32
|
+
},
|
|
33
|
+
model: "m1"
|
|
34
|
+
});
|
|
35
|
+
yield* transcribe({
|
|
36
|
+
audio: {
|
|
37
|
+
_tag: "bytes",
|
|
38
|
+
bytes: new Uint8Array([2]),
|
|
39
|
+
mimeType: "audio/wav"
|
|
40
|
+
},
|
|
41
|
+
model: "m2"
|
|
42
|
+
});
|
|
43
|
+
return yield* mock.recorder;
|
|
44
|
+
});
|
|
45
|
+
globalExpect((await Effect.runPromise(program.pipe(Effect.provide(mock.layer)))).transcribeCalls.map((c) => c.model)).toEqual(["m1", "m2"]);
|
|
46
|
+
});
|
|
47
|
+
});
|
|
48
|
+
describe("Transcriber capability marker (compile-time)", () => {
|
|
49
|
+
const sttReq = {
|
|
50
|
+
model: "mock-stt",
|
|
51
|
+
inputFormat: {
|
|
52
|
+
container: "raw",
|
|
53
|
+
encoding: "pcm_s16le",
|
|
54
|
+
sampleRate: 16e3
|
|
55
|
+
}
|
|
56
|
+
};
|
|
57
|
+
it("requires `SttStreaming` on the R channel of streamTranscriptionFrom", () => {
|
|
58
|
+
(0, import_dist.expectTypeOf)(Stream.fromIterable([new Uint8Array([0])]).pipe(streamTranscriptionFrom(sttReq))).toEqualTypeOf();
|
|
59
|
+
});
|
|
60
|
+
it("does NOT require `SttStreaming` for sync `transcribe`", () => {
|
|
61
|
+
(0, import_dist.expectTypeOf)(transcribe({
|
|
62
|
+
audio: {
|
|
63
|
+
_tag: "bytes",
|
|
64
|
+
bytes: new Uint8Array([0]),
|
|
65
|
+
mimeType: "audio/wav"
|
|
66
|
+
},
|
|
67
|
+
model: "m"
|
|
68
|
+
})).toEqualTypeOf();
|
|
69
|
+
});
|
|
70
|
+
it("a sync-only layer leaves `SttStreaming` unsatisfied in R", () => {
|
|
71
|
+
const syncOnly = layerSyncOnly({});
|
|
72
|
+
const events = Stream.fromIterable([new Uint8Array([0])]).pipe(streamTranscriptionFrom(sttReq));
|
|
73
|
+
(0, import_dist.expectTypeOf)(Stream.runDrain(events).pipe(Effect.provide(syncOnly.layer))).toEqualTypeOf();
|
|
74
|
+
});
|
|
75
|
+
it("a full layer (with marker) clears R to never", () => {
|
|
76
|
+
const fullMock = layer({ streams: [[]] });
|
|
77
|
+
const events = Stream.fromIterable([new Uint8Array([0])]).pipe(streamTranscriptionFrom(sttReq));
|
|
78
|
+
(0, import_dist.expectTypeOf)(Stream.runDrain(events).pipe(Effect.provide(fullMock.layer))).toEqualTypeOf();
|
|
79
|
+
});
|
|
80
|
+
});
|
|
81
|
+
describe("Transcriber.streamTranscriptionFrom", () => {
|
|
82
|
+
const sttReq = {
|
|
83
|
+
model: "mock-stt",
|
|
84
|
+
inputFormat: {
|
|
85
|
+
container: "raw",
|
|
86
|
+
encoding: "pcm_s16le",
|
|
87
|
+
sampleRate: 16e3
|
|
88
|
+
}
|
|
89
|
+
};
|
|
90
|
+
it("emits scripted events after draining the input audio stream", async () => {
|
|
91
|
+
const mock = layer({ streams: [[{
|
|
92
|
+
_tag: "partial",
|
|
93
|
+
text: "hello"
|
|
94
|
+
}, {
|
|
95
|
+
_tag: "final",
|
|
96
|
+
text: "hello world"
|
|
97
|
+
}]] });
|
|
98
|
+
const events = Stream.fromIterable([new Uint8Array([
|
|
99
|
+
0,
|
|
100
|
+
1,
|
|
101
|
+
2
|
|
102
|
+
]), new Uint8Array([
|
|
103
|
+
3,
|
|
104
|
+
4,
|
|
105
|
+
5
|
|
106
|
+
])]).pipe(streamTranscriptionFrom(sttReq));
|
|
107
|
+
globalExpect(await Effect.runPromise(Stream.runCollect(events).pipe(Effect.provide(mock.layer)))).toEqual([{
|
|
108
|
+
_tag: "partial",
|
|
109
|
+
text: "hello"
|
|
110
|
+
}, {
|
|
111
|
+
_tag: "final",
|
|
112
|
+
text: "hello world"
|
|
113
|
+
}]);
|
|
114
|
+
});
|
|
115
|
+
it("works data-first (direct call) as well as pipeable (data-last)", async () => {
|
|
116
|
+
const mock = layer({ streams: [[{
|
|
117
|
+
_tag: "final",
|
|
118
|
+
text: "x"
|
|
119
|
+
}]] });
|
|
120
|
+
const events = streamTranscriptionFrom(Stream.fromIterable([new Uint8Array([0])]), sttReq);
|
|
121
|
+
globalExpect(await Effect.runPromise(Stream.runCollect(events).pipe(Effect.provide(mock.layer)))).toEqual([{
|
|
122
|
+
_tag: "final",
|
|
123
|
+
text: "x"
|
|
124
|
+
}]);
|
|
125
|
+
});
|
|
126
|
+
});
|
|
127
|
+
//#endregion
|
|
128
|
+
export {};
|
|
129
|
+
|
|
130
|
+
//# sourceMappingURL=Transcriber.test.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"Transcriber.test.mjs","names":["MockTranscriber.layer","Transcriber.transcribe","Transcriber.streamTranscriptionFrom","MockTranscriber.layerSyncOnly"],"sources":["../../src/transcriber/Transcriber.test.ts"],"sourcesContent":["import { Effect, Stream } from \"effect\"\nimport { describe, expect, expectTypeOf, it } from \"vitest\"\nimport type * as AiError from \"../domain/AiError.js\"\nimport type { TranscriptEvent, TranscriptResult } from \"../domain/Transcript.js\"\nimport * as MockTranscriber from \"../testing/MockTranscriber.js\"\nimport * as Transcriber from \"./Transcriber.js\"\n\ndescribe(\"Transcriber.transcribe\", () => {\n it(\"returns the scripted TranscriptResult\", async () => {\n const mock = MockTranscriber.layer({\n transcripts: [{ text: \"hello world\", durationSeconds: 1.23 }],\n })\n const program = Transcriber.transcribe({\n audio: { _tag: \"bytes\", bytes: new Uint8Array([0]), mimeType: \"audio/wav\" },\n model: \"mock-stt\",\n })\n const result = await Effect.runPromise(program.pipe(Effect.provide(mock.layer)))\n expect(result.text).toBe(\"hello world\")\n expect(result.durationSeconds).toBe(1.23)\n })\n\n it(\"records each transcribe call\", async () => {\n const mock = MockTranscriber.layer({\n transcripts: [{ text: \"a\" }, { text: \"b\" }],\n })\n const program = Effect.gen(function* () {\n yield* Transcriber.transcribe({\n audio: { _tag: \"bytes\", bytes: new Uint8Array([1]), mimeType: \"audio/wav\" },\n model: \"m1\",\n })\n yield* Transcriber.transcribe({\n audio: { _tag: \"bytes\", bytes: new Uint8Array([2]), mimeType: \"audio/wav\" },\n model: \"m2\",\n })\n return yield* mock.recorder\n })\n const rec = await Effect.runPromise(program.pipe(Effect.provide(mock.layer)))\n expect(rec.transcribeCalls.map((c) => c.model)).toEqual([\"m1\", \"m2\"])\n })\n})\n\ndescribe(\"Transcriber capability marker (compile-time)\", () => {\n const sttReq: Transcriber.CommonStreamTranscribeRequest = {\n model: \"mock-stt\",\n 
inputFormat: { container: \"raw\", encoding: \"pcm_s16le\", sampleRate: 16000 },\n }\n\n it(\"requires `SttStreaming` on the R channel of streamTranscriptionFrom\", () => {\n const audio: Stream.Stream<Uint8Array> = Stream.fromIterable([new Uint8Array([0])])\n const events = audio.pipe(Transcriber.streamTranscriptionFrom(sttReq))\n expectTypeOf(events).toEqualTypeOf<\n Stream.Stream<\n TranscriptEvent,\n AiError.AiError,\n Transcriber.Transcriber | Transcriber.SttStreaming\n >\n >()\n })\n\n it(\"does NOT require `SttStreaming` for sync `transcribe`\", () => {\n const eff = Transcriber.transcribe({\n audio: { _tag: \"bytes\", bytes: new Uint8Array([0]), mimeType: \"audio/wav\" },\n model: \"m\",\n })\n expectTypeOf(eff).toEqualTypeOf<\n Effect.Effect<TranscriptResult, AiError.AiError, Transcriber.Transcriber>\n >()\n })\n\n it(\"a sync-only layer leaves `SttStreaming` unsatisfied in R\", () => {\n const syncOnly = MockTranscriber.layerSyncOnly({})\n const audio: Stream.Stream<Uint8Array> = Stream.fromIterable([new Uint8Array([0])])\n const events = audio.pipe(Transcriber.streamTranscriptionFrom(sttReq))\n const program = Stream.runDrain(events).pipe(Effect.provide(syncOnly.layer))\n // `Transcriber` is provided by syncOnly.layer; `SttStreaming` is not.\n expectTypeOf(program).toEqualTypeOf<\n Effect.Effect<void, AiError.AiError, Transcriber.SttStreaming>\n >()\n })\n\n it(\"a full layer (with marker) clears R to never\", () => {\n const fullMock = MockTranscriber.layer({ streams: [[]] })\n const audio: Stream.Stream<Uint8Array> = Stream.fromIterable([new Uint8Array([0])])\n const events = audio.pipe(Transcriber.streamTranscriptionFrom(sttReq))\n const program = Stream.runDrain(events).pipe(Effect.provide(fullMock.layer))\n expectTypeOf(program).toEqualTypeOf<Effect.Effect<void, AiError.AiError, never>>()\n })\n})\n\ndescribe(\"Transcriber.streamTranscriptionFrom\", () => {\n const sttReq: Transcriber.CommonStreamTranscribeRequest = {\n model: \"mock-stt\",\n 
inputFormat: { container: \"raw\", encoding: \"pcm_s16le\", sampleRate: 16000 },\n }\n\n it(\"emits scripted events after draining the input audio stream\", async () => {\n const mock = MockTranscriber.layer({\n streams: [\n [\n { _tag: \"partial\", text: \"hello\" },\n { _tag: \"final\", text: \"hello world\" },\n ],\n ],\n })\n const audio = Stream.fromIterable([new Uint8Array([0, 1, 2]), new Uint8Array([3, 4, 5])])\n const events = audio.pipe(Transcriber.streamTranscriptionFrom(sttReq))\n const collected = await Effect.runPromise(\n Stream.runCollect(events).pipe(Effect.provide(mock.layer)),\n )\n expect(collected).toEqual([\n { _tag: \"partial\", text: \"hello\" },\n { _tag: \"final\", text: \"hello world\" },\n ])\n })\n\n it(\"works data-first (direct call) as well as pipeable (data-last)\", async () => {\n const mock = MockTranscriber.layer({\n streams: [[{ _tag: \"final\", text: \"x\" }]],\n })\n const audio = Stream.fromIterable([new Uint8Array([0])])\n const events = Transcriber.streamTranscriptionFrom(audio, sttReq)\n const out = await Effect.runPromise(Stream.runCollect(events).pipe(Effect.provide(mock.layer)))\n expect(out).toEqual([{ _tag: \"final\", text: \"x\" }])\n 
})\n})\n"],"mappings":";;;;;AAOA,SAAS,gCAAgC;AACvC,IAAG,yCAAyC,YAAY;EACtD,MAAM,OAAOA,MAAsB,EACjC,aAAa,CAAC;GAAE,MAAM;GAAe,iBAAiB;GAAM,CAAC,EAC9D,CAAC;EACF,MAAM,UAAUC,WAAuB;GACrC,OAAO;IAAE,MAAM;IAAS,OAAO,IAAI,WAAW,CAAC,EAAE,CAAC;IAAE,UAAU;IAAa;GAC3E,OAAO;GACR,CAAC;EACF,MAAM,SAAS,MAAM,OAAO,WAAW,QAAQ,KAAK,OAAO,QAAQ,KAAK,MAAM,CAAC,CAAC;AAChF,eAAO,OAAO,KAAK,CAAC,KAAK,cAAc;AACvC,eAAO,OAAO,gBAAgB,CAAC,KAAK,KAAK;GACzC;AAEF,IAAG,gCAAgC,YAAY;EAC7C,MAAM,OAAOD,MAAsB,EACjC,aAAa,CAAC,EAAE,MAAM,KAAK,EAAE,EAAE,MAAM,KAAK,CAAC,EAC5C,CAAC;EACF,MAAM,UAAU,OAAO,IAAI,aAAa;AACtC,UAAOC,WAAuB;IAC5B,OAAO;KAAE,MAAM;KAAS,OAAO,IAAI,WAAW,CAAC,EAAE,CAAC;KAAE,UAAU;KAAa;IAC3E,OAAO;IACR,CAAC;AACF,UAAOA,WAAuB;IAC5B,OAAO;KAAE,MAAM;KAAS,OAAO,IAAI,WAAW,CAAC,EAAE,CAAC;KAAE,UAAU;KAAa;IAC3E,OAAO;IACR,CAAC;AACF,UAAO,OAAO,KAAK;IACnB;AAEF,gBAAO,MADW,OAAO,WAAW,QAAQ,KAAK,OAAO,QAAQ,KAAK,MAAM,CAAC,CAAC,EAClE,gBAAgB,KAAK,MAAM,EAAE,MAAM,CAAC,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC;GACrE;EACF;AAEF,SAAS,sDAAsD;CAC7D,MAAM,SAAoD;EACxD,OAAO;EACP,aAAa;GAAE,WAAW;GAAO,UAAU;GAAa,YAAY;GAAO;EAC5E;AAED,IAAG,6EAA6E;AAG9E,GAAA,GAAA,YAAA,cAFyC,OAAO,aAAa,CAAC,IAAI,WAAW,CAAC,EAAE,CAAC,CAAC,CAC9D,CAAC,KAAKC,wBAAoC,OAAO,CAClD,CAAC,CAAC,eAMlB;GACH;AAEF,IAAG,+DAA+D;AAKhE,GAAA,GAAA,YAAA,cAJYD,WAAuB;GACjC,OAAO;IAAE,MAAM;IAAS,OAAO,IAAI,WAAW,CAAC,EAAE,CAAC;IAAE,UAAU;IAAa;GAC3E,OAAO;GACR,CACe,CAAC,CAAC,eAEf;GACH;AAEF,IAAG,kEAAkE;EACnE,MAAM,WAAWE,cAA8B,EAAE,CAAC;EAElD,MAAM,SADmC,OAAO,aAAa,CAAC,IAAI,WAAW,CAAC,EAAE,CAAC,CAAC,CAC9D,CAAC,KAAKD,wBAAoC,OAAO,CAAC;AAGtE,GAAA,GAAA,YAAA,cAFgB,OAAO,SAAS,OAAO,CAAC,KAAK,OAAO,QAAQ,SAAS,MAAM,CAEvD,CAAC,CAAC,eAEnB;GACH;AAEF,IAAG,sDAAsD;EACvD,MAAM,WAAWF,MAAsB,EAAE,SAAS,CAAC,EAAE,CAAC,EAAE,CAAC;EAEzD,MAAM,SADmC,OAAO,aAAa,CAAC,IAAI,WAAW,CAAC,EAAE,CAAC,CAAC,CAC9D,CAAC,KAAKE,wBAAoC,OAAO,CAAC;AAEtE,GAAA,GAAA,YAAA,cADgB,OAAO,SAAS,OAAO,CAAC,KAAK,OAAO,QAAQ,SAAS,MAAM,CACvD,CAAC,CAAC,eAA4D;GAClF;EACF;AAEF,SAAS,6CAA6C;CACpD,MAAM,SAAoD;EACxD,OAAO;EACP,aAAa;GAAE,WAAW;GAAO,UAAU;GAAa,YAAY;GAAO;EAC5E;AAED,IAAG,+DAA+D,YAAY;EAC5E,MAAM,O
AAOF,MAAsB,EACjC,SAAS,CACP,CACE;GAAE,MAAM;GAAW,MAAM;GAAS,EAClC;GAAE,MAAM;GAAS,MAAM;GAAe,CACvC,CACF,EACF,CAAC;EAEF,MAAM,SADQ,OAAO,aAAa,CAAC,IAAI,WAAW;GAAC;GAAG;GAAG;GAAE,CAAC,EAAE,IAAI,WAAW;GAAC;GAAG;GAAG;GAAE,CAAC,CAAC,CACpE,CAAC,KAAKE,wBAAoC,OAAO,CAAC;AAItE,eAAO,MAHiB,OAAO,WAC7B,OAAO,WAAW,OAAO,CAAC,KAAK,OAAO,QAAQ,KAAK,MAAM,CAAC,CAC3D,CACgB,CAAC,QAAQ,CACxB;GAAE,MAAM;GAAW,MAAM;GAAS,EAClC;GAAE,MAAM;GAAS,MAAM;GAAe,CACvC,CAAC;GACF;AAEF,IAAG,kEAAkE,YAAY;EAC/E,MAAM,OAAOF,MAAsB,EACjC,SAAS,CAAC,CAAC;GAAE,MAAM;GAAS,MAAM;GAAK,CAAC,CAAC,EAC1C,CAAC;EAEF,MAAM,SAASE,wBADD,OAAO,aAAa,CAAC,IAAI,WAAW,CAAC,EAAE,CAAC,CAAC,CACJ,EAAO,OAAO;AAEjE,eAAO,MADW,OAAO,WAAW,OAAO,WAAW,OAAO,CAAC,KAAK,OAAO,QAAQ,KAAK,MAAM,CAAC,CAAC,CACpF,CAAC,QAAQ,CAAC;GAAE,MAAM;GAAS,MAAM;GAAK,CAAC,CAAC;GACnD;EACF"}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@effect-uai/core",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "0.5.0",
|
|
4
4
|
"description": "Low-level primitives (loop, conversation, items, tools, streaming codecs) for building AI agents with Effect.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"agents",
|
|
@@ -38,6 +38,10 @@
|
|
|
38
38
|
"types": "./dist/domain/AiError.d.mts",
|
|
39
39
|
"import": "./dist/domain/AiError.mjs"
|
|
40
40
|
},
|
|
41
|
+
"./Audio": {
|
|
42
|
+
"types": "./dist/domain/Audio.d.mts",
|
|
43
|
+
"import": "./dist/domain/Audio.mjs"
|
|
44
|
+
},
|
|
41
45
|
"./Image": {
|
|
42
46
|
"types": "./dist/domain/Image.d.mts",
|
|
43
47
|
"import": "./dist/domain/Image.mjs"
|
|
@@ -50,6 +54,14 @@
|
|
|
50
54
|
"types": "./dist/domain/Media.d.mts",
|
|
51
55
|
"import": "./dist/domain/Media.mjs"
|
|
52
56
|
},
|
|
57
|
+
"./Music": {
|
|
58
|
+
"types": "./dist/domain/Music.d.mts",
|
|
59
|
+
"import": "./dist/domain/Music.mjs"
|
|
60
|
+
},
|
|
61
|
+
"./Transcript": {
|
|
62
|
+
"types": "./dist/domain/Transcript.d.mts",
|
|
63
|
+
"import": "./dist/domain/Transcript.mjs"
|
|
64
|
+
},
|
|
53
65
|
"./Turn": {
|
|
54
66
|
"types": "./dist/domain/Turn.d.mts",
|
|
55
67
|
"import": "./dist/domain/Turn.mjs"
|
|
@@ -66,6 +78,18 @@
|
|
|
66
78
|
"types": "./dist/language-model/LanguageModel.d.mts",
|
|
67
79
|
"import": "./dist/language-model/LanguageModel.mjs"
|
|
68
80
|
},
|
|
81
|
+
"./MusicGenerator": {
|
|
82
|
+
"types": "./dist/music-generator/MusicGenerator.d.mts",
|
|
83
|
+
"import": "./dist/music-generator/MusicGenerator.mjs"
|
|
84
|
+
},
|
|
85
|
+
"./SpeechSynthesizer": {
|
|
86
|
+
"types": "./dist/speech-synthesizer/SpeechSynthesizer.d.mts",
|
|
87
|
+
"import": "./dist/speech-synthesizer/SpeechSynthesizer.mjs"
|
|
88
|
+
},
|
|
89
|
+
"./Transcriber": {
|
|
90
|
+
"types": "./dist/transcriber/Transcriber.d.mts",
|
|
91
|
+
"import": "./dist/transcriber/Transcriber.mjs"
|
|
92
|
+
},
|
|
69
93
|
"./Loop": {
|
|
70
94
|
"types": "./dist/loop/Loop.d.mts",
|
|
71
95
|
"import": "./dist/loop/Loop.mjs"
|
|
@@ -121,6 +145,18 @@
|
|
|
121
145
|
"./testing/MockProvider": {
|
|
122
146
|
"types": "./dist/testing/MockProvider.d.mts",
|
|
123
147
|
"import": "./dist/testing/MockProvider.mjs"
|
|
148
|
+
},
|
|
149
|
+
"./testing/MockMusicGenerator": {
|
|
150
|
+
"types": "./dist/testing/MockMusicGenerator.d.mts",
|
|
151
|
+
"import": "./dist/testing/MockMusicGenerator.mjs"
|
|
152
|
+
},
|
|
153
|
+
"./testing/MockSpeechSynthesizer": {
|
|
154
|
+
"types": "./dist/testing/MockSpeechSynthesizer.d.mts",
|
|
155
|
+
"import": "./dist/testing/MockSpeechSynthesizer.mjs"
|
|
156
|
+
},
|
|
157
|
+
"./testing/MockTranscriber": {
|
|
158
|
+
"types": "./dist/testing/MockTranscriber.d.mts",
|
|
159
|
+
"import": "./dist/testing/MockTranscriber.mjs"
|
|
124
160
|
}
|
|
125
161
|
},
|
|
126
162
|
"publishConfig": {
|
package/src/domain/AiError.ts
CHANGED
|
@@ -72,7 +72,7 @@ export class GenerationFailed extends Data.TaggedError("GenerationFailed")<{
|
|
|
72
72
|
}> {}
|
|
73
73
|
|
|
74
74
|
/**
|
|
75
|
-
* The provider's delta stream ended without a terminal `
|
|
75
|
+
* The provider's delta stream ended without a terminal `TurnComplete`.
|
|
76
76
|
* Indicates a misbehaving provider or a connection that dropped mid-flight.
|
|
77
77
|
* Non-terminal deltas seen so far have already been emitted downstream.
|
|
78
78
|
*/
|
|
@@ -80,6 +80,26 @@ export class IncompleteTurn extends Data.TaggedError("IncompleteTurn")<{
|
|
|
80
80
|
raw?: unknown
|
|
81
81
|
}> {}
|
|
82
82
|
|
|
83
|
+
/**
|
|
84
|
+
* The provider does not implement the requested capability for this
|
|
85
|
+
* specific request. Distinct from `InvalidRequest` (the request shape is
|
|
86
|
+
* malformed) and `AuthFailed` (the request was rejected).
|
|
87
|
+
*
|
|
88
|
+
* Reserved for request-data-dependent gaps where the provider supports
|
|
89
|
+
* the method in general but not for these inputs — e.g. Google's
|
|
90
|
+
* `streamSynthesisFrom` works only for Chirp 3 HD voices; calling it
|
|
91
|
+
* with a Neural2 voice ID fails `Unsupported`.
|
|
92
|
+
*
|
|
93
|
+
* Blanket provider-level gaps (e.g. OpenAI has no incremental-text-in
|
|
94
|
+
* TTS at all) are gated at compile time via capability marker tags
|
|
95
|
+
* (`TtsIncrementalText`, `SttStreaming`) on the R channel instead.
|
|
96
|
+
*/
|
|
97
|
+
export class Unsupported extends Data.TaggedError("Unsupported")<{
|
|
98
|
+
provider: string
|
|
99
|
+
capability: string
|
|
100
|
+
reason?: string
|
|
101
|
+
}> {}
|
|
102
|
+
|
|
83
103
|
export type AiError =
|
|
84
104
|
| RateLimited
|
|
85
105
|
| Unavailable
|
|
@@ -91,3 +111,4 @@ export type AiError =
|
|
|
91
111
|
| Cancelled
|
|
92
112
|
| IncompleteTurn
|
|
93
113
|
| GenerationFailed
|
|
114
|
+
| Unsupported
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import type { MediaBase64, MediaBytes, MediaUrl } from "./Media.js"
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* MIME types we care about across STT input and TTS output. Container-
|
|
5
|
+
* level only — sample rate / encoding flavours live on `AudioFormat`.
|
|
6
|
+
*
|
|
7
|
+
* Per-provider request types narrow this further. The `(string & {})`
|
|
8
|
+
* tail keeps autocomplete on the literals while still accepting any
|
|
9
|
+
* string, so unusual formats work without an SDK update.
|
|
10
|
+
*/
|
|
11
|
+
export type AudioMimeType =
|
|
12
|
+
| "audio/mpeg"
|
|
13
|
+
| "audio/wav"
|
|
14
|
+
| "audio/x-wav"
|
|
15
|
+
| "audio/ogg"
|
|
16
|
+
| "audio/opus"
|
|
17
|
+
| "audio/flac"
|
|
18
|
+
| "audio/aac"
|
|
19
|
+
| "audio/mp4"
|
|
20
|
+
| "audio/webm"
|
|
21
|
+
| "audio/L16"
|
|
22
|
+
| "audio/pcm"
|
|
23
|
+
| "audio/mulaw"
|
|
24
|
+
| "audio/alaw"
|
|
25
|
+
// eslint-disable-next-line @typescript-eslint/ban-types
|
|
26
|
+
| (string & {})
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Audio at rest — instantiates `MediaSource` with the audio MIME union.
|
|
30
|
+
* Used for sync STT input.
|
|
31
|
+
*
|
|
32
|
+
* URL variant is best-effort: some providers (OpenAI, Cartesia, Azure
|
|
33
|
+
* short-audio) reject URL ingestion and the adapter must upload via the
|
|
34
|
+
* `bytes` or `base64` variant instead. Adapter layers reject unsupported
|
|
35
|
+
* shapes up front with `AiError.InvalidRequest`.
|
|
36
|
+
*/
|
|
37
|
+
export type AudioSource =
|
|
38
|
+
| MediaUrl<AudioMimeType>
|
|
39
|
+
| MediaBase64<AudioMimeType>
|
|
40
|
+
| MediaBytes<AudioMimeType>
|
|
41
|
+
|
|
42
|
+
export const isAudioUrl = (s: AudioSource): s is MediaUrl<AudioMimeType> => s._tag === "url"
|
|
43
|
+
export const isAudioBase64 = (s: AudioSource): s is MediaBase64<AudioMimeType> =>
|
|
44
|
+
s._tag === "base64"
|
|
45
|
+
export const isAudioBytes = (s: AudioSource): s is MediaBytes<AudioMimeType> => s._tag === "bytes"
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Structural audio format. Used both as TTS output spec and as STT
|
|
49
|
+
* streaming-input declaration. Providers that use compound slugs
|
|
50
|
+
* (`mp3_44100_128`, `audio-16khz-128kbitrate-mono-mp3`,
|
|
51
|
+
* `aura-2-thalia-en`) are encoded at the adapter layer.
|
|
52
|
+
*/
|
|
53
|
+
export type AudioFormat = {
|
|
54
|
+
readonly container: "mp3" | "wav" | "ogg" | "opus" | "flac" | "aac" | "webm" | "raw"
|
|
55
|
+
readonly encoding:
|
|
56
|
+
| "pcm_s16le"
|
|
57
|
+
| "pcm_f32le"
|
|
58
|
+
| "pcm_mulaw"
|
|
59
|
+
| "pcm_alaw"
|
|
60
|
+
| "mp3"
|
|
61
|
+
| "opus"
|
|
62
|
+
| "vorbis"
|
|
63
|
+
| "flac"
|
|
64
|
+
| "aac"
|
|
65
|
+
readonly sampleRate: 8000 | 16000 | 22050 | 24000 | 32000 | 44100 | 48000
|
|
66
|
+
/** mp3 / opus only. */
|
|
67
|
+
readonly bitRate?: number
|
|
68
|
+
readonly channels?: 1 | 2
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
/**
|
|
72
|
+
* Streamed audio chunk. `bytes` carries the codec-encoded payload as
|
|
73
|
+
* declared on the stream's `AudioFormat`. No per-chunk timestamp here —
|
|
74
|
+
* providers that emit timing do so via `TranscriptEvent.words[]`.
|
|
75
|
+
*/
|
|
76
|
+
export type AudioChunk = {
|
|
77
|
+
readonly bytes: Uint8Array
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
/**
|
|
81
|
+
* Full audio result for sync TTS. Format mirrors the request; provider
|
|
82
|
+
* layers normalize.
|
|
83
|
+
*/
|
|
84
|
+
export type AudioBlob = {
|
|
85
|
+
readonly format: AudioFormat
|
|
86
|
+
readonly bytes: Uint8Array
|
|
87
|
+
readonly durationSeconds?: number
|
|
88
|
+
}
|
package/src/domain/Items.ts
CHANGED
|
@@ -82,7 +82,7 @@ export type OutputText = typeof OutputText.Type
|
|
|
82
82
|
* Model-emitted refusal. Distinct from `output_text`: the model declined
|
|
83
83
|
* to answer rather than producing normal output. Pair with
|
|
84
84
|
* `stop_reason: "refusal"` on the surrounding `Turn`. Streamed via the
|
|
85
|
-
* `
|
|
85
|
+
* `RefusalDelta` `TurnEvent`.
|
|
86
86
|
*/
|
|
87
87
|
export const Refusal = Schema.Struct({
|
|
88
88
|
type: Schema.Literal("refusal"),
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import type { AudioBlob, AudioFormat } from "./Audio.js"
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Prompt fragment with a relative weight. Native to Lyria RealTime
|
|
5
|
+
* (`{ text, weight }` pairs blended in the model). Single-prompt
|
|
6
|
+
* providers (Suno, Mureka, MiniMax) flatten to text at the adapter
|
|
7
|
+
* layer.
|
|
8
|
+
*/
|
|
9
|
+
export type WeightedPrompt = {
|
|
10
|
+
readonly text: string
|
|
11
|
+
/** Default `1.0`. Range typically `[0, 1]`; provider-dependent. */
|
|
12
|
+
readonly weight?: number
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Cross-provider music-generation request. Provider-specific extras
|
|
17
|
+
* (Lyria `mode`, ElevenLabs `composition_plan`, Suno custom-mode `title`,
|
|
18
|
+
* MiniMax `lyrics_optimizer`) live on each provider's typed request
|
|
19
|
+
* which extends this and narrows `model`.
|
|
20
|
+
*/
|
|
21
|
+
export type CommonGenerateMusicRequest = {
|
|
22
|
+
/** Model identifier. Each provider narrows. */
|
|
23
|
+
readonly model: string
|
|
24
|
+
/** Single prompt string or weighted-prompt list (blended where supported). */
|
|
25
|
+
readonly prompts: string | ReadonlyArray<WeightedPrompt>
|
|
26
|
+
/**
|
|
27
|
+
* Lyrics text, optionally with section tags like `[Verse]` / `[Chorus]` /
|
|
28
|
+
* `[Bridge]` / `[Outro]`. Ignored for instrumental-only providers or
|
|
29
|
+
* when `instrumental: true`.
|
|
30
|
+
*/
|
|
31
|
+
readonly lyrics?: string
|
|
32
|
+
/** Target duration in seconds. Provider may treat as a hint or hard limit. */
|
|
33
|
+
readonly durationSeconds?: number
|
|
34
|
+
/** Beats per minute (60–200 typical). */
|
|
35
|
+
readonly bpm?: number
|
|
36
|
+
/**
|
|
37
|
+
* Musical key/mode hint. Provider-specific vocabulary (e.g. Lyria
|
|
38
|
+
* RealTime uses enum values like `"C_MAJOR"`, `"A_MINOR"`).
|
|
39
|
+
*/
|
|
40
|
+
readonly scale?: string
|
|
41
|
+
/** Skip vocals / lyrics. */
|
|
42
|
+
readonly instrumental?: boolean
|
|
43
|
+
/** Preferred output format. Provider may override. */
|
|
44
|
+
readonly outputFormat?: AudioFormat
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Streamed-output request. Same shape as the sync request — the
|
|
49
|
+
* streaming variant only differs in how the response is delivered.
|
|
50
|
+
*/
|
|
51
|
+
export type CommonStreamGenerateMusicRequest = CommonGenerateMusicRequest
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Bidirectional-session input. The user pushes one of these per
|
|
55
|
+
* change: a new prompt blend, a config delta, or a playback control.
|
|
56
|
+
* Lyria RealTime is the only provider currently surfacing these.
|
|
57
|
+
*/
|
|
58
|
+
export type MusicSessionInput =
|
|
59
|
+
| { readonly _tag: "prompts"; readonly prompts: ReadonlyArray<WeightedPrompt> }
|
|
60
|
+
| {
|
|
61
|
+
readonly _tag: "config"
|
|
62
|
+
readonly config: {
|
|
63
|
+
readonly bpm?: number
|
|
64
|
+
readonly scale?: string
|
|
65
|
+
readonly density?: number
|
|
66
|
+
readonly brightness?: number
|
|
67
|
+
readonly guidance?: number
|
|
68
|
+
readonly temperature?: number
|
|
69
|
+
readonly topK?: number
|
|
70
|
+
readonly seed?: number
|
|
71
|
+
readonly muteBass?: boolean
|
|
72
|
+
readonly muteDrums?: boolean
|
|
73
|
+
readonly onlyBassAndDrums?: boolean
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
| { readonly _tag: "control"; readonly action: "play" | "pause" | "stop" | "reset_context" }
|
|
77
|
+
|
|
78
|
+
export const promptsInput = (prompts: ReadonlyArray<WeightedPrompt>): MusicSessionInput => ({
|
|
79
|
+
_tag: "prompts",
|
|
80
|
+
prompts,
|
|
81
|
+
})
|
|
82
|
+
|
|
83
|
+
export const configInput = (
|
|
84
|
+
config: (MusicSessionInput & { _tag: "config" })["config"],
|
|
85
|
+
): MusicSessionInput => ({ _tag: "config", config })
|
|
86
|
+
|
|
87
|
+
export const controlInput = (
|
|
88
|
+
action: (MusicSessionInput & { _tag: "control" })["action"],
|
|
89
|
+
): MusicSessionInput => ({ _tag: "control", action })
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* Sync-generation result. Extends `AudioBlob` with provider-side
|
|
93
|
+
* metadata that's common across music providers:
|
|
94
|
+
*
|
|
95
|
+
* - `songId` — Suno task id, ElevenLabs `song_id`, etc. Used for
|
|
96
|
+
* back-reference (re-download, stem export, follow-up edits).
|
|
97
|
+
* - `lyrics` — generated lyrics when the model returned them (Lyria
|
|
98
|
+
* text part, Mureka, Suno).
|
|
99
|
+
* - `sections` — structured section markers (Lyria optional JSON
|
|
100
|
+
* structure response).
|
|
101
|
+
* - `watermark` — presence marker (Lyria SynthID is always set).
|
|
102
|
+
*/
|
|
103
|
+
export type MusicResult = AudioBlob & {
|
|
104
|
+
readonly songId?: string
|
|
105
|
+
readonly lyrics?: string
|
|
106
|
+
readonly sections?: ReadonlyArray<{
|
|
107
|
+
readonly label: string
|
|
108
|
+
readonly startSeconds: number
|
|
109
|
+
readonly endSeconds: number
|
|
110
|
+
}>
|
|
111
|
+
readonly watermark?: { readonly kind: string }
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
export const isPromptsInput = (
|
|
115
|
+
i: MusicSessionInput,
|
|
116
|
+
): i is MusicSessionInput & { _tag: "prompts" } => i._tag === "prompts"
|
|
117
|
+
export const isConfigInput = (i: MusicSessionInput): i is MusicSessionInput & { _tag: "config" } =>
|
|
118
|
+
i._tag === "config"
|
|
119
|
+
export const isControlInput = (
|
|
120
|
+
i: MusicSessionInput,
|
|
121
|
+
): i is MusicSessionInput & { _tag: "control" } => i._tag === "control"
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-word timing + metadata. `confidence` and `speakerId` are optional
|
|
3
|
+
* because providers vary widely in what they emit and when (some only on
|
|
4
|
+
* final, some only with diarization enabled, some not at all).
|
|
5
|
+
*/
|
|
6
|
+
export type WordTimestamp = {
|
|
7
|
+
readonly text: string
|
|
8
|
+
readonly startSeconds: number
|
|
9
|
+
readonly endSeconds: number
|
|
10
|
+
readonly confidence?: number
|
|
11
|
+
readonly speakerId?: string
|
|
12
|
+
readonly languageCode?: string
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Sync STT result. `raw` preserves the provider-specific response for
|
|
17
|
+
* consumers that need fields the common shape doesn't expose
|
|
18
|
+
* (alternatives, segments, NBest, audio events, etc.).
|
|
19
|
+
*/
|
|
20
|
+
export type TranscriptResult = {
|
|
21
|
+
readonly text: string
|
|
22
|
+
readonly languageCode?: string
|
|
23
|
+
readonly durationSeconds?: number
|
|
24
|
+
readonly words?: ReadonlyArray<WordTimestamp>
|
|
25
|
+
readonly raw?: unknown
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* Streaming STT event union. Collapses every provider's vocabulary into
|
|
30
|
+
* a small set; provider-specific shapes survive on `metadata.raw`.
|
|
31
|
+
*
|
|
32
|
+
* - `partial`: interim hypothesis. `stability` is Google-only.
|
|
33
|
+
* - `final`: committed transcript for the current utterance / segment.
|
|
34
|
+
* - `speech-started` / `utterance-ended`: VAD-derived boundaries. Not
|
|
35
|
+
* all providers emit them (OpenAI Realtime, Google with
|
|
36
|
+
* `voice_activity_events`, Deepgram with `vad_events`, AssemblyAI).
|
|
37
|
+
* - `audio-event`: non-speech label (`(laughter)`, `(music)`) — ElevenLabs only.
|
|
38
|
+
* - `metadata`: opaque server-side bookkeeping (request_id, model info).
|
|
39
|
+
* - `error`: non-fatal provider error mid-stream. Fatal errors surface
|
|
40
|
+
* on the `Stream`'s error channel as `AiError.AiError`.
|
|
41
|
+
*/
|
|
42
|
+
export type TranscriptEvent =
|
|
43
|
+
| {
|
|
44
|
+
readonly _tag: "partial"
|
|
45
|
+
readonly text: string
|
|
46
|
+
readonly words?: ReadonlyArray<WordTimestamp>
|
|
47
|
+
readonly stability?: number
|
|
48
|
+
}
|
|
49
|
+
| {
|
|
50
|
+
readonly _tag: "final"
|
|
51
|
+
readonly text: string
|
|
52
|
+
readonly words?: ReadonlyArray<WordTimestamp>
|
|
53
|
+
readonly languageCode?: string
|
|
54
|
+
}
|
|
55
|
+
| { readonly _tag: "speech-started"; readonly atSeconds: number }
|
|
56
|
+
| { readonly _tag: "utterance-ended"; readonly atSeconds: number }
|
|
57
|
+
| {
|
|
58
|
+
readonly _tag: "audio-event"
|
|
59
|
+
readonly label: string
|
|
60
|
+
readonly startSeconds: number
|
|
61
|
+
readonly endSeconds: number
|
|
62
|
+
}
|
|
63
|
+
| { readonly _tag: "metadata"; readonly raw: unknown }
|
|
64
|
+
| { readonly _tag: "error"; readonly code?: string; readonly message: string }
|
|
65
|
+
|
|
66
|
+
export const isPartial = (e: TranscriptEvent): e is Extract<TranscriptEvent, { _tag: "partial" }> =>
|
|
67
|
+
e._tag === "partial"
|
|
68
|
+
export const isFinal = (e: TranscriptEvent): e is Extract<TranscriptEvent, { _tag: "final" }> =>
|
|
69
|
+
e._tag === "final"
|
|
70
|
+
export const isSpeechStarted = (
|
|
71
|
+
e: TranscriptEvent,
|
|
72
|
+
): e is Extract<TranscriptEvent, { _tag: "speech-started" }> => e._tag === "speech-started"
|
|
73
|
+
export const isUtteranceEnded = (
|
|
74
|
+
e: TranscriptEvent,
|
|
75
|
+
): e is Extract<TranscriptEvent, { _tag: "utterance-ended" }> => e._tag === "utterance-ended"
|
|
76
|
+
export const isAudioEvent = (
|
|
77
|
+
e: TranscriptEvent,
|
|
78
|
+
): e is Extract<TranscriptEvent, { _tag: "audio-event" }> => e._tag === "audio-event"
|
|
79
|
+
export const isMetadata = (
|
|
80
|
+
e: TranscriptEvent,
|
|
81
|
+
): e is Extract<TranscriptEvent, { _tag: "metadata" }> => e._tag === "metadata"
|
|
82
|
+
export const isError = (e: TranscriptEvent): e is Extract<TranscriptEvent, { _tag: "error" }> =>
|
|
83
|
+
e._tag === "error"
|