@framers/agentos 0.1.110 → 0.1.111

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/dist/api/agency.d.ts.map +1 -1
  2. package/dist/api/agency.js +38 -2
  3. package/dist/api/agency.js.map +1 -1
  4. package/dist/api/agent.js +1 -1
  5. package/dist/api/agent.js.map +1 -1
  6. package/dist/api/strategies/debate.d.ts.map +1 -1
  7. package/dist/api/strategies/debate.js.map +1 -1
  8. package/dist/api/strategies/graph.d.ts.map +1 -1
  9. package/dist/api/strategies/graph.js +1 -2
  10. package/dist/api/strategies/graph.js.map +1 -1
  11. package/dist/api/strategies/hierarchical.d.ts.map +1 -1
  12. package/dist/api/strategies/hierarchical.js +1 -2
  13. package/dist/api/strategies/hierarchical.js.map +1 -1
  14. package/dist/api/strategies/index.d.ts +1 -9
  15. package/dist/api/strategies/index.d.ts.map +1 -1
  16. package/dist/api/strategies/index.js +1 -11
  17. package/dist/api/strategies/index.js.map +1 -1
  18. package/dist/api/strategies/parallel.d.ts.map +1 -1
  19. package/dist/api/strategies/parallel.js +23 -4
  20. package/dist/api/strategies/parallel.js.map +1 -1
  21. package/dist/api/strategies/review-loop.d.ts.map +1 -1
  22. package/dist/api/strategies/review-loop.js.map +1 -1
  23. package/dist/api/strategies/sequential.d.ts.map +1 -1
  24. package/dist/api/strategies/sequential.js +1 -2
  25. package/dist/api/strategies/sequential.js.map +1 -1
  26. package/dist/api/strategies/shared.d.ts +8 -0
  27. package/dist/api/strategies/shared.d.ts.map +1 -1
  28. package/dist/api/strategies/shared.js +10 -1
  29. package/dist/api/strategies/shared.js.map +1 -1
  30. package/dist/api/types.d.ts +6 -0
  31. package/dist/api/types.d.ts.map +1 -1
  32. package/dist/api/types.js.map +1 -1
  33. package/dist/memory/AgentMemory.d.ts +2 -1
  34. package/dist/memory/AgentMemory.d.ts.map +1 -1
  35. package/dist/memory/AgentMemory.js +1 -1
  36. package/dist/memory/AgentMemory.js.map +1 -1
  37. package/dist/memory/CognitiveMemoryManager.d.ts.map +1 -1
  38. package/dist/memory/CognitiveMemoryManager.js +7 -2
  39. package/dist/memory/CognitiveMemoryManager.js.map +1 -1
  40. package/dist/memory/facade/Memory.d.ts.map +1 -1
  41. package/dist/memory/facade/Memory.js +6 -9
  42. package/dist/memory/facade/Memory.js.map +1 -1
  43. package/dist/memory/store/MemoryStore.d.ts +9 -0
  44. package/dist/memory/store/MemoryStore.d.ts.map +1 -1
  45. package/dist/memory/store/MemoryStore.js +66 -6
  46. package/dist/memory/store/MemoryStore.js.map +1 -1
  47. package/dist/memory/store/SqliteMemoryGraph.d.ts.map +1 -1
  48. package/dist/memory/store/SqliteMemoryGraph.js +27 -13
  49. package/dist/memory/store/SqliteMemoryGraph.js.map +1 -1
  50. package/dist/speech/FallbackProxy.d.ts +194 -41
  51. package/dist/speech/FallbackProxy.d.ts.map +1 -1
  52. package/dist/speech/FallbackProxy.js +155 -32
  53. package/dist/speech/FallbackProxy.js.map +1 -1
  54. package/dist/speech/SpeechProviderResolver.d.ts +278 -36
  55. package/dist/speech/SpeechProviderResolver.d.ts.map +1 -1
  56. package/dist/speech/SpeechProviderResolver.js +306 -40
  57. package/dist/speech/SpeechProviderResolver.js.map +1 -1
  58. package/dist/speech/providers/AssemblyAISTTProvider.d.ts +119 -19
  59. package/dist/speech/providers/AssemblyAISTTProvider.d.ts.map +1 -1
  60. package/dist/speech/providers/AssemblyAISTTProvider.js +153 -25
  61. package/dist/speech/providers/AssemblyAISTTProvider.js.map +1 -1
  62. package/dist/speech/providers/AzureSpeechSTTProvider.d.ts +121 -17
  63. package/dist/speech/providers/AzureSpeechSTTProvider.d.ts.map +1 -1
  64. package/dist/speech/providers/AzureSpeechSTTProvider.js +122 -14
  65. package/dist/speech/providers/AzureSpeechSTTProvider.js.map +1 -1
  66. package/dist/speech/providers/AzureSpeechTTSProvider.d.ts +130 -15
  67. package/dist/speech/providers/AzureSpeechTTSProvider.d.ts.map +1 -1
  68. package/dist/speech/providers/AzureSpeechTTSProvider.js +163 -18
  69. package/dist/speech/providers/AzureSpeechTTSProvider.js.map +1 -1
  70. package/dist/speech/providers/BuiltInAdaptiveVadProvider.d.ts +159 -0
  71. package/dist/speech/providers/BuiltInAdaptiveVadProvider.d.ts.map +1 -1
  72. package/dist/speech/providers/BuiltInAdaptiveVadProvider.js +119 -0
  73. package/dist/speech/providers/BuiltInAdaptiveVadProvider.js.map +1 -1
  74. package/dist/speech/providers/DeepgramBatchSTTProvider.d.ts +102 -16
  75. package/dist/speech/providers/DeepgramBatchSTTProvider.d.ts.map +1 -1
  76. package/dist/speech/providers/DeepgramBatchSTTProvider.js +108 -13
  77. package/dist/speech/providers/DeepgramBatchSTTProvider.js.map +1 -1
  78. package/dist/speech/providers/ElevenLabsTextToSpeechProvider.d.ts +149 -0
  79. package/dist/speech/providers/ElevenLabsTextToSpeechProvider.d.ts.map +1 -1
  80. package/dist/speech/providers/ElevenLabsTextToSpeechProvider.js +137 -2
  81. package/dist/speech/providers/ElevenLabsTextToSpeechProvider.js.map +1 -1
  82. package/dist/speech/providers/OpenAITextToSpeechProvider.d.ts +125 -0
  83. package/dist/speech/providers/OpenAITextToSpeechProvider.d.ts.map +1 -1
  84. package/dist/speech/providers/OpenAITextToSpeechProvider.js +128 -4
  85. package/dist/speech/providers/OpenAITextToSpeechProvider.js.map +1 -1
  86. package/dist/speech/providers/OpenAIWhisperSpeechToTextProvider.d.ts +110 -0
  87. package/dist/speech/providers/OpenAIWhisperSpeechToTextProvider.d.ts.map +1 -1
  88. package/dist/speech/providers/OpenAIWhisperSpeechToTextProvider.js +115 -0
  89. package/dist/speech/providers/OpenAIWhisperSpeechToTextProvider.js.map +1 -1
  90. package/package.json +1 -1
@@ -1,48 +1,148 @@
1
1
  import type { SpeechAudioInput, SpeechToTextProvider, SpeechTranscriptionOptions, SpeechTranscriptionResult } from '../types.js';
2
- /** Configuration for the AssemblyAISTTProvider. */
2
+ /**
3
+ * Configuration for the {@link AssemblyAISTTProvider}.
4
+ *
5
+ * @see {@link AssemblyAISTTProvider} for usage examples
6
+ */
3
7
  export interface AssemblyAISTTProviderConfig {
4
- /** AssemblyAI API key. */
8
+ /**
9
+ * AssemblyAI API key used for authentication.
10
+ * Sent as the `Authorization` header value (without a prefix like "Bearer").
11
+ * Obtain from https://www.assemblyai.com/dashboard/account
12
+ */
5
13
  apiKey: string;
6
14
  /**
7
- * Custom fetch implementation, useful for testing.
8
- * Defaults to the global `fetch`.
15
+ * Custom fetch implementation for dependency injection in tests.
16
+ * When omitted, the global `fetch` is used.
17
+ * @default globalThis.fetch
9
18
  */
10
19
  fetchImpl?: typeof fetch;
11
20
  }
12
21
  /**
13
22
  * Speech-to-text provider that uses the AssemblyAI async transcription API.
14
23
  *
15
- * The three-step workflow is:
16
- * 1. **Upload** – POST the raw audio to `/v2/upload` to obtain an upload URL.
17
- * 2. **Submit** POST to `/v2/transcript` with the upload URL to start processing.
18
- * 3. **Poll** – GET `/v2/transcript/:id` every second until `status` is
19
- * `completed` or `error`, or until the optional timeout elapses.
24
+ * ## Three-Step Workflow
25
+ *
26
+ * AssemblyAI uses an asynchronous transcription pipeline that requires three
27
+ * sequential HTTP requests:
28
+ *
29
+ * 1. **Upload** — `POST /v2/upload` sends the raw audio bytes to AssemblyAI's
30
+ * CDN and returns an `upload_url`. This step is necessary because the
31
+ * transcript endpoint accepts URLs, not raw audio.
32
+ *
33
+ * 2. **Submit** — `POST /v2/transcript` creates a transcription job referencing
34
+ * the upload URL. Returns a transcript `id` used for polling. Optional
35
+ * features like `speaker_labels` are enabled in this request's JSON body.
36
+ *
37
+ * 3. **Poll** — `GET /v2/transcript/:id` is called every {@link POLL_INTERVAL_MS}
38
+ * (1 second) until the transcript `status` transitions to `'completed'` or
39
+ * `'error'`. The polling loop is bounded by {@link DEFAULT_TIMEOUT_MS}
40
+ * (120 seconds) to prevent indefinite waiting.
41
+ *
42
+ * ## AbortController Usage
43
+ *
44
+ * An optional `AbortSignal` can be passed via
45
+ * `options.providerSpecificOptions.signal` to cancel the transcription at any
46
+ * point. The signal is forwarded to all three fetch calls and also checked at
47
+ * the top of each polling iteration. When aborted, an error is thrown
48
+ * immediately without waiting for the current fetch to complete.
49
+ *
50
+ * ## Error Handling
51
+ *
52
+ * - Non-2xx responses at any step throw an `Error` with the HTTP status and body.
53
+ * - `status === 'error'` on the transcript throws with AssemblyAI's error message.
54
+ * - Timeout expiry throws with the transcript ID for manual inspection.
55
+ * - Aborted signals throw with a descriptive cancellation message.
56
+ *
57
+ * @see {@link AssemblyAISTTProviderConfig} for configuration options
58
+ * @see {@link AssemblyAITranscript} for the polling response shape
20
59
  *
21
60
  * @example
22
61
  * ```ts
23
- * const provider = new AssemblyAISTTProvider({ apiKey: process.env.ASSEMBLYAI_API_KEY! });
24
- * const result = await provider.transcribe({ data: audioBuffer }, { enableSpeakerDiarization: true });
25
- * console.log(result.text);
62
+ * const provider = new AssemblyAISTTProvider({
63
+ * apiKey: process.env.ASSEMBLYAI_API_KEY!,
64
+ * });
65
+ *
66
+ * // Basic transcription
67
+ * const result = await provider.transcribe({ data: audioBuffer });
68
+ *
69
+ * // With diarization and cancellation support
70
+ * const controller = new AbortController();
71
+ * const result = await provider.transcribe(
72
+ * { data: audioBuffer },
73
+ * {
74
+ * enableSpeakerDiarization: true,
75
+ * providerSpecificOptions: { signal: controller.signal },
76
+ * },
77
+ * );
26
78
  * ```
27
79
  */
28
80
  export declare class AssemblyAISTTProvider implements SpeechToTextProvider {
29
81
  private readonly config;
82
+ /** Unique provider identifier used for registration and resolution. */
30
83
  readonly id = "assemblyai";
84
+ /** Human-readable display name for UI and logging. */
31
85
  readonly displayName = "AssemblyAI";
86
+ /**
87
+ * Streaming is not supported by this provider's async pipeline.
88
+ * AssemblyAI does offer a separate real-time streaming API via WebSocket,
89
+ * but that would be a different provider implementation.
90
+ */
32
91
  readonly supportsStreaming = false;
92
+ /** Fetch implementation — injected for testability, defaults to global fetch. */
33
93
  private readonly fetchImpl;
94
+ /**
95
+ * Creates a new AssemblyAISTTProvider.
96
+ *
97
+ * @param config - Provider configuration including the API key.
98
+ *
99
+ * @example
100
+ * ```ts
101
+ * const provider = new AssemblyAISTTProvider({
102
+ * apiKey: 'your-assemblyai-api-key',
103
+ * });
104
+ * ```
105
+ */
34
106
  constructor(config: AssemblyAISTTProviderConfig);
35
- /** Returns the human-readable provider name. */
107
+ /**
108
+ * Returns the human-readable provider name.
109
+ *
110
+ * @returns The display name string `'AssemblyAI'`.
111
+ *
112
+ * @example
113
+ * ```ts
114
+ * provider.getProviderName(); // 'AssemblyAI'
115
+ * ```
116
+ */
36
117
  getProviderName(): string;
37
118
  /**
38
- * Transcribes an audio buffer via the AssemblyAI async pipeline.
119
+ * Transcribes an audio buffer via the AssemblyAI three-step async pipeline:
120
+ * upload, submit, and poll.
39
121
  *
40
- * @param audio - Raw audio data and associated metadata.
122
+ * @param audio - Raw audio data and associated metadata. The `data` buffer
123
+ * is uploaded to AssemblyAI's CDN in step 1.
41
124
  * @param options - Optional transcription settings. Pass
42
- * `providerSpecificOptions.signal` (an {@link AbortSignal}) to cancel.
43
- * @returns A promise resolving to the normalised transcription result.
44
- * @throws When the API returns a non-2xx status, when transcription fails,
45
- * or when the 120-second timeout is exceeded.
125
+ * `providerSpecificOptions.signal` (an {@link AbortSignal}) to cancel
126
+ * at any point in the pipeline.
127
+ * @returns A promise resolving to the normalized transcription result.
128
+ * @throws {Error} When the upload API returns a non-2xx status.
129
+ * @throws {Error} When the transcript submit API returns a non-2xx status.
130
+ * @throws {Error} When the polling API returns a non-2xx status.
131
+ * @throws {Error} When the transcript status becomes `'error'` (includes
132
+ * AssemblyAI's error message, e.g. "Audio file could not be decoded").
133
+ * @throws {Error} When the 120-second timeout is exceeded (includes the
134
+ * transcript ID for manual inspection via the AssemblyAI dashboard).
135
+ * @throws {Error} When the caller's AbortSignal is triggered.
136
+ *
137
+ * @example
138
+ * ```ts
139
+ * const result = await provider.transcribe(
140
+ * { data: wavBuffer, mimeType: 'audio/wav' },
141
+ * { enableSpeakerDiarization: true, language: 'en' },
142
+ * );
143
+ * console.log(result.text);
144
+ * console.log(result.segments?.map(s => `[${s.speaker}] ${s.text}`));
145
+ * ```
46
146
  */
47
147
  transcribe(audio: SpeechAudioInput, options?: SpeechTranscriptionOptions): Promise<SpeechTranscriptionResult>;
48
148
  }
@@ -1 +1 @@
1
- {"version":3,"file":"AssemblyAISTTProvider.d.ts","sourceRoot":"","sources":["../../../src/speech/providers/AssemblyAISTTProvider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,gBAAgB,EAChB,oBAAoB,EACpB,0BAA0B,EAC1B,yBAAyB,EAE1B,MAAM,aAAa,CAAC;AAErB,mDAAmD;AACnD,MAAM,WAAW,2BAA2B;IAC1C,0BAA0B;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,SAAS,CAAC,EAAE,OAAO,KAAK,CAAC;CAC1B;AAsDD;;;;;;;;;;;;;;;GAeG;AACH,qBAAa,qBAAsB,YAAW,oBAAoB;IAOpD,OAAO,CAAC,QAAQ,CAAC,MAAM;IANnC,SAAgB,EAAE,gBAAgB;IAClC,SAAgB,WAAW,gBAAgB;IAC3C,SAAgB,iBAAiB,SAAS;IAE1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAe;gBAEZ,MAAM,EAAE,2BAA2B;IAIhE,gDAAgD;IAChD,eAAe,IAAI,MAAM;IAIzB;;;;;;;;;OASG;IACG,UAAU,CACd,KAAK,EAAE,gBAAgB,EACvB,OAAO,GAAE,0BAA+B,GACvC,OAAO,CAAC,yBAAyB,CAAC;CAqGtC"}
1
+ {"version":3,"file":"AssemblyAISTTProvider.d.ts","sourceRoot":"","sources":["../../../src/speech/providers/AssemblyAISTTProvider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,gBAAgB,EAChB,oBAAoB,EACpB,0BAA0B,EAC1B,yBAAyB,EAE1B,MAAM,aAAa,CAAC;AAErB;;;;GAIG;AACH,MAAM,WAAW,2BAA2B;IAC1C;;;;OAIG;IACH,MAAM,EAAE,MAAM,CAAC;IAEf;;;;OAIG;IACH,SAAS,CAAC,EAAE,OAAO,KAAK,CAAC;CAC1B;AAmHD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0DG;AACH,qBAAa,qBAAsB,YAAW,oBAAoB;IA6BpD,OAAO,CAAC,QAAQ,CAAC,MAAM;IA5BnC,uEAAuE;IACvE,SAAgB,EAAE,gBAAgB;IAElC,sDAAsD;IACtD,SAAgB,WAAW,gBAAgB;IAE3C;;;;OAIG;IACH,SAAgB,iBAAiB,SAAS;IAE1C,iFAAiF;IACjF,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAe;IAEzC;;;;;;;;;;;OAWG;gBAC0B,MAAM,EAAE,2BAA2B;IAIhE;;;;;;;;;OASG;IACH,eAAe,IAAI,MAAM;IAIzB;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA4BG;IACG,UAAU,CACd,KAAK,EAAE,gBAAgB,EACvB,OAAO,GAAE,0BAA+B,GACvC,OAAO,CAAC,yBAAyB,CAAC;CAkHtC"}
@@ -1,20 +1,45 @@
1
+ /** Base URL for all AssemblyAI API v2 endpoints. */
1
2
  const ASSEMBLYAI_BASE = 'https://api.assemblyai.com/v2';
2
- /** Maximum time (ms) to wait for a transcript before rejecting. */
3
+ /**
4
+ * Maximum time (in milliseconds) to wait for a transcript to complete
5
+ * before throwing a timeout error.
6
+ *
7
+ * 120 seconds is generous — most transcripts complete within 10–30 seconds.
8
+ * The timeout exists to prevent indefinite polling in case of AssemblyAI
9
+ * service degradation or stuck transcripts.
10
+ */
3
11
  const DEFAULT_TIMEOUT_MS = 120000;
4
- /** Polling interval (ms) between transcript status checks. */
12
+ /**
13
+ * Polling interval (in milliseconds) between transcript status checks.
14
+ *
15
+ * 1 second strikes a balance between responsiveness and API rate limiting.
16
+ * AssemblyAI does not document a rate limit for polling, but 1-second
17
+ * intervals are considered polite and are used in their official examples.
18
+ */
5
19
  const POLL_INTERVAL_MS = 1000;
6
20
  /**
7
21
  * Maps AssemblyAI word objects to {@link SpeechTranscriptionSegment} entries.
8
22
  *
9
23
  * Each word becomes its own segment so that per-word timing and speaker
10
- * attribution are preserved in the normalised result.
24
+ * attribution are preserved in the normalized result.
25
+ *
26
+ * **Important:** AssemblyAI returns word timings in milliseconds, so we
27
+ * divide by 1000 to convert to seconds for consistency with our normalized
28
+ * {@link SpeechTranscriptionSegment} interface (which uses seconds).
29
+ *
30
+ * @param words - Array of AssemblyAI word objects with millisecond timings.
31
+ * @returns An array of normalized transcription segments with second-based timings.
32
+ *
33
+ * @see {@link AssemblyAIWord} for the input shape
34
+ * @see {@link SpeechTranscriptionSegment} for the output shape
11
35
  */
12
36
  function wordsToSegments(words) {
13
37
  return words.map((w) => ({
14
38
  text: w.text,
15
- startTime: w.start / 1000, // AssemblyAI returns milliseconds
39
+ startTime: w.start / 1000, // AssemblyAI returns milliseconds -> convert to seconds
16
40
  endTime: w.end / 1000,
17
41
  confidence: w.confidence,
42
+ // Convert null speaker labels to undefined for type consistency
18
43
  speaker: w.speaker ?? undefined,
19
44
  words: [
20
45
  {
@@ -29,45 +54,139 @@ function wordsToSegments(words) {
29
54
  /**
30
55
  * Speech-to-text provider that uses the AssemblyAI async transcription API.
31
56
  *
32
- * The three-step workflow is:
33
- * 1. **Upload** – POST the raw audio to `/v2/upload` to obtain an upload URL.
34
- * 2. **Submit** POST to `/v2/transcript` with the upload URL to start processing.
35
- * 3. **Poll** – GET `/v2/transcript/:id` every second until `status` is
36
- * `completed` or `error`, or until the optional timeout elapses.
57
+ * ## Three-Step Workflow
58
+ *
59
+ * AssemblyAI uses an asynchronous transcription pipeline that requires three
60
+ * sequential HTTP requests:
61
+ *
62
+ * 1. **Upload** — `POST /v2/upload` sends the raw audio bytes to AssemblyAI's
63
+ * CDN and returns an `upload_url`. This step is necessary because the
64
+ * transcript endpoint accepts URLs, not raw audio.
65
+ *
66
+ * 2. **Submit** — `POST /v2/transcript` creates a transcription job referencing
67
+ * the upload URL. Returns a transcript `id` used for polling. Optional
68
+ * features like `speaker_labels` are enabled in this request's JSON body.
69
+ *
70
+ * 3. **Poll** — `GET /v2/transcript/:id` is called every {@link POLL_INTERVAL_MS}
71
+ * (1 second) until the transcript `status` transitions to `'completed'` or
72
+ * `'error'`. The polling loop is bounded by {@link DEFAULT_TIMEOUT_MS}
73
+ * (120 seconds) to prevent indefinite waiting.
74
+ *
75
+ * ## AbortController Usage
76
+ *
77
+ * An optional `AbortSignal` can be passed via
78
+ * `options.providerSpecificOptions.signal` to cancel the transcription at any
79
+ * point. The signal is forwarded to all three fetch calls and also checked at
80
+ * the top of each polling iteration. When aborted, an error is thrown
81
+ * immediately without waiting for the current fetch to complete.
82
+ *
83
+ * ## Error Handling
84
+ *
85
+ * - Non-2xx responses at any step throw an `Error` with the HTTP status and body.
86
+ * - `status === 'error'` on the transcript throws with AssemblyAI's error message.
87
+ * - Timeout expiry throws with the transcript ID for manual inspection.
88
+ * - Aborted signals throw with a descriptive cancellation message.
89
+ *
90
+ * @see {@link AssemblyAISTTProviderConfig} for configuration options
91
+ * @see {@link AssemblyAITranscript} for the polling response shape
37
92
  *
38
93
  * @example
39
94
  * ```ts
40
- * const provider = new AssemblyAISTTProvider({ apiKey: process.env.ASSEMBLYAI_API_KEY! });
41
- * const result = await provider.transcribe({ data: audioBuffer }, { enableSpeakerDiarization: true });
42
- * console.log(result.text);
95
+ * const provider = new AssemblyAISTTProvider({
96
+ * apiKey: process.env.ASSEMBLYAI_API_KEY!,
97
+ * });
98
+ *
99
+ * // Basic transcription
100
+ * const result = await provider.transcribe({ data: audioBuffer });
101
+ *
102
+ * // With diarization and cancellation support
103
+ * const controller = new AbortController();
104
+ * const result = await provider.transcribe(
105
+ * { data: audioBuffer },
106
+ * {
107
+ * enableSpeakerDiarization: true,
108
+ * providerSpecificOptions: { signal: controller.signal },
109
+ * },
110
+ * );
43
111
  * ```
44
112
  */
45
113
  export class AssemblyAISTTProvider {
114
+ /**
115
+ * Creates a new AssemblyAISTTProvider.
116
+ *
117
+ * @param config - Provider configuration including the API key.
118
+ *
119
+ * @example
120
+ * ```ts
121
+ * const provider = new AssemblyAISTTProvider({
122
+ * apiKey: 'your-assemblyai-api-key',
123
+ * });
124
+ * ```
125
+ */
46
126
  constructor(config) {
47
127
  this.config = config;
128
+ /** Unique provider identifier used for registration and resolution. */
48
129
  this.id = 'assemblyai';
130
+ /** Human-readable display name for UI and logging. */
49
131
  this.displayName = 'AssemblyAI';
132
+ /**
133
+ * Streaming is not supported by this provider's async pipeline.
134
+ * AssemblyAI does offer a separate real-time streaming API via WebSocket,
135
+ * but that would be a different provider implementation.
136
+ */
50
137
  this.supportsStreaming = false;
51
138
  this.fetchImpl = config.fetchImpl ?? fetch;
52
139
  }
53
- /** Returns the human-readable provider name. */
140
+ /**
141
+ * Returns the human-readable provider name.
142
+ *
143
+ * @returns The display name string `'AssemblyAI'`.
144
+ *
145
+ * @example
146
+ * ```ts
147
+ * provider.getProviderName(); // 'AssemblyAI'
148
+ * ```
149
+ */
54
150
  getProviderName() {
55
151
  return this.displayName;
56
152
  }
57
153
  /**
58
- * Transcribes an audio buffer via the AssemblyAI async pipeline.
154
+ * Transcribes an audio buffer via the AssemblyAI three-step async pipeline:
155
+ * upload, submit, and poll.
59
156
  *
60
- * @param audio - Raw audio data and associated metadata.
157
+ * @param audio - Raw audio data and associated metadata. The `data` buffer
158
+ * is uploaded to AssemblyAI's CDN in step 1.
61
159
  * @param options - Optional transcription settings. Pass
62
- * `providerSpecificOptions.signal` (an {@link AbortSignal}) to cancel.
63
- * @returns A promise resolving to the normalised transcription result.
64
- * @throws When the API returns a non-2xx status, when transcription fails,
65
- * or when the 120-second timeout is exceeded.
160
+ * `providerSpecificOptions.signal` (an {@link AbortSignal}) to cancel
161
+ * at any point in the pipeline.
162
+ * @returns A promise resolving to the normalized transcription result.
163
+ * @throws {Error} When the upload API returns a non-2xx status.
164
+ * @throws {Error} When the transcript submit API returns a non-2xx status.
165
+ * @throws {Error} When the polling API returns a non-2xx status.
166
+ * @throws {Error} When the transcript status becomes `'error'` (includes
167
+ * AssemblyAI's error message, e.g. "Audio file could not be decoded").
168
+ * @throws {Error} When the 120-second timeout is exceeded (includes the
169
+ * transcript ID for manual inspection via the AssemblyAI dashboard).
170
+ * @throws {Error} When the caller's AbortSignal is triggered.
171
+ *
172
+ * @example
173
+ * ```ts
174
+ * const result = await provider.transcribe(
175
+ * { data: wavBuffer, mimeType: 'audio/wav' },
176
+ * { enableSpeakerDiarization: true, language: 'en' },
177
+ * );
178
+ * console.log(result.text);
179
+ * console.log(result.segments?.map(s => `[${s.speaker}] ${s.text}`));
180
+ * ```
66
181
  */
67
182
  async transcribe(audio, options = {}) {
183
+ // Extract the optional AbortSignal for cancellation support.
184
+ // Cast is safe because we document the expected type in the JSDoc.
68
185
  const signal = options.providerSpecificOptions?.signal;
69
186
  const timeoutMs = DEFAULT_TIMEOUT_MS;
70
- // ── Step 1: Upload audio ────────────────────────────────────────────────
187
+ // ── Step 1: Upload audio to AssemblyAI's CDN ──────────────────────────
188
+ // The upload endpoint returns an `upload_url` that the transcript
189
+ // endpoint can reference. This avoids sending raw bytes to /transcript.
71
190
  const uploadResponse = await this.fetchImpl(`${ASSEMBLYAI_BASE}/upload`, {
72
191
  method: 'POST',
73
192
  headers: {
@@ -82,7 +201,9 @@ export class AssemblyAISTTProvider {
82
201
  throw new Error(`AssemblyAI upload failed (${uploadResponse.status}): ${msg}`);
83
202
  }
84
203
  const { upload_url } = (await uploadResponse.json());
85
- // ── Step 2: Submit transcript request ───────────────────────────────────
204
+ // ── Step 2: Submit transcript request ─────────────────────────────────
205
+ // Create a transcription job with the uploaded audio URL and any
206
+ // optional features like speaker diarization.
86
207
  const submitPayload = {
87
208
  audio_url: upload_url,
88
209
  speaker_labels: options.enableSpeakerDiarization ?? false,
@@ -103,12 +224,16 @@ export class AssemblyAISTTProvider {
103
224
  throw new Error(`AssemblyAI transcript submit failed (${submitResponse.status}): ${msg}`);
104
225
  }
105
226
  const { id } = (await submitResponse.json());
106
- // ── Step 3: Poll until completed ────────────────────────────────────────
227
+ // ── Step 3: Poll until completed or error ─────────────────────────────
228
+ // Check the transcript status every POLL_INTERVAL_MS until it reaches
229
+ // a terminal state or the timeout is exceeded.
107
230
  const deadline = Date.now() + timeoutMs;
108
231
  while (true) {
232
+ // Check for caller-initiated cancellation before each poll
109
233
  if (signal?.aborted) {
110
234
  throw new Error('AssemblyAI transcription aborted by caller signal');
111
235
  }
236
+ // Check for timeout before each poll to avoid one extra unnecessary request
112
237
  if (Date.now() >= deadline) {
113
238
  throw new Error(`AssemblyAI transcription timed out after ${timeoutMs / 1000}s (transcript id: ${id})`);
114
239
  }
@@ -121,9 +246,11 @@ export class AssemblyAISTTProvider {
121
246
  throw new Error(`AssemblyAI poll failed (${pollResponse.status}): ${msg}`);
122
247
  }
123
248
  const transcript = (await pollResponse.json());
249
+ // Terminal state: transcription failed on AssemblyAI's side
124
250
  if (transcript.status === 'error') {
125
251
  throw new Error(`AssemblyAI transcription error: ${transcript.error ?? 'unknown error'}`);
126
252
  }
253
+ // Terminal state: transcription succeeded — normalize and return
127
254
  if (transcript.status === 'completed') {
128
255
  const text = transcript.text ?? '';
129
256
  const durationSeconds = transcript.audio_duration ?? audio.durationSeconds;
@@ -133,17 +260,18 @@ export class AssemblyAISTTProvider {
133
260
  language: transcript.language_code ?? options.language,
134
261
  durationSeconds,
135
262
  confidence: transcript.confidence ?? undefined,
136
- cost: 0,
263
+ cost: 0, // Cost tracking is handled at a higher layer
137
264
  segments: words.length > 0 ? wordsToSegments(words) : undefined,
138
265
  providerResponse: transcript,
139
- isFinal: true,
266
+ isFinal: true, // Async API always returns final results
140
267
  usage: {
141
268
  durationMinutes: (durationSeconds ?? 0) / 60,
142
269
  modelUsed: 'assemblyai',
143
270
  },
144
271
  };
145
272
  }
146
- // Still queued or processing — wait before polling again.
273
+ // Non-terminal state ('queued' or 'processing') — wait before polling again.
274
+ // Using setTimeout instead of a busy loop to yield the event loop.
147
275
  await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
148
276
  }
149
277
  }
@@ -1 +1 @@
1
- {"version":3,"file":"AssemblyAISTTProvider.js","sourceRoot":"","sources":["../../../src/speech/providers/AssemblyAISTTProvider.ts"],"names":[],"mappings":"AAyCA,MAAM,eAAe,GAAG,+BAA+B,CAAC;AACxD,mEAAmE;AACnE,MAAM,kBAAkB,GAAG,MAAO,CAAC;AACnC,8DAA8D;AAC9D,MAAM,gBAAgB,GAAG,IAAK,CAAC;AAE/B;;;;;GAKG;AACH,SAAS,eAAe,CAAC,KAAuB;IAC9C,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACvB,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,SAAS,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,EAAE,kCAAkC;QAC7D,OAAO,EAAE,CAAC,CAAC,GAAG,GAAG,IAAI;QACrB,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,SAAS;QAC/B,KAAK,EAAE;YACL;gBACE,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI;gBACrB,GAAG,EAAE,CAAC,CAAC,GAAG,GAAG,IAAI;gBACjB,UAAU,EAAE,CAAC,CAAC,UAAU;aACzB;SACF;KACF,CAAC,CAAC,CAAC;AACN,CAAC;AAED;;;;;;;;;;;;;;;GAeG;AACH,MAAM,OAAO,qBAAqB;IAOhC,YAA6B,MAAmC;QAAnC,WAAM,GAAN,MAAM,CAA6B;QANhD,OAAE,GAAG,YAAY,CAAC;QAClB,gBAAW,GAAG,YAAY,CAAC;QAC3B,sBAAiB,GAAG,KAAK,CAAC;QAKxC,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,KAAK,CAAC;IAC7C,CAAC;IAED,gDAAgD;IAChD,eAAe;QACb,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED;;;;;;;;;OASG;IACH,KAAK,CAAC,UAAU,CACd,KAAuB,EACvB,UAAsC,EAAE;QAExC,MAAM,MAAM,GAAG,OAAO,CAAC,uBAAuB,EAAE,MAAiC,CAAC;QAClF,MAAM,SAAS,GAAG,kBAAkB,CAAC;QAErC,2EAA2E;QAC3E,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,eAAe,SAAS,EAAE;YACvE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;gBACjC,cAAc,EAAE,KAAK,CAAC,QAAQ,IAAI,WAAW;aAC9C;YACD,IAAI,EAAE,KAAK,CAAC,IAA2B;YACvC,MAAM;SACP,CAAC,CAAC;QAEH,IAAI,CAAC,cAAc,CAAC,EAAE,EAAE,CAAC;YACvB,MAAM,GAAG,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,6BAA6B,cAAc,CAAC,MAAM,MAAM,GAAG,EAAE,CAAC,CAAC;QACjF,CAAC;QAED,MAAM,EAAE,UAAU,EAAE,GAAG,CAAC,MAAM,cAAc,CAAC,IAAI,EAAE,CAA2B,CAAC;QAE/E,2EAA2E;QAC3E,MAAM,aAAa,GAA4B;YAC7C,SAAS,EAAE,UAAU;YACrB,cAAc,EAAE,OAAO,CAAC,wBAAwB,IAAI,KAAK;SAC1D,CAAC;QACF,IAAI,OAAO,CAAC,QAAQ;YAAE,aAAa,CAAC,aAAa,GAAG,OAAO,CAAC,QAAQ,CAAC;QAErE,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,eAAe,aAAa,EAAE;YAC
3E,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;gBACjC,cAAc,EAAE,kBAAkB;aACnC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC;YACnC,MAAM;SACP,CAAC,CAAC;QAEH,IAAI,CAAC,cAAc,CAAC,EAAE,EAAE,CAAC;YACvB,MAAM,GAAG,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,wCAAwC,cAAc,CAAC,MAAM,MAAM,GAAG,EAAE,CAAC,CAAC;QAC5F,CAAC;QAED,MAAM,EAAE,EAAE,EAAE,GAAG,CAAC,MAAM,cAAc,CAAC,IAAI,EAAE,CAAmB,CAAC;QAE/D,2EAA2E;QAC3E,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAExC,OAAO,IAAI,EAAE,CAAC;YACZ,IAAI,MAAM,EAAE,OAAO,EAAE,CAAC;gBACpB,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;YACvE,CAAC;YAED,IAAI,IAAI,CAAC,GAAG,EAAE,IAAI,QAAQ,EAAE,CAAC;gBAC3B,MAAM,IAAI,KAAK,CACb,4CAA4C,SAAS,GAAG,IAAI,qBAAqB,EAAE,GAAG,CACvF,CAAC;YACJ,CAAC;YAED,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,eAAe,eAAe,EAAE,EAAE,EAAE;gBAC/E,OAAO,EAAE,EAAE,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE;gBAC9C,MAAM;aACP,CAAC,CAAC;YAEH,IAAI,CAAC,YAAY,CAAC,EAAE,EAAE,CAAC;gBACrB,MAAM,GAAG,GAAG,MAAM,YAAY,CAAC,IAAI,EAAE,CAAC;gBACtC,MAAM,IAAI,KAAK,CAAC,2BAA2B,YAAY,CAAC,MAAM,MAAM,GAAG,EAAE,CAAC,CAAC;YAC7E,CAAC;YAED,MAAM,UAAU,GAAG,CAAC,MAAM,YAAY,CAAC,IAAI,EAAE,CAAyB,CAAC;YAEvE,IAAI,UAAU,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;gBAClC,MAAM,IAAI,KAAK,CAAC,mCAAmC,UAAU,CAAC,KAAK,IAAI,eAAe,EAAE,CAAC,CAAC;YAC5F,CAAC;YAED,IAAI,UAAU,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBACtC,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,IAAI,EAAE,CAAC;gBACnC,MAAM,eAAe,GAAG,UAAU,CAAC,cAAc,IAAI,KAAK,CAAC,eAAe,CAAC;gBAC3E,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,IAAI,EAAE,CAAC;gBAErC,OAAO;oBACL,IAAI;oBACJ,QAAQ,EAAE,UAAU,CAAC,aAAa,IAAI,OAAO,CAAC,QAAQ;oBACtD,eAAe;oBACf,UAAU,EAAE,UAAU,CAAC,UAAU,IAAI,SAAS;oBAC9C,IAAI,EAAE,CAAC;oBACP,QAAQ,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS;oBAC/D,gBAAgB,EAAE,UAAU;oBAC5B,OAAO,EAAE,IAAI;oBACb,KAAK,EAAE;wBACL,eAAe,EAAE,CAAC,eAAe,IAAI,CAAC,CAAC,GAAG,EAAE;wBAC5C,SAAS,EAAE,YAAY;qBACxB;iBACF,CAAC;YACJ,CAAC;YAED,0DAA0D;YAC1D,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC,CAAC;
QAC9E,CAAC;IACH,CAAC;CACF"}
1
+ {"version":3,"file":"AssemblyAISTTProvider.js","sourceRoot":"","sources":["../../../src/speech/providers/AssemblyAISTTProvider.ts"],"names":[],"mappings":"AAqFA,oDAAoD;AACpD,MAAM,eAAe,GAAG,+BAA+B,CAAC;AAExD;;;;;;;GAOG;AACH,MAAM,kBAAkB,GAAG,MAAO,CAAC;AAEnC;;;;;;GAMG;AACH,MAAM,gBAAgB,GAAG,IAAK,CAAC;AAE/B;;;;;;;;;;;;;;;GAeG;AACH,SAAS,eAAe,CAAC,KAAuB;IAC9C,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACvB,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,SAAS,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,EAAE,wDAAwD;QACnF,OAAO,EAAE,CAAC,CAAC,GAAG,GAAG,IAAI;QACrB,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,gEAAgE;QAChE,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,SAAS;QAC/B,KAAK,EAAE;YACL;gBACE,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI;gBACrB,GAAG,EAAE,CAAC,CAAC,GAAG,GAAG,IAAI;gBACjB,UAAU,EAAE,CAAC,CAAC,UAAU;aACzB;SACF;KACF,CAAC,CAAC,CAAC;AACN,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0DG;AACH,MAAM,OAAO,qBAAqB;IAiBhC;;;;;;;;;;;OAWG;IACH,YAA6B,MAAmC;QAAnC,WAAM,GAAN,MAAM,CAA6B;QA5BhE,uEAAuE;QACvD,OAAE,GAAG,YAAY,CAAC;QAElC,sDAAsD;QACtC,gBAAW,GAAG,YAAY,CAAC;QAE3C;;;;WAIG;QACa,sBAAiB,GAAG,KAAK,CAAC;QAkBxC,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,KAAK,CAAC;IAC7C,CAAC;IAED;;;;;;;;;OASG;IACH,eAAe;QACb,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;OA4BG;IACH,KAAK,CAAC,UAAU,CACd,KAAuB,EACvB,UAAsC,EAAE;QAExC,6DAA6D;QAC7D,mEAAmE;QACnE,MAAM,MAAM,GAAG,OAAO,CAAC,uBAAuB,EAAE,MAAiC,CAAC;QAClF,MAAM,SAAS,GAAG,kBAAkB,CAAC;QAErC,yEAAyE;QACzE,kEAAkE;QAClE,wEAAwE;QACxE,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,eAAe,SAAS,EAAE;YACvE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;gBACjC,cAAc,EAAE,KAAK,CAAC,QAAQ,IAAI,WAAW;aAC9C;YACD,IAAI,EAAE,KAAK,CAAC,IAA2B;YACvC,MAAM;SACP,CAAC,CAAC;QAEH,IAAI,CAAC,cAAc,CAAC,EAAE,EAAE,CAAC;YACvB,MAAM,GAAG,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,6BAA6B,cAAc,CAAC,MAAM,MAAM,GAAG,EAAE,CAAC,CAAC;QACjF,CAAC;QAED,MAAM,EAAE,UAAU,EAAE,GAAG,CAAC,MAAM,cAAc,CAAC,IAAI,EAAE,CAA2B,CAAC;QAE/E,yEAAyE;QACzE,iEAAiE
;QACjE,8CAA8C;QAC9C,MAAM,aAAa,GAA4B;YAC7C,SAAS,EAAE,UAAU;YACrB,cAAc,EAAE,OAAO,CAAC,wBAAwB,IAAI,KAAK;SAC1D,CAAC;QACF,IAAI,OAAO,CAAC,QAAQ;YAAE,aAAa,CAAC,aAAa,GAAG,OAAO,CAAC,QAAQ,CAAC;QAErE,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,eAAe,aAAa,EAAE;YAC3E,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;gBACjC,cAAc,EAAE,kBAAkB;aACnC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC;YACnC,MAAM;SACP,CAAC,CAAC;QAEH,IAAI,CAAC,cAAc,CAAC,EAAE,EAAE,CAAC;YACvB,MAAM,GAAG,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,wCAAwC,cAAc,CAAC,MAAM,MAAM,GAAG,EAAE,CAAC,CAAC;QAC5F,CAAC;QAED,MAAM,EAAE,EAAE,EAAE,GAAG,CAAC,MAAM,cAAc,CAAC,IAAI,EAAE,CAAmB,CAAC;QAE/D,yEAAyE;QACzE,sEAAsE;QACtE,+CAA+C;QAC/C,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAExC,OAAO,IAAI,EAAE,CAAC;YACZ,2DAA2D;YAC3D,IAAI,MAAM,EAAE,OAAO,EAAE,CAAC;gBACpB,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;YACvE,CAAC;YAED,4EAA4E;YAC5E,IAAI,IAAI,CAAC,GAAG,EAAE,IAAI,QAAQ,EAAE,CAAC;gBAC3B,MAAM,IAAI,KAAK,CACb,4CAA4C,SAAS,GAAG,IAAI,qBAAqB,EAAE,GAAG,CACvF,CAAC;YACJ,CAAC;YAED,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,eAAe,eAAe,EAAE,EAAE,EAAE;gBAC/E,OAAO,EAAE,EAAE,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE;gBAC9C,MAAM;aACP,CAAC,CAAC;YAEH,IAAI,CAAC,YAAY,CAAC,EAAE,EAAE,CAAC;gBACrB,MAAM,GAAG,GAAG,MAAM,YAAY,CAAC,IAAI,EAAE,CAAC;gBACtC,MAAM,IAAI,KAAK,CAAC,2BAA2B,YAAY,CAAC,MAAM,MAAM,GAAG,EAAE,CAAC,CAAC;YAC7E,CAAC;YAED,MAAM,UAAU,GAAG,CAAC,MAAM,YAAY,CAAC,IAAI,EAAE,CAAyB,CAAC;YAEvE,4DAA4D;YAC5D,IAAI,UAAU,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;gBAClC,MAAM,IAAI,KAAK,CAAC,mCAAmC,UAAU,CAAC,KAAK,IAAI,eAAe,EAAE,CAAC,CAAC;YAC5F,CAAC;YAED,iEAAiE;YACjE,IAAI,UAAU,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBACtC,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,IAAI,EAAE,CAAC;gBACnC,MAAM,eAAe,GAAG,UAAU,CAAC,cAAc,IAAI,KAAK,CAAC,eAAe,CAAC;gBAC3E,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,IAAI,EAAE,CAAC;gBAErC,OAAO;oBACL,IAAI;oBACJ,QAAQ,EAAE,UAAU,CAAC,aAAa,IAAI,OAAO,CAAC,QAAQ;oBACtD,eAAe;oBACf,UAAU,EAAE,UAAU,CAAC,UAAU,IAAI,SAAS;oBAC9C,IAAI,EAAE,CAAC,EAAE,6CAA6C;oBACtD,QAAQ,EAAE,KAAK,CA
AC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS;oBAC/D,gBAAgB,EAAE,UAAU;oBAC5B,OAAO,EAAE,IAAI,EAAE,yCAAyC;oBACxD,KAAK,EAAE;wBACL,eAAe,EAAE,CAAC,eAAe,IAAI,CAAC,CAAC,GAAG,EAAE;wBAC5C,SAAS,EAAE,YAAY;qBACxB;iBACF,CAAC;YACJ,CAAC;YAED,6EAA6E;YAC7E,mEAAmE;YACnE,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC,CAAC;QAC9E,CAAC;IACH,CAAC;CACF"}
@@ -1,47 +1,151 @@
1
1
  import type { SpeechAudioInput, SpeechToTextProvider, SpeechTranscriptionOptions, SpeechTranscriptionResult } from '../types.js';
2
- /** Configuration for the AzureSpeechSTTProvider. */
2
+ /**
3
+ * Configuration for the {@link AzureSpeechSTTProvider}.
4
+ *
5
+ * @see {@link AzureSpeechSTTProvider} for usage examples
6
+ * @see https://learn.microsoft.com/azure/ai-services/speech-service/rest-speech-to-text
7
+ */
3
8
  export interface AzureSpeechSTTProviderConfig {
4
- /** Azure Cognitive Services subscription key. */
9
+ /**
10
+ * Azure Cognitive Services subscription key.
11
+ * Sent as the `Ocp-Apim-Subscription-Key` header — this is Azure's
12
+ * standard authentication mechanism for Cognitive Services REST APIs.
13
+ * Obtain from the Azure portal under your Speech resource's "Keys and Endpoint".
14
+ */
5
15
  key: string;
6
- /** Azure region, e.g. `'eastus'` or `'westeurope'`. */
16
+ /**
17
+ * Azure region where the Speech resource is deployed, e.g. `'eastus'`,
18
+ * `'westeurope'`, `'southeastasia'`.
19
+ *
20
+ * The region determines the REST endpoint hostname:
21
+ * `https://{region}.stt.speech.microsoft.com`
22
+ *
23
+ * @see https://learn.microsoft.com/azure/ai-services/speech-service/regions
24
+ */
7
25
  region: string;
8
26
  /**
9
- * Custom fetch implementation, useful for testing.
10
- * Defaults to the global `fetch`.
27
+ * Custom fetch implementation for dependency injection in tests.
28
+ * @default globalThis.fetch
11
29
  */
12
30
  fetchImpl?: typeof fetch;
13
31
  }
14
32
  /**
15
33
  * Speech-to-text provider that uses the Azure Cognitive Services Speech REST API.
16
34
  *
17
- * Sends WAV audio as a raw binary body and returns a normalised
18
- * {@link SpeechTranscriptionResult}. A `RecognitionStatus` of `'NoMatch'`
19
- * is mapped to an empty text result rather than an error, matching the
20
- * Azure SDK behaviour.
35
+ * ## Azure REST Endpoint Format
36
+ *
37
+ * The endpoint URL follows this pattern:
38
+ * ```
39
+ * https://{region}.stt.speech.microsoft.com/speech/recognition/conversation/cognitiveservices/v1?language={lang}
40
+ * ```
41
+ *
42
+ * - `{region}` — The Azure region from config (e.g. `eastus`, `westeurope`).
43
+ * - `{lang}` — BCP-47 language code from options or `'en-US'` default.
44
+ * - The `/conversation/` path segment selects the conversation recognition mode
45
+ * (as opposed to `/interactive/` or `/dictation/`).
46
+ *
47
+ * ## Authentication: `Ocp-Apim-Subscription-Key`
48
+ *
49
+ * Azure Cognitive Services uses the `Ocp-Apim-Subscription-Key` HTTP header
50
+ * for authentication, which differs from the typical `Authorization: Bearer`
51
+ * pattern. The subscription key is sent as a plain-text header value — no
52
+ * "Bearer" or "Token" prefix.
53
+ *
54
+ * An alternative is to use a short-lived token from the token endpoint, but
55
+ * this provider uses the simpler key-based approach for reliability.
56
+ *
57
+ * ## NoMatch Handling
58
+ *
59
+ * When Azure's recognizer detects audio but cannot identify any speech, it
60
+ * returns `RecognitionStatus: 'NoMatch'` instead of raising an HTTP error.
61
+ * This provider maps `NoMatch` to an empty-text result (`text: ''`) with
62
+ * `isFinal: true`, matching the Azure Speech SDK's behaviour. This prevents
63
+ * the fallback proxy from unnecessarily trying another provider when the
64
+ * audio genuinely contains no speech.
65
+ *
66
+ * ## Limitations
67
+ *
68
+ * - Audio must be in PCM WAV format. The `Content-Type` is hardcoded to
69
+ * `audio/wav` regardless of the `audio.mimeType` value.
70
+ * - Streaming is not supported — use the Azure Speech SDK for real-time STT.
71
+ * - Speaker diarization is not available via the REST API.
72
+ *
73
+ * @see {@link AzureSpeechSTTProviderConfig} for configuration options
74
+ * @see {@link AzureSpeechTTSProvider} for the corresponding TTS provider
21
75
  *
22
76
  * @example
23
77
  * ```ts
24
- * const provider = new AzureSpeechSTTProvider({ key: process.env.AZURE_SPEECH_KEY!, region: 'eastus' });
25
- * const result = await provider.transcribe({ data: wavBuffer });
26
- * console.log(result.text);
78
+ * const provider = new AzureSpeechSTTProvider({
79
+ * key: process.env.AZURE_SPEECH_KEY!,
80
+ * region: 'eastus',
81
+ * });
82
+ * const result = await provider.transcribe(
83
+ * { data: wavBuffer, mimeType: 'audio/wav' },
84
+ * { language: 'de-DE' },
85
+ * );
86
+ * console.log(result.text); // '' if no speech detected
27
87
  * ```
28
88
  */
29
89
  export declare class AzureSpeechSTTProvider implements SpeechToTextProvider {
30
90
  private readonly config;
91
+ /** Unique provider identifier used for registration and resolution. */
31
92
  readonly id = "azure-speech-stt";
93
+ /** Human-readable display name for UI and logging. */
32
94
  readonly displayName = "Azure Speech (STT)";
95
+ /** This provider uses one-shot HTTP request/response calls, not WebSocket streaming. */
33
96
  readonly supportsStreaming = false;
97
+ /** Fetch implementation — injected for testability, defaults to global fetch. */
34
98
  private readonly fetchImpl;
99
+ /**
100
+ * Creates a new AzureSpeechSTTProvider.
101
+ *
102
+ * @param config - Provider configuration including the subscription key and region.
103
+ *
104
+ * @example
105
+ * ```ts
106
+ * const provider = new AzureSpeechSTTProvider({
107
+ * key: 'your-azure-subscription-key',
108
+ * region: 'eastus',
109
+ * });
110
+ * ```
111
+ */
35
112
  constructor(config: AzureSpeechSTTProviderConfig);
36
- /** Returns the human-readable provider name. */
113
+ /**
114
+ * Returns the human-readable provider name.
115
+ *
116
+ * @returns The display name string `'Azure Speech (STT)'`.
117
+ *
118
+ * @example
119
+ * ```ts
120
+ * provider.getProviderName(); // 'Azure Speech (STT)'
121
+ * ```
122
+ */
37
123
  getProviderName(): string;
38
124
  /**
39
125
  * Transcribes an audio buffer using the Azure Speech recognition REST endpoint.
40
126
  *
41
- * @param audio - Raw audio data. Azure expects PCM WAV; pass `mimeType: 'audio/wav'`.
42
- * @param options - Optional transcription settings (language…).
43
- * @returns A promise resolving to the normalised transcription result.
44
- * @throws When the Azure API returns a non-2xx status.
127
+ * Sends the raw audio as PCM WAV and returns a normalized result. Azure's
128
+ * `NoMatch` status is treated as an empty transcript (not an error).
129
+ *
130
+ * @param audio - Raw audio data. Azure expects PCM WAV format; the
131
+ * Content-Type header is always set to `'audio/wav'` regardless of
132
+ * `audio.mimeType`.
133
+ * @param options - Optional transcription settings. Only `language` is
134
+ * supported by the Azure REST endpoint.
135
+ * @returns A promise resolving to the normalized transcription result.
136
+ * @throws {Error} When the Azure API returns a non-2xx HTTP status code.
137
+ * The error message includes the status and response body text.
138
+ *
139
+ * @example
140
+ * ```ts
141
+ * const result = await provider.transcribe(
142
+ * { data: wavBuffer, durationSeconds: 5 },
143
+ * { language: 'fr-FR' },
144
+ * );
145
+ * if (result.text === '') {
146
+ * console.log('No speech detected in the audio');
147
+ * }
148
+ * ```
45
149
  */
46
150
  transcribe(audio: SpeechAudioInput, options?: SpeechTranscriptionOptions): Promise<SpeechTranscriptionResult>;
47
151
  }
@@ -1 +1 @@
1
- {"version":3,"file":"AzureSpeechSTTProvider.d.ts","sourceRoot":"","sources":["../../../src/speech/providers/AzureSpeechSTTProvider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,gBAAgB,EAChB,oBAAoB,EACpB,0BAA0B,EAC1B,yBAAyB,EAC1B,MAAM,aAAa,CAAC;AAErB,oDAAoD;AACpD,MAAM,WAAW,4BAA4B;IAC3C,iDAAiD;IACjD,GAAG,EAAE,MAAM,CAAC;IACZ,uDAAuD;IACvD,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,SAAS,CAAC,EAAE,OAAO,KAAK,CAAC;CAC1B;AAoBD;;;;;;;;;;;;;;GAcG;AACH,qBAAa,sBAAuB,YAAW,oBAAoB;IAOrD,OAAO,CAAC,QAAQ,CAAC,MAAM;IANnC,SAAgB,EAAE,sBAAsB;IACxC,SAAgB,WAAW,wBAAwB;IACnD,SAAgB,iBAAiB,SAAS;IAE1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAe;gBAEZ,MAAM,EAAE,4BAA4B;IAIjE,gDAAgD;IAChD,eAAe,IAAI,MAAM;IAIzB;;;;;;;OAOG;IACG,UAAU,CACd,KAAK,EAAE,gBAAgB,EACvB,OAAO,GAAE,0BAA+B,GACvC,OAAO,CAAC,yBAAyB,CAAC;CA0DtC"}
1
+ {"version":3,"file":"AzureSpeechSTTProvider.d.ts","sourceRoot":"","sources":["../../../src/speech/providers/AzureSpeechSTTProvider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,gBAAgB,EAChB,oBAAoB,EACpB,0BAA0B,EAC1B,yBAAyB,EAC1B,MAAM,aAAa,CAAC;AAErB;;;;;GAKG;AACH,MAAM,WAAW,4BAA4B;IAC3C;;;;;OAKG;IACH,GAAG,EAAE,MAAM,CAAC;IAEZ;;;;;;;;OAQG;IACH,MAAM,EAAE,MAAM,CAAC;IAEf;;;OAGG;IACH,SAAS,CAAC,EAAE,OAAO,KAAK,CAAC;CAC1B;AA4DD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAwDG;AACH,qBAAa,sBAAuB,YAAW,oBAAoB;IA0BrD,OAAO,CAAC,QAAQ,CAAC,MAAM;IAzBnC,uEAAuE;IACvE,SAAgB,EAAE,sBAAsB;IAExC,sDAAsD;IACtD,SAAgB,WAAW,wBAAwB;IAEnD,6EAA6E;IAC7E,SAAgB,iBAAiB,SAAS;IAE1C,iFAAiF;IACjF,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAe;IAEzC;;;;;;;;;;;;OAYG;gBAC0B,MAAM,EAAE,4BAA4B;IAIjE;;;;;;;;;OASG;IACH,eAAe,IAAI,MAAM;IAIzB;;;;;;;;;;;;;;;;;;;;;;;;;OAyBG;IACG,UAAU,CACd,KAAK,EAAE,gBAAgB,EACvB,OAAO,GAAE,0BAA+B,GACvC,OAAO,CAAC,yBAAyB,CAAC;CAmEtC"}