@speech-sdk/core 0.6.2 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/LICENSE +202 -21
  2. package/README.md +215 -269
  3. package/dist/__tests__/e2e/_save-audio.d.ts +25 -2
  4. package/dist/__tests__/e2e/_save-audio.d.ts.map +1 -1
  5. package/dist/__tests__/e2e/_save-audio.js +46 -10
  6. package/dist/__tests__/e2e/_save-audio.js.map +1 -1
  7. package/dist/audio-utils.d.ts +2 -0
  8. package/dist/audio-utils.d.ts.map +1 -1
  9. package/dist/audio-utils.js +9 -0
  10. package/dist/audio-utils.js.map +1 -1
  11. package/dist/captions.d.ts +137 -0
  12. package/dist/captions.d.ts.map +1 -0
  13. package/dist/captions.js +283 -0
  14. package/dist/captions.js.map +1 -0
  15. package/dist/conversation/stitch.d.ts +5 -0
  16. package/dist/conversation/stitch.d.ts.map +1 -1
  17. package/dist/conversation/stitch.js +37 -0
  18. package/dist/conversation/stitch.js.map +1 -1
  19. package/dist/conversation/types.d.ts +16 -0
  20. package/dist/conversation/types.d.ts.map +1 -1
  21. package/dist/derive-timestamps.d.ts +14 -0
  22. package/dist/derive-timestamps.d.ts.map +1 -0
  23. package/dist/derive-timestamps.js +38 -0
  24. package/dist/derive-timestamps.js.map +1 -0
  25. package/dist/errors.d.ts +25 -0
  26. package/dist/errors.d.ts.map +1 -1
  27. package/dist/errors.js +28 -0
  28. package/dist/errors.js.map +1 -1
  29. package/dist/generate-conversation.d.ts +1 -1
  30. package/dist/generate-conversation.d.ts.map +1 -1
  31. package/dist/generate-conversation.js +59 -0
  32. package/dist/generate-conversation.js.map +1 -1
  33. package/dist/generate-speech.d.ts +18 -1
  34. package/dist/generate-speech.d.ts.map +1 -1
  35. package/dist/generate-speech.js +73 -16
  36. package/dist/generate-speech.js.map +1 -1
  37. package/dist/index.d.ts +6 -2
  38. package/dist/index.d.ts.map +1 -1
  39. package/dist/index.js +2 -1
  40. package/dist/index.js.map +1 -1
  41. package/dist/logger.d.ts +2 -0
  42. package/dist/logger.d.ts.map +1 -0
  43. package/dist/logger.js +40 -0
  44. package/dist/logger.js.map +1 -0
  45. package/dist/provider-utils.d.ts +8 -0
  46. package/dist/provider-utils.d.ts.map +1 -1
  47. package/dist/provider-utils.js +16 -2
  48. package/dist/provider-utils.js.map +1 -1
  49. package/dist/providers/cartesia/alignment.d.ts +24 -0
  50. package/dist/providers/cartesia/alignment.d.ts.map +1 -0
  51. package/dist/providers/cartesia/alignment.js +23 -0
  52. package/dist/providers/cartesia/alignment.js.map +1 -0
  53. package/dist/providers/cartesia/index.d.ts +12 -2
  54. package/dist/providers/cartesia/index.d.ts.map +1 -1
  55. package/dist/providers/cartesia/index.js +137 -2
  56. package/dist/providers/cartesia/index.js.map +1 -1
  57. package/dist/providers/elevenlabs/alignment.d.ts +24 -0
  58. package/dist/providers/elevenlabs/alignment.d.ts.map +1 -0
  59. package/dist/providers/elevenlabs/alignment.js +48 -0
  60. package/dist/providers/elevenlabs/alignment.js.map +1 -0
  61. package/dist/providers/elevenlabs/index.d.ts +19 -4
  62. package/dist/providers/elevenlabs/index.d.ts.map +1 -1
  63. package/dist/providers/elevenlabs/index.js +83 -13
  64. package/dist/providers/elevenlabs/index.js.map +1 -1
  65. package/dist/providers/fal/index.d.ts +0 -25
  66. package/dist/providers/fal/index.d.ts.map +1 -1
  67. package/dist/providers/fal/index.js +3 -58
  68. package/dist/providers/fal/index.js.map +1 -1
  69. package/dist/providers/hume/alignment.d.ts +38 -0
  70. package/dist/providers/hume/alignment.d.ts.map +1 -0
  71. package/dist/providers/hume/alignment.js +31 -0
  72. package/dist/providers/hume/alignment.js.map +1 -0
  73. package/dist/providers/hume/index.d.ts +8 -1
  74. package/dist/providers/hume/index.d.ts.map +1 -1
  75. package/dist/providers/hume/index.js +75 -1
  76. package/dist/providers/hume/index.js.map +1 -1
  77. package/dist/providers/inworld/alignment.d.ts +25 -0
  78. package/dist/providers/inworld/alignment.d.ts.map +1 -0
  79. package/dist/providers/inworld/alignment.js +23 -0
  80. package/dist/providers/inworld/alignment.js.map +1 -0
  81. package/dist/providers/inworld/index.d.ts +11 -2
  82. package/dist/providers/inworld/index.d.ts.map +1 -1
  83. package/dist/providers/inworld/index.js +11 -2
  84. package/dist/providers/inworld/index.js.map +1 -1
  85. package/dist/providers/murf/alignment.d.ts +22 -0
  86. package/dist/providers/murf/alignment.d.ts.map +1 -0
  87. package/dist/providers/murf/alignment.js +17 -0
  88. package/dist/providers/murf/alignment.js.map +1 -0
  89. package/dist/providers/murf/index.d.ts +8 -1
  90. package/dist/providers/murf/index.d.ts.map +1 -1
  91. package/dist/providers/murf/index.js +10 -1
  92. package/dist/providers/murf/index.js.map +1 -1
  93. package/dist/providers/openai/index.d.ts +12 -3
  94. package/dist/providers/openai/index.d.ts.map +1 -1
  95. package/dist/providers/openai/index.js +7 -3
  96. package/dist/providers/openai/index.js.map +1 -1
  97. package/dist/providers/resemble/alignment.d.ts +32 -0
  98. package/dist/providers/resemble/alignment.d.ts.map +1 -0
  99. package/dist/providers/resemble/alignment.js +57 -0
  100. package/dist/providers/resemble/alignment.js.map +1 -0
  101. package/dist/providers/resemble/index.d.ts +7 -1
  102. package/dist/providers/resemble/index.d.ts.map +1 -1
  103. package/dist/providers/resemble/index.js +13 -1
  104. package/dist/providers/resemble/index.js.map +1 -1
  105. package/dist/resolve-provider.d.ts.map +1 -1
  106. package/dist/resolve-provider.js +3 -12
  107. package/dist/resolve-provider.js.map +1 -1
  108. package/dist/speech-provider.d.ts +48 -4
  109. package/dist/speech-provider.d.ts.map +1 -1
  110. package/dist/speech-provider.js +16 -0
  111. package/dist/speech-provider.js.map +1 -1
  112. package/dist/speech-result.d.ts +10 -0
  113. package/dist/speech-result.d.ts.map +1 -1
  114. package/dist/speech-result.js.map +1 -1
  115. package/dist/speech-to-text-provider.d.ts +40 -0
  116. package/dist/speech-to-text-provider.d.ts.map +1 -0
  117. package/dist/speech-to-text-provider.js +2 -0
  118. package/dist/speech-to-text-provider.js.map +1 -0
  119. package/dist/stt-providers/openai/index.d.ts +42 -0
  120. package/dist/stt-providers/openai/index.d.ts.map +1 -0
  121. package/dist/stt-providers/openai/index.js +184 -0
  122. package/dist/stt-providers/openai/index.js.map +1 -0
  123. package/dist/timestamps.d.ts +23 -0
  124. package/dist/timestamps.d.ts.map +1 -0
  125. package/dist/timestamps.js +2 -0
  126. package/dist/timestamps.js.map +1 -0
  127. package/package.json +6 -2
@@ -0,0 +1,184 @@
1
+ import { parseMediaTypeParam, wrapPcm16Mono } from "../../audio-utils.js";
2
+ import { handleErrorResponse, resolveApiKey, SDK_USER_AGENT, } from "../../provider-utils.js";
3
+ // OpenAI Whisper is advertised as 50+ languages; we list the ISO-639-1 codes
4
+ // the API's `language` parameter accepts. Matches the TTS provider's list.
5
+ const OPENAI_STT_LANGUAGES = [
6
+ "af",
7
+ "ar",
8
+ "az",
9
+ "be",
10
+ "bg",
11
+ "bn",
12
+ "bs",
13
+ "ca",
14
+ "cs",
15
+ "cy",
16
+ "da",
17
+ "de",
18
+ "el",
19
+ "en",
20
+ "es",
21
+ "et",
22
+ "fa",
23
+ "fi",
24
+ "fr",
25
+ "gl",
26
+ "he",
27
+ "hi",
28
+ "hr",
29
+ "hu",
30
+ "hy",
31
+ "id",
32
+ "is",
33
+ "it",
34
+ "ja",
35
+ "kk",
36
+ "kn",
37
+ "ko",
38
+ "lt",
39
+ "lv",
40
+ "mi",
41
+ "mk",
42
+ "mr",
43
+ "ms",
44
+ "ne",
45
+ "nl",
46
+ "no",
47
+ "pl",
48
+ "pt",
49
+ "ro",
50
+ "ru",
51
+ "sk",
52
+ "sl",
53
+ "sr",
54
+ "sv",
55
+ "sw",
56
+ "ta",
57
+ "th",
58
+ "tl",
59
+ "tr",
60
+ "uk",
61
+ "ur",
62
+ "vi",
63
+ "zh",
64
+ ];
65
+ /**
66
+ * OpenAI Whisper / gpt-4o-transcribe adapter for the SDK's derived-timestamps
67
+ * path. Uses `/v1/audio/transcriptions` with `timestamp_granularities: ["word"]`
68
+ * and `response_format: "verbose_json"`.
69
+ *
70
+ * Note: `gpt-4o-transcribe-diarize` is intentionally not listed — that
71
+ * variant does not support `timestamp_granularities`.
72
+ */
73
+ export class OpenAISpeechToTextProvider {
74
+ id = "openai";
75
+ defaultModel = "whisper-1";
76
+ // Only whisper-1 supports word-level timestamps. The newer
77
+ // gpt-4o-transcribe / gpt-4o-mini-transcribe models accept `json` /
78
+ // `text` response formats only and don't expose `timestamp_granularities`,
79
+ // so they can't satisfy this provider's contract.
80
+ models = [
81
+ {
82
+ id: "whisper-1",
83
+ releaseDate: "2023-03-01",
84
+ languages: OPENAI_STT_LANGUAGES,
85
+ },
86
+ ];
87
+ apiKey;
88
+ baseURL;
89
+ fetchFn;
90
+ constructor(config = {}) {
91
+ this.apiKey = config.apiKey;
92
+ this.baseURL = config.baseURL ?? "https://api.openai.com/v1";
93
+ this.fetchFn = config.fetch ?? globalThis.fetch.bind(globalThis);
94
+ }
95
+ async transcribe(options) {
96
+ const { audio, mediaType } = await normalizeAudioForOpenAI(options.audio, options.mediaType);
97
+ const form = new FormData();
98
+ const filename = `audio.${mediaTypeToExtension(mediaType)}`;
99
+ // Cast via BlobPart: TS narrowing of Uint8Array<ArrayBufferLike> vs
100
+ // Blob's required ArrayBuffer-backed view is stricter than runtime.
101
+ form.append("file", new Blob([audio], { type: mediaType }), filename);
102
+ form.append("model", options.modelId);
103
+ form.append("response_format", "verbose_json");
104
+ form.append("timestamp_granularities[]", "word");
105
+ if (options.language) {
106
+ form.append("language", options.language);
107
+ }
108
+ const response = await this.fetchFn(`${this.baseURL}/audio/transcriptions`, {
109
+ method: "POST",
110
+ headers: {
111
+ Authorization: `Bearer ${resolveApiKey(this.apiKey, "OPENAI_API_KEY", "OpenAI")}`,
112
+ "X-User-Agent": SDK_USER_AGENT,
113
+ ...options.headers,
114
+ },
115
+ body: form,
116
+ signal: options.abortSignal,
117
+ });
118
+ await handleErrorResponse(response, `openai/${options.modelId}`);
119
+ const data = (await response.json());
120
+ const timestamps = (data.words ?? []).map((w) => ({
121
+ text: w.word,
122
+ start: w.start,
123
+ end: w.end,
124
+ }));
125
+ return {
126
+ timestamps,
127
+ text: data.text,
128
+ };
129
+ }
130
+ }
131
+ export function createOpenAISTT(config = {}) {
132
+ const provider = new OpenAISpeechToTextProvider(config);
133
+ return function openaiSTT(modelId) {
134
+ return {
135
+ provider,
136
+ modelId: modelId ?? provider.defaultModel,
137
+ };
138
+ };
139
+ }
140
+ // OpenAI transcription accepts mp3/mp4/mpeg/mpga/m4a/wav/webm/flac/ogg/opus
141
+ // but rejects raw PCM. When a TTS provider hands us raw little-endian PCM
142
+ // (stitch mode), we wrap it with a WAV header so the STT endpoint will
143
+ // parse it. `audio/l16` is intentionally NOT handled: RFC 2586 defines it
144
+ // as big-endian and `wrapPcm16Mono` writes little-endian — silently mis-
145
+ // wrapping would corrupt audio. No current provider emits L16; add an
146
+ // explicit byte-swap branch here if one does.
147
+ async function normalizeAudioForOpenAI(audio, mediaType) {
148
+ if (mediaTypeBase(mediaType) === "audio/pcm") {
149
+ const sampleRate = parseMediaTypeParam(mediaType, "rate") ?? 24_000;
150
+ return {
151
+ audio: await wrapPcm16Mono(audio, sampleRate),
152
+ mediaType: "audio/wav",
153
+ };
154
+ }
155
+ return { audio, mediaType };
156
+ }
157
+ function mediaTypeBase(mediaType) {
158
+ return mediaType.split(";")[0]?.trim().toLowerCase() ?? "";
159
+ }
160
+ function mediaTypeToExtension(mediaType) {
161
+ switch (mediaTypeBase(mediaType)) {
162
+ case "audio/mpeg":
163
+ case "audio/mp3":
164
+ return "mp3";
165
+ case "audio/wav":
166
+ case "audio/x-wav":
167
+ return "wav";
168
+ case "audio/ogg":
169
+ return "ogg";
170
+ case "audio/opus":
171
+ return "opus";
172
+ case "audio/flac":
173
+ return "flac";
174
+ case "audio/webm":
175
+ return "webm";
176
+ case "audio/mp4":
177
+ case "audio/m4a":
178
+ case "audio/x-m4a":
179
+ return "m4a";
180
+ default:
181
+ return "mp3";
182
+ }
183
+ }
184
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/stt-providers/openai/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAC;AAC1E,OAAO,EACL,mBAAmB,EACnB,aAAa,EACb,cAAc,GACf,MAAM,yBAAyB,CAAC;AAajC,6EAA6E;AAC7E,2EAA2E;AAC3E,MAAM,oBAAoB,GAAG;IAC3B,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;CACI,CAAC;AAEX;;;;;;;GAOG;AACH,MAAM,OAAO,0BAA0B;IAC5B,EAAE,GAAG,QAAQ,CAAC;IACd,YAAY,GAAG,WAAW,CAAC;IAEpC,2DAA2D;IAC3D,oEAAoE;IACpE,2EAA2E;IAC3E,kDAAkD;IACzC,MAAM,GAAG;QAChB;YACE,EAAE,EAAE,WAAW;YACf,WAAW,EAAE,YAAY;YACzB,SAAS,EAAE,oBAAoB;SAChC;KACO,CAAC;IAEM,MAAM,CAAqB;IAC3B,OAAO,CAAS;IAChB,OAAO,CAA0B;IAElD,YAAY,SAA2C,EAAE;QACvD,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC5B,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,2BAA2B,CAAC;QAC7D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,IAAI,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACnE,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,OAOhB;QAKC,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,GAAG,MAAM,uBAAuB,CACxD,OAAO,CAAC,KAAK,EACb,OAAO,CAAC,SAAS,CAClB,CAAC;QAEF,MAAM,IAAI,GAAG,IAAI,QAAQ,EAAE,CAAC;QAC5B,MAAM,QAAQ,GAAG,SAAS,oBAAoB,CAAC,SAAS,CAAC,EAAE,CAAC;QAC5D,oEAAoE;QACpE,oEAAoE;QACpE,IAAI,CAAC,MAAM,CACT,MAAM,EACN,IAAI,IAAI,CAAC,CAAC,KAAiB,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,EAClD,QAAQ,CACT,CAAC;QACF,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,OAAO,CAAC,OAAO,CAAC,CAAC;QACtC,IAAI,CAAC,MAAM,CAAC,iBAAiB,EAAE,cAAc,CAAC,CAAC;QAC/C,IAAI,CAAC,MAAM,CAAC,2BAA2B,EAAE,MAAM,CAAC,CAAC;QACjD,IAAI,OAAO,CAAC,QAAQ,EAAE,CAAC;YACrB,IAAI,CAAC,MAAM,CAAC,UAAU,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;QAC5C,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CACjC,GAAG,IAAI,CAAC,OAAO,uBAAuB,EACtC;YACE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,aAAa,EAAE,UAAU,aAAa,CAAC,IAAI,CAAC,MAAM,EAAE,gBAAgB,EAAE,QAAQ,CAAC,EAAE;gBACjF,cAAc,EAAE,cAAc;gBAC9B,GAAG,OAAO,CAAC,OAAO;aACnB;YACD,IAAI,EAAE,IAAI;YACV,MAAM,EAAE,OAAO,CAAC,WAAW;SAC5B,CACF,CAAC;QAEF,MAAM,mBAAmB,CAAC,QAAQ,EAAE,UAAU,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC;QAEjE,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAGlC,CAAC;QAEF,MAAM,UAAU,GAAoB,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACjE,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,KAAK,EAAE,CAAC,CAAC,KAAK;YACd,GAAG,EAAE,CAAC,CAAC,GAAG;SACX,CAAC,CAAC,CAAC;QAEJ,OAAO;YACL,UAAU;YACV,IAAI,EAAE,IAAI,CAAC,IAAI;SAChB,CAAC;IACJ,CAAC;CACF;AAED,MAAM,UAAU,eAAe,CAAC,SAA2C,EAAE;IAC3E,MAAM,QAAQ,GAAG,IAAI,0BAA0B,CAAC,MAAM,CAAC,CAAC;IAExD,OAAO,SAAS,SAAS,CAAC,OAAgB;QACxC,OAAO;YACL,QAAQ;YACR,OAAO,EAAE,OAAO,IAAI,QAAQ,CAAC,YAAY;SAC1C,CAAC;IACJ,CAAC,CAAC;AACJ,CAAC;AAED,4EAA4E;AAC5E,0EAA0E;AAC1E,uEAAuE;AACvE,0EAA0E;AAC1E,yEAAyE;AACzE,sEAAsE;AACtE,8CAA8C;AAC9C,KAAK,UAAU,uBAAuB,CACpC,KAAiB,EACjB,SAAiB;IAEjB,IAAI,aAAa,CAAC,SAAS,CAAC,KAAK,WAAW,EAAE,CAAC;QAC7C,MAAM,UAAU,GAAG,mBAAmB,CAAC,SAAS,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC;QACpE,OAAO;YACL,KAAK,EAAE,MAAM,aAAa,CAAC,KAAK,EAAE,UAAU,CAAC;YAC7C,SAAS,EAAE,WAAW;SACvB,CAAC;IACJ,CAAC;IACD,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;AAC9B,CAAC;AAED,SAAS,aAAa,CAAC,SAAiB;IACtC,OAAO,SAAS,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,CAAC;AAC7D,CAAC;AAED,SAAS,oBAAoB,CAAC,SAAiB;IAC7C,QAAQ,aAAa,CAAC,SAAS,CAAC,EAAE,CAAC;QACjC,KAAK,YAAY,CAAC;QAClB,KAAK,WAAW;YACd,OAAO,KAAK,CAAC;QACf,KAAK,WAAW,CAAC;QACjB,KAAK,aAAa;YAChB,OAAO,KAAK,CAAC;QACf,KAAK,WAAW;YACd,OAAO,KAAK,CAAC;QACf,KAAK,YAAY;YACf,OAAO,MAAM,CAAC;QAChB,KAAK,YAAY;YACf,OAAO,MAAM,CAAC;QAChB,KAAK,YAAY;YACf,OAAO,MAAM,CAAC;QAChB,KAAK,WAAW,CAAC;QACjB,KAAK,WAAW,CAAC;QACjB,KAAK,aAAa;YAChB,OAAO,KAAK,CAAC;QACf;YACE,OAAO,KAAK,CAAC;IACjB,CAAC;AACH,CAAC"}
@@ -0,0 +1,23 @@
1
+ /**
2
+ * Word-granularity alignment data. Timestamps are always in seconds from
3
+ * the start of the generated audio. Providers that natively return character
4
+ * or phoneme granularity are aggregated to words internally.
5
+ */
6
+ export interface WordTimestamp {
7
+ readonly end: number;
8
+ readonly start: number;
9
+ readonly text: string;
10
+ }
11
+ /**
12
+ * Controls whether `generateSpeech()` returns word timestamps.
13
+ *
14
+ * - `"auto"` (default): return timestamps only if the TTS provider supplies
15
+ * them natively. Free, no extra API calls.
16
+ * - `"on"`: always return timestamps. Uses native data when available;
17
+ * otherwise falls back to a speech-to-text round-trip of the synthesized
18
+ * audio (cost + latency implications).
19
+ * - `"off"`: never return timestamps, even when the provider would give them
20
+ * away for free.
21
+ */
22
+ export type TimestampMode = "on" | "auto" | "off";
23
+ //# sourceMappingURL=timestamps.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"timestamps.d.ts","sourceRoot":"","sources":["../src/timestamps.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,MAAM,WAAW,aAAa;IAC5B,QAAQ,CAAC,GAAG,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;CACvB;AAED;;;;;;;;;;GAUG;AACH,MAAM,MAAM,aAAa,GAAG,IAAI,GAAG,MAAM,GAAG,KAAK,CAAC"}
@@ -0,0 +1,2 @@
1
+ export {};
2
+ //# sourceMappingURL=timestamps.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"timestamps.js","sourceRoot":"","sources":["../src/timestamps.ts"],"names":[],"mappings":""}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@speech-sdk/core",
3
- "version": "0.6.2",
3
+ "version": "0.7.0",
4
4
  "description": "Universal, cross-platform text-to-speech SDK with multi-provider support.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -69,6 +69,10 @@
69
69
  "./xai": {
70
70
  "types": "./dist/providers/xai/index.d.ts",
71
71
  "default": "./dist/providers/xai/index.js"
72
+ },
73
+ "./stt/openai": {
74
+ "types": "./dist/stt-providers/openai/index.d.ts",
75
+ "default": "./dist/stt-providers/openai/index.js"
72
76
  }
73
77
  },
74
78
  "files": [
@@ -84,7 +88,7 @@
84
88
  "inworld",
85
89
  "ai"
86
90
  ],
87
- "license": "MIT",
91
+ "license": "Apache-2.0",
88
92
  "repository": {
89
93
  "type": "git",
90
94
  "url": "https://github.com/Jellypod-Inc/speech-sdk"