@speech-sdk/core 0.8.1-alpha → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. package/README.md +63 -1
  2. package/dist/audio-decode.d.ts +7 -0
  3. package/dist/audio-decode.d.ts.map +1 -0
  4. package/dist/audio-decode.js +109 -0
  5. package/dist/audio-decode.js.map +1 -0
  6. package/dist/audio-duration.d.ts.map +1 -1
  7. package/dist/audio-duration.js +3 -12
  8. package/dist/audio-duration.js.map +1 -1
  9. package/dist/audio-output.d.ts +39 -0
  10. package/dist/audio-output.d.ts.map +1 -0
  11. package/dist/audio-output.js +111 -0
  12. package/dist/audio-output.js.map +1 -0
  13. package/dist/audio-utils.d.ts +2 -0
  14. package/dist/audio-utils.d.ts.map +1 -1
  15. package/dist/audio-utils.js +55 -1
  16. package/dist/audio-utils.js.map +1 -1
  17. package/dist/conversation/pcm-concat.d.ts +0 -1
  18. package/dist/conversation/pcm-concat.d.ts.map +1 -1
  19. package/dist/conversation/pcm-concat.js +6 -143
  20. package/dist/conversation/pcm-concat.js.map +1 -1
  21. package/dist/conversation/stitch.d.ts +4 -0
  22. package/dist/conversation/stitch.d.ts.map +1 -1
  23. package/dist/conversation/stitch.js +30 -15
  24. package/dist/conversation/stitch.js.map +1 -1
  25. package/dist/conversation/types.d.ts +6 -2
  26. package/dist/conversation/types.d.ts.map +1 -1
  27. package/dist/encoders/mp3.d.ts +6 -0
  28. package/dist/encoders/mp3.d.ts.map +1 -0
  29. package/dist/encoders/mp3.js +54 -0
  30. package/dist/encoders/mp3.js.map +1 -0
  31. package/dist/errors.d.ts +15 -1
  32. package/dist/errors.d.ts.map +1 -1
  33. package/dist/errors.js +37 -1
  34. package/dist/errors.js.map +1 -1
  35. package/dist/generate-conversation.d.ts +2 -2
  36. package/dist/generate-conversation.d.ts.map +1 -1
  37. package/dist/generate-conversation.js +106 -44
  38. package/dist/generate-conversation.js.map +1 -1
  39. package/dist/generate-speech.d.ts +6 -2
  40. package/dist/generate-speech.d.ts.map +1 -1
  41. package/dist/generate-speech.js +130 -60
  42. package/dist/generate-speech.js.map +1 -1
  43. package/dist/index.d.ts +4 -1
  44. package/dist/index.d.ts.map +1 -1
  45. package/dist/index.js +2 -1
  46. package/dist/index.js.map +1 -1
  47. package/dist/pronunciations/errors.d.ts +5 -0
  48. package/dist/pronunciations/errors.d.ts.map +1 -0
  49. package/dist/pronunciations/errors.js +8 -0
  50. package/dist/pronunciations/errors.js.map +1 -0
  51. package/dist/pronunciations/index.d.ts +5 -0
  52. package/dist/pronunciations/index.d.ts.map +1 -0
  53. package/dist/pronunciations/index.js +5 -0
  54. package/dist/pronunciations/index.js.map +1 -0
  55. package/dist/pronunciations/inverse-align.d.ts +4 -0
  56. package/dist/pronunciations/inverse-align.d.ts.map +1 -0
  57. package/dist/pronunciations/inverse-align.js +54 -0
  58. package/dist/pronunciations/inverse-align.js.map +1 -0
  59. package/dist/pronunciations/merge.d.ts +4 -0
  60. package/dist/pronunciations/merge.d.ts.map +1 -0
  61. package/dist/pronunciations/merge.js +13 -0
  62. package/dist/pronunciations/merge.js.map +1 -0
  63. package/dist/pronunciations/substitute.d.ts +6 -0
  64. package/dist/pronunciations/substitute.d.ts.map +1 -0
  65. package/dist/pronunciations/substitute.js +67 -0
  66. package/dist/pronunciations/substitute.js.map +1 -0
  67. package/dist/pronunciations/types.d.ts +18 -0
  68. package/dist/pronunciations/types.d.ts.map +1 -0
  69. package/dist/pronunciations/types.js +2 -0
  70. package/dist/pronunciations/types.js.map +1 -0
  71. package/dist/pronunciations/validate.d.ts +3 -0
  72. package/dist/pronunciations/validate.d.ts.map +1 -0
  73. package/dist/pronunciations/validate.js +26 -0
  74. package/dist/pronunciations/validate.js.map +1 -0
  75. package/dist/provider-utils.d.ts +1 -0
  76. package/dist/provider-utils.d.ts.map +1 -1
  77. package/dist/provider-utils.js +26 -0
  78. package/dist/provider-utils.js.map +1 -1
  79. package/dist/providers/cartesia/index.d.ts +22 -0
  80. package/dist/providers/cartesia/index.d.ts.map +1 -1
  81. package/dist/providers/cartesia/index.js +48 -0
  82. package/dist/providers/cartesia/index.js.map +1 -1
  83. package/dist/providers/deepgram/index.d.ts +16 -0
  84. package/dist/providers/deepgram/index.d.ts.map +1 -1
  85. package/dist/providers/deepgram/index.js +34 -0
  86. package/dist/providers/deepgram/index.js.map +1 -1
  87. package/dist/providers/elevenlabs/index.d.ts +7 -0
  88. package/dist/providers/elevenlabs/index.d.ts.map +1 -1
  89. package/dist/providers/elevenlabs/index.js +25 -0
  90. package/dist/providers/elevenlabs/index.js.map +1 -1
  91. package/dist/providers/fal/index.d.ts +5 -3
  92. package/dist/providers/fal/index.d.ts.map +1 -1
  93. package/dist/providers/fal/index.js +13 -7
  94. package/dist/providers/fal/index.js.map +1 -1
  95. package/dist/providers/fish-audio/index.d.ts +7 -0
  96. package/dist/providers/fish-audio/index.d.ts.map +1 -1
  97. package/dist/providers/fish-audio/index.js +24 -0
  98. package/dist/providers/fish-audio/index.js.map +1 -1
  99. package/dist/providers/gateway/index.d.ts +8 -0
  100. package/dist/providers/gateway/index.d.ts.map +1 -1
  101. package/dist/providers/gateway/index.js +16 -1
  102. package/dist/providers/gateway/index.js.map +1 -1
  103. package/dist/providers/google/index.d.ts +5 -0
  104. package/dist/providers/google/index.d.ts.map +1 -1
  105. package/dist/providers/google/index.js +19 -11
  106. package/dist/providers/google/index.js.map +1 -1
  107. package/dist/providers/hume/index.d.ts +9 -0
  108. package/dist/providers/hume/index.d.ts.map +1 -1
  109. package/dist/providers/hume/index.js +24 -0
  110. package/dist/providers/hume/index.js.map +1 -1
  111. package/dist/providers/inworld/index.d.ts +10 -0
  112. package/dist/providers/inworld/index.d.ts.map +1 -1
  113. package/dist/providers/inworld/index.js +32 -8
  114. package/dist/providers/inworld/index.js.map +1 -1
  115. package/dist/providers/mistral/index.d.ts +7 -0
  116. package/dist/providers/mistral/index.d.ts.map +1 -1
  117. package/dist/providers/mistral/index.js +24 -10
  118. package/dist/providers/mistral/index.js.map +1 -1
  119. package/dist/providers/murf/index.d.ts +8 -0
  120. package/dist/providers/murf/index.d.ts.map +1 -1
  121. package/dist/providers/murf/index.js +40 -1
  122. package/dist/providers/murf/index.js.map +1 -1
  123. package/dist/providers/openai/index.d.ts +7 -0
  124. package/dist/providers/openai/index.d.ts.map +1 -1
  125. package/dist/providers/openai/index.js +24 -0
  126. package/dist/providers/openai/index.js.map +1 -1
  127. package/dist/providers/resemble/index.d.ts +14 -0
  128. package/dist/providers/resemble/index.d.ts.map +1 -1
  129. package/dist/providers/resemble/index.js +35 -1
  130. package/dist/providers/resemble/index.js.map +1 -1
  131. package/dist/providers/smallest-ai/index.d.ts +47 -0
  132. package/dist/providers/smallest-ai/index.d.ts.map +1 -0
  133. package/dist/providers/smallest-ai/index.js +107 -0
  134. package/dist/providers/smallest-ai/index.js.map +1 -0
  135. package/dist/providers/xai/index.d.ts +18 -0
  136. package/dist/providers/xai/index.d.ts.map +1 -1
  137. package/dist/providers/xai/index.js +26 -0
  138. package/dist/providers/xai/index.js.map +1 -1
  139. package/dist/providers.d.ts +2 -0
  140. package/dist/providers.d.ts.map +1 -1
  141. package/dist/providers.js +1 -0
  142. package/dist/providers.js.map +1 -1
  143. package/dist/retry-options.d.ts +6 -0
  144. package/dist/retry-options.d.ts.map +1 -0
  145. package/dist/retry-options.js +48 -0
  146. package/dist/retry-options.js.map +1 -0
  147. package/dist/speech-provider.d.ts +15 -0
  148. package/dist/speech-provider.d.ts.map +1 -1
  149. package/dist/speech-provider.js.map +1 -1
  150. package/dist/stream-speech.d.ts +4 -2
  151. package/dist/stream-speech.d.ts.map +1 -1
  152. package/dist/stream-speech.js +36 -21
  153. package/dist/stream-speech.js.map +1 -1
  154. package/dist/types.d.ts +6 -0
  155. package/dist/types.d.ts.map +1 -1
  156. package/dist/volume-adjust.d.ts.map +1 -1
  157. package/dist/volume-adjust.js +4 -10
  158. package/dist/volume-adjust.js.map +1 -1
  159. package/package.json +7 -1
@@ -1,14 +1,19 @@
1
1
  import pRetry from "p-retry";
2
2
  import { detectAudioTags, stripAudioTags } from "./audio-tags.js";
3
- import { ApiError, NoSpeechGeneratedError, StreamingNotSupportedError, } from "./errors.js";
4
- import { isRetriableApiError } from "./provider-utils.js";
3
+ import { NoSpeechGeneratedError, StreamingNotSupportedError, } from "./errors.js";
4
+ import { mergeRules } from "./pronunciations/merge.js";
5
+ import { substitute } from "./pronunciations/substitute.js";
6
+ import { validatePronunciationsInput } from "./pronunciations/validate.js";
5
7
  import { resolveModel } from "./resolve-provider.js";
6
- import { FEATURES, hasFeature, } from "./speech-provider.js";
8
+ import { buildRetryOptions } from "./retry-options.js";
9
+ import { FEATURES, hasFeature, isSpeechGatewayModel, } from "./speech-provider.js";
7
10
  export async function streamSpeech(options) {
8
11
  const { model, voice, providerOptions, abortSignal, headers } = options;
9
12
  const maxRetries = options.maxRetries ?? 2;
10
13
  const resolved = resolveModel(model, { apiKey: options.apiKey });
11
14
  const modelIdentifier = `${resolved.provider.id}/${resolved.modelId}`;
15
+ const isGateway = isSpeechGatewayModel(resolved);
16
+ validatePronunciationsInput(options.pronunciations, isGateway);
12
17
  const modelInfo = resolved.provider.models.find((m) => m.id === resolved.modelId);
13
18
  if (modelInfo && !hasFeature(modelInfo, FEATURES.STREAMING)) {
14
19
  throw new StreamingNotSupportedError(modelIdentifier);
@@ -36,30 +41,40 @@ export async function streamSpeech(options) {
36
41
  ? `Text is empty after removing unsupported audio tags for ${modelIdentifier}.`
37
42
  : "Text must not be empty.");
38
43
  }
44
+ let textToSend = processedText;
45
+ if (!isGateway && options.pronunciations?.rules?.length) {
46
+ const ruleMap = mergeRules(options.pronunciations.rules);
47
+ textToSend = substitute(processedText, ruleMap).text;
48
+ }
39
49
  const streamFn = resolved.provider.stream.bind(resolved.provider);
40
50
  const startTime = performance.now();
41
- const result = await pRetry(() => streamFn({
42
- modelId: resolved.modelId,
43
- text: processedText,
44
- voice,
45
- providerOptions,
46
- abortSignal,
47
- headers,
48
- }), {
49
- retries: maxRetries,
50
- signal: abortSignal,
51
- shouldRetry: ({ error }) => {
52
- if (error instanceof ApiError && !isRetriableApiError(error)) {
53
- return false;
54
- }
55
- return true;
56
- },
57
- });
51
+ const result = await pRetry(() => {
52
+ if (isGateway) {
53
+ const gatewayProvider = resolved.provider;
54
+ return gatewayProvider.stream({
55
+ modelId: resolved.modelId,
56
+ text: textToSend,
57
+ voice: voice,
58
+ providerOptions,
59
+ abortSignal,
60
+ headers,
61
+ pronunciations: options.pronunciations,
62
+ });
63
+ }
64
+ return streamFn({
65
+ modelId: resolved.modelId,
66
+ text: textToSend,
67
+ voice,
68
+ providerOptions,
69
+ abortSignal,
70
+ headers,
71
+ });
72
+ }, buildRetryOptions({ maxRetries, abortSignal }));
58
73
  const ttfbMs = Math.round(performance.now() - startTime);
59
74
  const metadata = {
60
75
  latencyMs: ttfbMs,
61
76
  ttfbMs,
62
- inputChars: processedText.length,
77
+ inputChars: options.text.length,
63
78
  ...(result.audioDurationMs != null && {
64
79
  audioDurationMs: result.audioDurationMs,
65
80
  }),
@@ -1 +1 @@
1
- {"version":3,"file":"stream-speech.js","sourceRoot":"","sources":["../src/stream-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,0BAA0B,GAC3B,MAAM,aAAa,CAAC;AAErB,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAC1D,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EACL,QAAQ,EACR,UAAU,GAGX,MAAM,sBAAsB,CAAC;AAG9B,MAAM,CAAC,KAAK,UAAU,YAAY,CAA0B,OAS3D;IACC,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,WAAW,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC;IACxE,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,CAAC;IAE3C,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACjE,MAAM,eAAe,GAAG,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;IAEtE,MAAM,SAAS,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAC7C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,QAAQ,CAAC,OAAO,CACjC,CAAC;IACF,IAAI,SAAS,IAAI,CAAC,UAAU,CAAC,SAAS,EAAE,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QAC5D,MAAM,IAAI,0BAA0B,CAAC,eAAe,CAAC,CAAC;IACxD,CAAC;IACD,IAAI,OAAO,QAAQ,CAAC,QAAQ,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;QACnD,MAAM,IAAI,0BAA0B,CAAC,eAAe,CAAC,CAAC;IACxD,CAAC;IAED,IAAI,aAAqB,CAAC;IAC1B,IAAI,QAAkB,CAAC;IAEvB,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACvC,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CACrE,OAAO,CAAC,IAAI,EACZ,QAAQ,CAAC,OAAO,CACjB,CAAC,CAAC;IACL,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,GAAG,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,cAAc,CACjD,OAAO,CAAC,IAAI,EACZ,eAAe,CAChB,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC;YAC7B,QAAQ,GAAG,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAED,IAAI,aAAa,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,MAAM,IAAI,sBAAsB,CAC9B,QAAQ,CAAC,MAAM,GAAG,CAAC;YACjB,CAAC,CAAC,2DAA2D,eAAe,GAAG;YAC/E,CAAC,CAAC,yBAAyB,CAC9B,CAAC;IACJ,CAAC;IAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAElE,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEpC,MAAM,MAAM,GAAG,MAAM,MAAM,CACzB,GAAG,EAAE,CACH,QAAQ,CAAC;QACP,OAAO,EAAE,QAAQ,CAAC,OAAO;QACzB,IAAI,EAAE,aAAa;QACnB,KAAK;QACL,eAAe;QACf,WAAW;QACX,OAAO;KACR,CAAC,EACJ;QACE,OAAO,EAAE,UAAU;QACnB,MAAM,EAAE,WAAW;QACnB,WAAW,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE;YACzB,IAAI,KAAK,YAAY,QAAQ,IAAI,CAAC,mBAAmB,CAAC,KAAK,CAAC,EAAE,CAAC;gBAC7D,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC;KACF,CACF,CAAC;IAEF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,CAAC;IAEzD,MAAM,QAAQ,GAAmB;QAC/B,SAAS,EAAE,MAAM;QACjB,MAAM;QACN,UAAU,EAAE,aAAa,CAAC,MAAM;QAChC,GAAG,CAAC,MAAM,CAAC,eAAe,IAAI,IAAI,IAAI;YACpC,eAAe,EAAE,MAAM,CAAC,eAAe;SACxC,CAAC;KACH,CAAC;IAEF,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,MAAM;QACpB,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,QAAQ;QACR,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;KACrD,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"stream-speech.js","sourceRoot":"","sources":["../src/stream-speech.ts"],"names":[],"mappings":"AAAA,OAAO,MAAM,MAAM,SAAS,CAAC;AAC7B,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClE,OAAO,EACL,sBAAsB,EACtB,0BAA0B,GAC3B,MAAM,aAAa,CAAC;AAErB,OAAO,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAC;AACvD,OAAO,EAAE,UAAU,EAAE,MAAM,gCAAgC,CAAC;AAE5D,OAAO,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAE3E,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AACrD,OAAO,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EACL,QAAQ,EACR,UAAU,EACV,oBAAoB,GAGrB,MAAM,sBAAsB,CAAC;AAG9B,MAAM,CAAC,KAAK,UAAU,YAAY,CAGhC,OAUD;IACC,MAAM,EAAE,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,WAAW,EAAE,OAAO,EAAE,GAAG,OAAO,CAAC;IACxE,MAAM,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,CAAC,CAAC;IAE3C,MAAM,QAAQ,GAAG,YAAY,CAAC,KAAK,EAAE,EAAE,MAAM,EAAE,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;IACjE,MAAM,eAAe,GAAG,GAAG,QAAQ,CAAC,QAAQ,CAAC,EAAE,IAAI,QAAQ,CAAC,OAAO,EAAE,CAAC;IACtE,MAAM,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;IACjD,2BAA2B,CAAC,OAAO,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;IAE/D,MAAM,SAAS,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAC7C,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,KAAK,QAAQ,CAAC,OAAO,CACjC,CAAC;IACF,IAAI,SAAS,IAAI,CAAC,UAAU,CAAC,SAAS,EAAE,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC;QAC5D,MAAM,IAAI,0BAA0B,CAAC,eAAe,CAAC,CAAC;IACxD,CAAC;IACD,IAAI,OAAO,QAAQ,CAAC,QAAQ,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;QACnD,MAAM,IAAI,0BAA0B,CAAC,eAAe,CAAC,CAAC;IACxD,CAAC;IAED,IAAI,aAAqB,CAAC;IAC1B,IAAI,QAAkB,CAAC;IAEvB,IAAI,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,EAAE,CAAC;QACvC,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,gBAAgB,CACrE,OAAO,CAAC,IAAI,EACZ,QAAQ,CAAC,OAAO,CACjB,CAAC,CAAC;IACL,CAAC;SAAM,CAAC;QACN,MAAM,IAAI,GAAG,eAAe,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QAC3C,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,CAAC,EAAE,IAAI,EAAE,aAAa,EAAE,QAAQ,EAAE,GAAG,cAAc,CACjD,OAAO,CAAC,IAAI,EACZ,eAAe,CAChB,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC;YAC7B,QAAQ,GAAG,EAAE,CAAC;QAChB,CAAC;IACH,CAAC;IAED,IAAI,aAAa,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,MAAM,IAAI,sBAAsB,CAC9B,QAAQ,CAAC,MAAM,GAAG,CAAC;YACjB,CAAC,CAAC,2DAA2D,eAAe,GAAG;YAC/E,CAAC,CAAC,yBAAyB,CAC9B,CAAC;IACJ,CAAC;IAED,IAAI,UAAU,GAAG,aAAa,CAAC;IAC/B,IAAI,CAAC,SAAS,IAAI,OAAO,CAAC,cAAc,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;QACxD,MAAM,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,cAAc,CAAC,KAAK,CAAC,CAAC;QACzD,UAAU,GAAG,UAAU,CAAC,aAAa,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;IACvD,CAAC;IAED,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAElE,MAAM,SAAS,GAAG,WAAW,CAAC,GAAG,EAAE,CAAC;IAEpC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,GAAG,EAAE;QAC/B,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,eAAe,GAAG,QAAQ,CAAC,QAAiC,CAAC;YACnE,OAAO,eAAe,CAAC,MAAM,CAAC;gBAC5B,OAAO,EAAE,QAAQ,CAAC,OAAO;gBACzB,IAAI,EAAE,UAAU;gBAChB,KAAK,EAAE,KAA0B;gBACjC,eAAe;gBACf,WAAW;gBACX,OAAO;gBACP,cAAc,EAAE,OAAO,CAAC,cAAc;aACvC,CAAC,CAAC;QACL,CAAC;QACD,OAAO,QAAQ,CAAC;YACd,OAAO,EAAE,QAAQ,CAAC,OAAO;YACzB,IAAI,EAAE,UAAU;YAChB,KAAK;YACL,eAAe;YACf,WAAW;YACX,OAAO;SACR,CAAC,CAAC;IACL,CAAC,EAAE,iBAAiB,CAAC,EAAE,UAAU,EAAE,WAAW,EAAE,CAAC,CAAC,CAAC;IAEnD,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,CAAC;IAEzD,MAAM,QAAQ,GAAmB;QAC/B,SAAS,EAAE,MAAM;QACjB,MAAM;QACN,UAAU,EAAE,OAAO,CAAC,IAAI,CAAC,MAAM;QAC/B,GAAG,CAAC,MAAM,CAAC,eAAe,IAAI,IAAI,IAAI;YACpC,eAAe,EAAE,MAAM,CAAC,eAAe;SACxC,CAAC;KACH,CAAC;IAEF,OAAO;QACL,KAAK,EAAE,MAAM,CAAC,MAAM;QACpB,SAAS,EAAE,MAAM,CAAC,SAAS;QAC3B,QAAQ;QACR,gBAAgB,EAAE,MAAM,CAAC,gBAAgB;QACzC,QAAQ,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,SAAS;KACrD,CAAC;AACJ,CAAC"}
package/dist/types.d.ts CHANGED
@@ -1,7 +1,11 @@
1
+ import type { AudioOutput } from "./audio-output.js";
2
+ import type { PronunciationsInput } from "./pronunciations/types.js";
1
3
  import type { ResolvedModel, Voice } from "./speech-provider.js";
4
+ export type { AudioOutput, AudioOutputFormat } from "./audio-output.js";
2
5
  export type { CaptionFormat, CaptionsOptions } from "./captions.js";
3
6
  export type { ConversationTurn, GenerateConversationOptions, } from "./conversation/types.js";
4
7
  export type { SpeechMetadata } from "./metadata.js";
8
+ export type { Pronunciation, PronunciationsInput, } from "./pronunciations/types.js";
5
9
  export type { CartesiaSpeechProviderConfig } from "./providers/cartesia/index.js";
6
10
  export type { DeepgramSpeechProviderConfig } from "./providers/deepgram/index.js";
7
11
  export type { ElevenLabsSpeechProviderConfig } from "./providers/elevenlabs/index.js";
@@ -28,6 +32,8 @@ export interface GenerateSpeechOptions<V extends Voice = Voice> {
28
32
  headers?: Record<string, string>;
29
33
  maxRetries?: number;
30
34
  model: string | ResolvedModel<V>;
35
+ output?: AudioOutput;
36
+ pronunciations?: PronunciationsInput;
31
37
  providerOptions?: Record<string, unknown>;
32
38
  text: string;
33
39
  timestamps?: boolean;
@@ -1 +1 @@
1
- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAEjE,YAAY,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACpE,YAAY,EACV,gBAAgB,EAChB,2BAA2B,GAC5B,MAAM,yBAAyB,CAAC;AACjC,YAAY,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACpD,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,YAAY,EAAE,8BAA8B,EAAE,MAAM,iCAAiC,CAAC;AACtF,YAAY,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AACxE,YAAY,EAAE,6BAA6B,EAAE,MAAM,iCAAiC,CAAC;AACrF,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,YAAY,EAAE,0BAA0B,EAAE,MAAM,6BAA6B,CAAC;AAC9E,YAAY,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AAC1E,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,YAAY,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AAC1E,YAAY,EAAE,0BAA0B,EAAE,MAAM,6BAA6B,CAAC;AAC9E,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,YAAY,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AACxE,YAAY,EACV,OAAO,EACP,SAAS,EACT,aAAa,EACb,cAAc,EACd,KAAK,GACN,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EACV,kBAAkB,EAClB,kBAAkB,EAClB,YAAY,GACb,MAAM,oBAAoB,CAAC;AAC5B,YAAY,EACV,gBAAgB,EAChB,oBAAoB,EACpB,YAAY,GACb,MAAM,8BAA8B,CAAC;AACtC,YAAY,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AACpE,YAAY,EACV,yBAAyB,EACzB,aAAa,GACd,MAAM,iBAAiB,CAAC;AACzB,YAAY,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEhD,MAAM,WAAW,qBAAqB,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAC5D,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IACjC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,KAAK,EAAE,CAAC,CAAC;IACT,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB"}
1
+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,mBAAmB,CAAC;AACrD,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,2BAA2B,CAAC;AACrE,OAAO,KAAK,EAAE,aAAa,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAC;AAEjE,YAAY,EAAE,WAAW,EAAE,iBAAiB,EAAE,MAAM,mBAAmB,CAAC;AACxE,YAAY,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACpE,YAAY,EACV,gBAAgB,EAChB,2BAA2B,GAC5B,MAAM,yBAAyB,CAAC;AACjC,YAAY,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AACpD,YAAY,EACV,aAAa,EACb,mBAAmB,GACpB,MAAM,2BAA2B,CAAC;AACnC,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,YAAY,EAAE,8BAA8B,EAAE,MAAM,iCAAiC,CAAC;AACtF,YAAY,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AACxE,YAAY,EAAE,6BAA6B,EAAE,MAAM,iCAAiC,CAAC;AACrF,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,YAAY,EAAE,0BAA0B,EAAE,MAAM,6BAA6B,CAAC;AAC9E,YAAY,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AAC1E,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,YAAY,EAAE,2BAA2B,EAAE,MAAM,8BAA8B,CAAC;AAChF,YAAY,EAAE,wBAAwB,EAAE,MAAM,2BAA2B,CAAC;AAC1E,YAAY,EAAE,0BAA0B,EAAE,MAAM,6BAA6B,CAAC;AAC9E,YAAY,EAAE,4BAA4B,EAAE,MAAM,+BAA+B,CAAC;AAClF,YAAY,EAAE,uBAAuB,EAAE,MAAM,0BAA0B,CAAC;AACxE,YAAY,EACV,OAAO,EACP,SAAS,EACT,aAAa,EACb,cAAc,EACd,KAAK,GACN,MAAM,sBAAsB,CAAC;AAC9B,YAAY,EACV,kBAAkB,EAClB,kBAAkB,EAClB,YAAY,GACb,MAAM,oBAAoB,CAAC;AAC5B,YAAY,EACV,gBAAgB,EAChB,oBAAoB,EACpB,YAAY,GACb,MAAM,8BAA8B,CAAC;AACtC,YAAY,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAC;AACpE,YAAY,EACV,yBAAyB,EACzB,aAAa,GACd,MAAM,iBAAiB,CAAC;AACzB,YAAY,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEhD,MAAM,WAAW,qBAAqB,CAAC,CAAC,SAAS,KAAK,GAAG,KAAK;IAC5D,WAAW,CAAC,EAAE,WAAW,CAAC;IAC1B,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACjC,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,EAAE,MAAM,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;IACjC,MAAM,CAAC,EAAE,WAAW,CAAC;IACrB,cAAc,CAAC,EAAE,mBAAmB,CAAC;IACrC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC1C,IAAI,EAAE,MAAM,CAAC;IACb,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,KAAK,EAAE,CAAC,CAAC;IACT,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB"}
@@ -1 +1 @@
1
- {"version":3,"file":"volume-adjust.d.ts","sourceRoot":"","sources":["../src/volume-adjust.ts"],"names":[],"mappings":"AAOA,UAAU,iBAAiB;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,GAAG,UAAU,CAAC;IACpC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;AAED,wBAAsB,YAAY,CAChC,KAAK,EAAE,iBAAiB,GACvB,OAAO,CAAC,UAAU,CAAC,CAgBrB"}
1
+ {"version":3,"file":"volume-adjust.d.ts","sourceRoot":"","sources":["../src/volume-adjust.ts"],"names":[],"mappings":"AAQA,UAAU,iBAAiB;IACzB,QAAQ,CAAC,KAAK,EAAE,MAAM,GAAG,UAAU,CAAC;IACpC,QAAQ,CAAC,SAAS,EAAE,MAAM,CAAC;IAC3B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;AAED,wBAAsB,YAAY,CAChC,KAAK,EAAE,iBAAiB,GACvB,OAAO,CAAC,UAAU,CAAC,CAgBrB"}
@@ -1,21 +1,15 @@
1
- import { concatPcmToWav, dbfsToInt16Rms, decodeToPcm16, normalizeRms, } from "./conversation/pcm-concat.js";
1
+ import { decodeAudioToPcm16 } from "./audio-decode.js";
2
+ import { base64ToUint8Array } from "./audio-utils.js";
3
+ import { concatPcmToWav, dbfsToInt16Rms, normalizeRms, } from "./conversation/pcm-concat.js";
2
4
  export async function adjustVolume(input) {
3
5
  const bytes = input.audio instanceof Uint8Array
4
6
  ? input.audio
5
7
  : base64ToUint8Array(input.audio);
6
- const segment = decodeToPcm16(bytes, input.mediaType);
8
+ const segment = await decodeAudioToPcm16(bytes, input.mediaType);
7
9
  const [normalized] = normalizeRms([segment], dbfsToInt16Rms(input.volumeDbfs));
8
10
  return await concatPcmToWav([normalized], {
9
11
  gapMs: 0,
10
12
  targetSampleRate: normalized.sampleRate,
11
13
  });
12
14
  }
13
- function base64ToUint8Array(b64) {
14
- const binaryString = atob(b64);
15
- const out = new Uint8Array(binaryString.length);
16
- for (let i = 0; i < binaryString.length; i++) {
17
- out[i] = binaryString.charCodeAt(i);
18
- }
19
- return out;
20
- }
21
15
  //# sourceMappingURL=volume-adjust.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"volume-adjust.js","sourceRoot":"","sources":["../src/volume-adjust.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,cAAc,EACd,cAAc,EACd,aAAa,EACb,YAAY,GACb,MAAM,8BAA8B,CAAC;AAQtC,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,KAAwB;IAExB,MAAM,KAAK,GACT,KAAK,CAAC,KAAK,YAAY,UAAU;QAC/B,CAAC,CAAC,KAAK,CAAC,KAAK;QACb,CAAC,CAAC,kBAAkB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAEtC,MAAM,OAAO,GAAG,aAAa,CAAC,KAAK,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IACtD,MAAM,CAAC,UAAU,CAAC,GAAG,YAAY,CAC/B,CAAC,OAAO,CAAC,EACT,cAAc,CAAC,KAAK,CAAC,UAAU,CAAC,CACjC,CAAC;IAEF,OAAO,MAAM,cAAc,CAAC,CAAC,UAAU,CAAC,EAAE;QACxC,KAAK,EAAE,CAAC;QACR,gBAAgB,EAAE,UAAU,CAAC,UAAU;KACxC,CAAC,CAAC;AACL,CAAC;AAED,SAAS,kBAAkB,CAAC,GAAW;IACrC,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC;IAC/B,MAAM,GAAG,GAAG,IAAI,UAAU,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;IAChD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC7C,GAAG,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;IACtC,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
1
+ {"version":3,"file":"volume-adjust.js","sourceRoot":"","sources":["../src/volume-adjust.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,mBAAmB,CAAC;AACvD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EACL,cAAc,EACd,cAAc,EACd,YAAY,GACb,MAAM,8BAA8B,CAAC;AAQtC,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,KAAwB;IAExB,MAAM,KAAK,GACT,KAAK,CAAC,KAAK,YAAY,UAAU;QAC/B,CAAC,CAAC,KAAK,CAAC,KAAK;QACb,CAAC,CAAC,kBAAkB,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAEtC,MAAM,OAAO,GAAG,MAAM,kBAAkB,CAAC,KAAK,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IACjE,MAAM,CAAC,UAAU,CAAC,GAAG,YAAY,CAC/B,CAAC,OAAO,CAAC,EACT,cAAc,CAAC,KAAK,CAAC,UAAU,CAAC,CACjC,CAAC;IAEF,OAAO,MAAM,cAAc,CAAC,CAAC,UAAU,CAAC,EAAE;QACxC,KAAK,EAAE,CAAC;QACR,gBAAgB,EAAE,UAAU,CAAC,UAAU;KACxC,CAAC,CAAC;AACL,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@speech-sdk/core",
3
- "version": "0.8.1-alpha",
3
+ "version": "0.8.1",
4
4
  "description": "Universal, cross-platform text-to-speech SDK with multi-provider support.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -17,12 +17,17 @@
17
17
  "./types": {
18
18
  "types": "./dist/types.d.ts",
19
19
  "default": "./dist/types.js"
20
+ },
21
+ "./pronunciations": {
22
+ "types": "./dist/pronunciations/index.d.ts",
23
+ "default": "./dist/pronunciations/index.js"
20
24
  }
21
25
  },
22
26
  "files": [
23
27
  "dist",
24
28
  "README.md"
25
29
  ],
30
+ "sideEffects": false,
26
31
  "keywords": [
27
32
  "tts",
28
33
  "text-to-speech",
@@ -38,6 +43,7 @@
38
43
  "url": "https://github.com/Jellypod-Inc/speech-sdk"
39
44
  },
40
45
  "dependencies": {
46
+ "@mediabunny/mp3-encoder": "^1.42.0",
41
47
  "mediabunny": "^1.40.1",
42
48
  "p-retry": "^8.0.0",
43
49
  "zod": "^4.3.6"