kugelaudio 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -39,7 +39,8 @@ __export(index_exports, {
39
39
  classifyWsHandshakeError: () => classifyWsHandshakeError,
40
40
  createWavBlob: () => createWavBlob,
41
41
  createWavFile: () => createWavFile,
42
- decodePCM16: () => decodePCM16
42
+ decodePCM16: () => decodePCM16,
43
+ parseSessionUsage: () => parseSessionUsage
43
44
  });
44
45
  module.exports = __toCommonJS(index_exports);
45
46
 
@@ -429,6 +430,23 @@ function classifyWsHandshakeError(err) {
429
430
  return build(status, void 0, typeof e.message === "string" ? e.message : "");
430
431
  }
431
432
 
433
+ // src/types.ts
/**
 * Build a normalized usage record from a parsed server message.
 *
 * Usage fields are read from `data.usage` when that is an object, otherwise
 * from `data` itself. The audio duration falls back to the top-level
 * `data.total_audio_seconds` when the chosen source has no numeric
 * `audio_seconds`.
 *
 * @param {object|null|undefined} data - Parsed message payload.
 * @returns {{
 *   audioSeconds: number,
 *   costCents: (number|null),
 *   currency: (string|undefined),
 *   characters: (number|undefined),
 *   modelId: (string|undefined),
 *   costAvailable: boolean
 * }|null} The usage record, or null when the payload is null/undefined or
 *   carries no numeric audio duration.
 */
function parseSessionUsage(data) {
  // This helper is exported; a missing payload means "no usage", not a
  // TypeError from dereferencing null/undefined.
  if (data == null) return null;

  const raw = data.usage;
  const source = raw && typeof raw === "object" ? raw : data;

  let audioSeconds;
  if (typeof source.audio_seconds === "number") {
    audioSeconds = source.audio_seconds;
  } else if (typeof data.total_audio_seconds === "number") {
    audioSeconds = data.total_audio_seconds;
  }
  if (audioSeconds === undefined) return null;

  // null (rather than undefined) marks "cost not reported"; costAvailable
  // mirrors that as a boolean.
  const costCents = typeof source.cost_cents === "number" ? source.cost_cents : null;

  return {
    audioSeconds,
    costCents,
    currency: typeof source.currency === "string" ? source.currency : undefined,
    characters: typeof source.characters === "number" ? source.characters : undefined,
    modelId: typeof source.model_id === "string" ? source.model_id : undefined,
    costAvailable: costCents !== null
  };
}
449
+
432
450
  // src/utils.ts
433
451
  function base64ToArrayBuffer(base64) {
434
452
  if (typeof atob === "function") {
@@ -512,10 +530,75 @@ function getWebSocket() {
512
530
  );
513
531
  }
514
532
 
533
+ // package.json — copy of the package manifest embedded in this bundle; in the code visible here only its `version` field is read (by SDK_VERSION).
534
+ var package_default = {
535
+ name: "kugelaudio",
536
+ version: "0.8.0",
537
+ description: "Official JavaScript/TypeScript SDK for KugelAudio TTS API",
538
+ main: "dist/index.js",
539
+ module: "dist/index.mjs",
540
+ types: "dist/index.d.ts",
541
+ exports: {
542
+ ".": {
543
+ types: "./dist/index.d.ts",
544
+ import: "./dist/index.mjs",
545
+ require: "./dist/index.js"
546
+ }
547
+ },
548
+ files: [
549
+ "dist",
550
+ "src",
551
+ "LICENSE",
552
+ "CHANGELOG.md"
553
+ ],
554
+ scripts: {
555
+ build: "tsup src/index.ts --format cjs,esm --dts",
556
+ dev: "tsup src/index.ts --format cjs,esm --dts --watch",
557
+ lint: "eslint src/",
558
+ test: "vitest run",
559
+ "test:watch": "vitest",
560
+ prepublishOnly: "npm run build"
561
+ },
562
+ keywords: [
563
+ "tts",
564
+ "text-to-speech",
565
+ "audio",
566
+ "streaming",
567
+ "websocket",
568
+ "kugelaudio"
569
+ ],
570
+ author: "KugelAudio <hello@kugelaudio.com>",
571
+ license: "MIT",
572
+ repository: {
573
+ type: "git",
574
+ url: "https://github.com/Kugelaudio/KugelAudio",
575
+ directory: "sdks/js"
576
+ },
577
+ homepage: "https://kugelaudio.com",
578
+ bugs: {
579
+ url: "https://github.com/Kugelaudio/KugelAudio/issues"
580
+ },
581
+ devDependencies: {
582
+ "@types/node": "^25.3.2",
583
+ tsup: "^8.0.0",
584
+ typescript: "^6.0.2",
585
+ vitest: "^4.0.18"
586
+ },
587
+ engines: {
588
+ node: ">=18.0.0"
589
+ },
590
+ dependencies: {
591
+ tsx: "^4.21.0",
592
+ ws: "^8.18.0"
593
+ }
594
+ };
595
+
515
596
  // src/client.ts
516
597
  var DEFAULT_API_URL = "https://api.kugelaudio.com";
517
598
  var EU_API_URL = "https://api.eu.kugelaudio.com";
518
599
  var SUPPORTED_REGIONS = ["eu", "us", "global"];
600
+ var SDK_NAME = "js";
601
+ var SDK_VERSION = package_default.version;
519
602
  var REGION_PREFIXES = ["eu-", "us-", "global-"];
520
603
  function parseApiKey(apiKey) {
521
604
  for (const prefix of REGION_PREFIXES) {
@@ -525,6 +608,16 @@ function parseApiKey(apiKey) {
525
608
  }
526
609
  return { cleanKey: apiKey };
527
610
  }
/**
 * HTTP headers that identify this SDK and its version on a request.
 *
 * @returns {Object<string, string>} A fresh object holding the
 *   `X-KugelAudio-SDK` and `X-KugelAudio-SDK-Version` headers.
 */
function sdkHeaders() {
  const headers = {};
  headers["X-KugelAudio-SDK"] = SDK_NAME;
  headers["X-KugelAudio-SDK-Version"] = SDK_VERSION;
  return headers;
}
/**
 * Append the SDK identification parameters (`sdk`, `sdk_version`) to a URL.
 *
 * The parameters are joined with `&` when the URL already contains a `?`,
 * and with `?` otherwise. Both values are percent-encoded.
 *
 * @param {string} url - URL to extend.
 * @returns {string} The URL followed by the two SDK query parameters.
 */
function appendSdkQuery(url) {
  const joiner = url.includes("?") ? "&" : "?";
  const sdkParam = `sdk=${encodeURIComponent(SDK_NAME)}`;
  const versionParam = `sdk_version=${encodeURIComponent(SDK_VERSION)}`;
  return `${url}${joiner}${sdkParam}&${versionParam}`;
}
528
621
  function createWs(url) {
529
622
  const WS = getWebSocket();
530
623
  return new WS(url);
@@ -893,7 +986,7 @@ var TTSResource = class {
893
986
  if (this.client.orgId !== void 0) {
894
987
  url += `&org_id=${this.client.orgId}`;
895
988
  }
896
- return url;
989
+ return appendSdkQuery(url);
897
990
  }
898
991
  /**
899
992
  * Get or create a WebSocket connection for connection pooling.
@@ -956,7 +1049,8 @@ var TTSResource = class {
956
1049
  durationMs: data.dur_ms,
957
1050
  generationMs: data.gen_ms,
958
1051
  rtf: data.rtf,
959
- error: data.error
1052
+ error: data.error,
1053
+ usage: parseSessionUsage(data) ?? void 0
960
1054
  };
961
1055
  pending.callbacks.onFinal?.(stats);
962
1056
  this.pendingRequests.delete(requestId);
@@ -1041,17 +1135,21 @@ var TTSResource = class {
1041
1135
  callbacks.onOpen?.();
1042
1136
  ws.send(JSON.stringify({
1043
1137
  text: options.text,
1044
- model_id: options.modelId || "kugel-1-turbo",
1138
+ model_id: options.modelId || "kugel-3",
1045
1139
  voice_id: options.voiceId,
1046
1140
  cfg_scale: options.cfgScale ?? 2,
1047
1141
  ...options.temperature !== void 0 && { temperature: options.temperature },
1048
1142
  max_new_tokens: options.maxNewTokens ?? 2048,
1049
1143
  sample_rate: options.sampleRate ?? 24e3,
1144
+ ...options.outputFormat && { output_format: options.outputFormat },
1050
1145
  normalize: options.normalize ?? true,
1051
1146
  ...options.language && { language: options.language },
1052
1147
  ...options.wordTimestamps && { word_timestamps: true },
1053
1148
  ...options.speed !== void 0 && { speed: options.speed },
1054
- ...options.projectId !== void 0 && { project_id: options.projectId }
1149
+ ...options.projectId !== void 0 && { project_id: options.projectId },
1150
+ // [] is meaningful (explicit opt-out) and must be sent; only
1151
+ // undefined (use the project default) is omitted.
1152
+ ...options.dictionaryIds !== void 0 && { dictionary_ids: options.dictionaryIds }
1055
1153
  }));
1056
1154
  });
1057
1155
  }
@@ -1067,16 +1165,20 @@ var TTSResource = class {
1067
1165
  callbacks.onOpen?.();
1068
1166
  ws.send(JSON.stringify({
1069
1167
  text: options.text,
1070
- model_id: options.modelId || "kugel-1-turbo",
1168
+ model_id: options.modelId || "kugel-3",
1071
1169
  voice_id: options.voiceId,
1072
1170
  cfg_scale: options.cfgScale ?? 2,
1073
1171
  max_new_tokens: options.maxNewTokens ?? 2048,
1074
1172
  sample_rate: options.sampleRate ?? 24e3,
1173
+ ...options.outputFormat && { output_format: options.outputFormat },
1075
1174
  normalize: options.normalize ?? true,
1076
1175
  ...options.language && { language: options.language },
1077
1176
  ...options.wordTimestamps && { word_timestamps: true },
1078
1177
  ...options.speed !== void 0 && { speed: options.speed },
1079
- ...options.projectId !== void 0 && { project_id: options.projectId }
1178
+ ...options.projectId !== void 0 && { project_id: options.projectId },
1179
+ // [] is meaningful (explicit opt-out) and must be sent; only
1180
+ // undefined (use the project default) is omitted.
1181
+ ...options.dictionaryIds !== void 0 && { dictionary_ids: options.dictionaryIds }
1080
1182
  }));
1081
1183
  };
1082
1184
  ws.onmessage = (event) => {
@@ -1098,7 +1200,8 @@ var TTSResource = class {
1098
1200
  durationMs: data.dur_ms,
1099
1201
  generationMs: data.gen_ms,
1100
1202
  rtf: data.rtf,
1101
- error: data.error
1203
+ error: data.error,
1204
+ usage: parseSessionUsage(data) ?? void 0
1102
1205
  };
1103
1206
  callbacks.onFinal?.(stats);
1104
1207
  ws.close();
@@ -1268,7 +1371,11 @@ var MultiContextSession = class {
1268
1371
  this.ws = null;
1269
1372
  this.callbacks = {};
1270
1373
  this.contexts = /* @__PURE__ */ new Set();
1374
+ /** Contexts a create message has been sent for (not yet necessarily
1375
+ * confirmed by the server via context_created). */
1376
+ this.requestedContexts = /* @__PURE__ */ new Set();
1271
1377
  this._sessionId = null;
1378
+ this._contextUsage = /* @__PURE__ */ new Map();
1272
1379
  this.isStarted = false;
1273
1380
  this.config = config || {};
1274
1381
  }
@@ -1278,6 +1385,18 @@ var MultiContextSession = class {
1278
1385
  get sessionId() {
1279
1386
  return this._sessionId;
1280
1387
  }
1388
+ /**
1389
+ * Per-context usage (audio time + amount charged) for a closed context, or
1390
+ * null if that context hasn't closed yet. Each context is its own
1391
+ * conversation — use this to bill per conversation. See {@link SessionUsage}.
1392
+ */
1393
+ usageFor(contextId) {
1394
+ return this._contextUsage.get(contextId) ?? null;
1395
+ }
1396
+ /** Map of context_id → per-context usage for all closed contexts. */
1397
+ get contextUsage() {
1398
+ return new Map(this._contextUsage);
1399
+ }
1281
1400
  /**
1282
1401
  * Connect to the multi-context WebSocket endpoint.
1283
1402
  *
@@ -1297,7 +1416,7 @@ var MultiContextSession = class {
1297
1416
  } else {
1298
1417
  authParam = "api_key";
1299
1418
  }
1300
- const url = `${wsUrl}/ws/tts/multi?${authParam}=${this.client.apiKey}`;
1419
+ const url = appendSdkQuery(`${wsUrl}/ws/tts/multi?${authParam}=${this.client.apiKey}`);
1301
1420
  this.ws = createWs(url);
1302
1421
  const ws = this.ws;
1303
1422
  ws.onmessage = (event) => {
@@ -1331,12 +1450,19 @@ var MultiContextSession = class {
1331
1450
  };
1332
1451
  this.callbacks.onChunk?.(chunk);
1333
1452
  }
1453
+ if (data.final && data.context_id) {
1454
+ this.callbacks.onFinal?.(data.context_id);
1455
+ }
1334
1456
  if (data.context_closed) {
1335
1457
  this.contexts.delete(data.context_id);
1336
- this.callbacks.onContextClosed?.(data.context_id);
1458
+ this.requestedContexts.delete(data.context_id);
1459
+ const ctxUsage = parseSessionUsage(data) ?? void 0;
1460
+ if (ctxUsage) this._contextUsage.set(data.context_id, ctxUsage);
1461
+ this.callbacks.onContextClosed?.(data.context_id, ctxUsage);
1337
1462
  }
1338
1463
  if (data.context_timeout) {
1339
1464
  this.contexts.delete(data.context_id);
1465
+ this.requestedContexts.delete(data.context_id);
1340
1466
  this.callbacks.onContextTimeout?.(data.context_id);
1341
1467
  }
1342
1468
  if (data.session_closed) {
@@ -1376,6 +1502,7 @@ var MultiContextSession = class {
1376
1502
  this.ws = null;
1377
1503
  this.isStarted = false;
1378
1504
  this.contexts.clear();
1505
+ this.requestedContexts.clear();
1379
1506
  };
1380
1507
  });
1381
1508
  }
@@ -1386,6 +1513,7 @@ var MultiContextSession = class {
1386
1513
  if (!this.ws || this.ws.readyState !== WS_OPEN) {
1387
1514
  throw new KugelAudioError("WebSocket not connected");
1388
1515
  }
1516
+ this.requestedContexts.add(contextId);
1389
1517
  const msg = {
1390
1518
  text: " ",
1391
1519
  context_id: contextId
@@ -1393,23 +1521,27 @@ var MultiContextSession = class {
1393
1521
  if (!this.isStarted) {
1394
1522
  warnIfNoLanguage(this.config.language, this.config.normalize);
1395
1523
  if (this.config.sampleRate) msg.sample_rate = this.config.sampleRate;
1524
+ if (this.config.outputFormat) msg.output_format = this.config.outputFormat;
1396
1525
  if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
1397
1526
  if (this.config.temperature !== void 0) msg.temperature = this.config.temperature;
1398
1527
  if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
1399
1528
  if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
1400
1529
  if (this.config.language) msg.language = this.config.language;
1530
+ if (this.config.dictionaryIds !== void 0) msg.dictionary_ids = this.config.dictionaryIds;
1401
1531
  if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
1402
1532
  }
1533
+ const voiceSettings = {};
1403
1534
  const voiceId = options?.voiceId || this.config.defaultVoiceId;
1404
- if (voiceId) msg.voice_id = voiceId;
1535
+ if (voiceId) voiceSettings.voice_id = voiceId;
1405
1536
  if (options?.voiceSettings) {
1406
- msg.voice_settings = {
1407
- stability: options.voiceSettings.stability,
1408
- similarity_boost: options.voiceSettings.similarityBoost,
1409
- style: options.voiceSettings.style,
1410
- use_speaker_boost: options.voiceSettings.useSpeakerBoost,
1411
- speed: options.voiceSettings.speed
1412
- };
1537
+ voiceSettings.stability = options.voiceSettings.stability;
1538
+ voiceSettings.similarity_boost = options.voiceSettings.similarityBoost;
1539
+ voiceSettings.style = options.voiceSettings.style;
1540
+ voiceSettings.use_speaker_boost = options.voiceSettings.useSpeakerBoost;
1541
+ voiceSettings.speed = options.voiceSettings.speed;
1542
+ }
1543
+ if (Object.keys(voiceSettings).length > 0) {
1544
+ msg.voice_settings = voiceSettings;
1413
1545
  }
1414
1546
  this.ws.send(JSON.stringify(msg));
1415
1547
  }
@@ -1420,7 +1552,7 @@ var MultiContextSession = class {
1420
1552
  if (!this.ws || this.ws.readyState !== WS_OPEN) {
1421
1553
  throw new KugelAudioError("WebSocket not connected");
1422
1554
  }
1423
- if (!this.contexts.has(contextId) && !this.isStarted) {
1555
+ if (!this.requestedContexts.has(contextId) && !this.contexts.has(contextId)) {
1424
1556
  this.createContext(contextId);
1425
1557
  }
1426
1558
  this.ws.send(JSON.stringify({
@@ -1478,6 +1610,7 @@ var MultiContextSession = class {
1478
1610
  this.ws = null;
1479
1611
  this.isStarted = false;
1480
1612
  this.contexts.clear();
1613
+ this.requestedContexts.clear();
1481
1614
  }
1482
1615
  /**
1483
1616
  * Get active context IDs.
@@ -1496,10 +1629,19 @@ var StreamingSession = class {
1496
1629
  constructor(client, config, callbacks) {
1497
1630
  this.ws = null;
1498
1631
  this.configSent = false;
1632
+ this._lastUsage = null;
1499
1633
  this.client = client;
1500
1634
  this.config = config;
1501
1635
  this.callbacks = callbacks;
1502
1636
  }
1637
+ /**
1638
+ * Per-session usage from the most recently closed session, or null before
1639
+ * the first session closes. Use this to bill your own customers per
1640
+ * conversation. See {@link SessionUsage}.
1641
+ */
1642
+ get lastUsage() {
1643
+ return this._lastUsage;
1644
+ }
1503
1645
  /**
1504
1646
  * Open the WebSocket connection and authenticate.
1505
1647
  *
@@ -1518,7 +1660,7 @@ var StreamingSession = class {
1518
1660
  } else {
1519
1661
  authParam = "api_key";
1520
1662
  }
1521
- const url = `${wsUrl}/ws/tts/stream?${authParam}=${this.client.apiKey}`;
1663
+ const url = appendSdkQuery(`${wsUrl}/ws/tts/stream?${authParam}=${this.client.apiKey}`);
1522
1664
  this.ws = createWs(url);
1523
1665
  const ws = this.ws;
1524
1666
  ws.onmessage = (event) => {
@@ -1563,7 +1705,15 @@ var StreamingSession = class {
1563
1705
  if (data.interrupted) {
1564
1706
  this.callbacks.onInterrupted?.();
1565
1707
  }
1708
+ if (data.final) {
1709
+ this.callbacks.onFinal?.(
1710
+ data.total_audio_seconds ?? 0,
1711
+ data.total_text_chunks ?? 0,
1712
+ data.total_audio_chunks ?? 0
1713
+ );
1714
+ }
1566
1715
  if (data.session_closed) {
1716
+ this._lastUsage = parseSessionUsage(data);
1567
1717
  this.callbacks.onSessionClosed?.(
1568
1718
  data.total_audio_seconds ?? 0,
1569
1719
  data.total_text_chunks ?? 0,
@@ -1631,6 +1781,7 @@ var StreamingSession = class {
1631
1781
  if (this.config.temperature !== void 0) msg.temperature = this.config.temperature;
1632
1782
  if (this.config.maxNewTokens !== void 0) msg.max_new_tokens = this.config.maxNewTokens;
1633
1783
  if (this.config.sampleRate !== void 0) msg.sample_rate = this.config.sampleRate;
1784
+ if (this.config.outputFormat !== void 0) msg.output_format = this.config.outputFormat;
1634
1785
  if (this.config.flushTimeoutMs !== void 0) msg.flush_timeout_ms = this.config.flushTimeoutMs;
1635
1786
  if (this.config.maxBufferLength !== void 0) msg.max_buffer_length = this.config.maxBufferLength;
1636
1787
  if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
@@ -1639,6 +1790,7 @@ var StreamingSession = class {
1639
1790
  if (this.config.autoMode !== void 0) msg.auto_mode = this.config.autoMode;
1640
1791
  if (this.config.chunkLengthSchedule?.length) msg.chunk_length_schedule = this.config.chunkLengthSchedule;
1641
1792
  if (this.config.speed !== void 0) msg.speed = this.config.speed;
1793
+ if (this.config.dictionaryIds !== void 0) msg.dictionary_ids = this.config.dictionaryIds;
1642
1794
  this.configSent = true;
1643
1795
  }
1644
1796
  this.ws.send(JSON.stringify(msg));
@@ -1922,7 +2074,8 @@ var KugelAudio = class _KugelAudio {
1922
2074
  const headers = {
1923
2075
  "Content-Type": "application/json",
1924
2076
  "X-API-Key": this._apiKey,
1925
- "Authorization": `Bearer ${this._apiKey}`
2077
+ "Authorization": `Bearer ${this._apiKey}`,
2078
+ ...sdkHeaders()
1926
2079
  };
1927
2080
  const controller = new AbortController();
1928
2081
  const timeoutId = setTimeout(() => controller.abort(), this._timeout);
@@ -1962,7 +2115,8 @@ var KugelAudio = class _KugelAudio {
1962
2115
  const url = `${this._apiUrl}${path}`;
1963
2116
  const headers = {
1964
2117
  "X-API-Key": this._apiKey,
1965
- "Authorization": `Bearer ${this._apiKey}`
2118
+ "Authorization": `Bearer ${this._apiKey}`,
2119
+ ...sdkHeaders()
1966
2120
  };
1967
2121
  const controller = new AbortController();
1968
2122
  const timeoutId = setTimeout(() => controller.abort(), this._timeout);
@@ -2016,5 +2170,6 @@ var KugelAudio = class _KugelAudio {
2016
2170
  classifyWsHandshakeError,
2017
2171
  createWavBlob,
2018
2172
  createWavFile,
2019
- decodePCM16
2173
+ decodePCM16,
2174
+ parseSessionUsage
2020
2175
  });