kugelaudio 0.6.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -391,6 +391,23 @@ function classifyWsHandshakeError(err) {
391
391
  return build(status, void 0, typeof e.message === "string" ? e.message : "");
392
392
  }
393
393
 
394
+ // src/types.ts
395
/**
 * Convert a server usage payload into the SDK's camelCase usage record.
 *
 * Fields are read from `data.usage` when that is an object, otherwise from
 * `data` itself. When the chosen source has no usable `audio_seconds`, the
 * top-level `data.total_audio_seconds` is used instead.
 *
 * @param {object|null|undefined} data - Decoded server message.
 * @returns {object|null} A record with `audioSeconds`, `costCents`,
 *   `currency`, `characters`, `modelId` and `costAvailable`, or `null` when
 *   `data` is not an object or carries no finite audio duration.
 */
function parseSessionUsage(data) {
  // This helper is exported, so it can be handed arbitrary values; report
  // "no usage" instead of throwing on null, undefined, or primitives.
  if (data === null || typeof data !== "object") return null;

  const nested = data.usage;
  const source = nested !== null && typeof nested === "object" ? nested : data;

  // A JSON literal such as 1e999 decodes to Infinity; a non-finite figure is
  // unusable for billing, so it is treated the same as a missing field.
  const finiteNumber = (value) => (Number.isFinite(value) ? value : undefined);
  const text = (value) => (typeof value === "string" ? value : undefined);

  const audioSeconds =
    finiteNumber(source.audio_seconds) ?? finiteNumber(data.total_audio_seconds);
  if (audioSeconds === undefined) return null;

  // `null` (rather than undefined) marks "no cost reported"; `costAvailable`
  // mirrors that so callers do not have to compare against null themselves.
  const costCents = finiteNumber(source.cost_cents) ?? null;

  return {
    audioSeconds,
    costCents,
    currency: text(source.currency),
    characters: finiteNumber(source.characters),
    modelId: text(source.model_id),
    costAvailable: costCents !== null
  };
}
410
+
394
411
  // src/utils.ts
395
412
  function base64ToArrayBuffer(base64) {
396
413
  if (typeof atob === "function") {
@@ -474,10 +491,75 @@ function getWebSocket() {
474
491
  );
475
492
  }
476
493
 
494
+ // package.json
495
+ var package_default = {
496
+ name: "kugelaudio",
497
+ version: "0.8.0",
498
+ description: "Official JavaScript/TypeScript SDK for KugelAudio TTS API",
499
+ main: "dist/index.js",
500
+ module: "dist/index.mjs",
501
+ types: "dist/index.d.ts",
502
+ exports: {
503
+ ".": {
504
+ types: "./dist/index.d.ts",
505
+ import: "./dist/index.mjs",
506
+ require: "./dist/index.js"
507
+ }
508
+ },
509
+ files: [
510
+ "dist",
511
+ "src",
512
+ "LICENSE",
513
+ "CHANGELOG.md"
514
+ ],
515
+ scripts: {
516
+ build: "tsup src/index.ts --format cjs,esm --dts",
517
+ dev: "tsup src/index.ts --format cjs,esm --dts --watch",
518
+ lint: "eslint src/",
519
+ test: "vitest run",
520
+ "test:watch": "vitest",
521
+ prepublishOnly: "npm run build"
522
+ },
523
+ keywords: [
524
+ "tts",
525
+ "text-to-speech",
526
+ "audio",
527
+ "streaming",
528
+ "websocket",
529
+ "kugelaudio"
530
+ ],
531
+ author: "KugelAudio <hello@kugelaudio.com>",
532
+ license: "MIT",
533
+ repository: {
534
+ type: "git",
535
+ url: "https://github.com/Kugelaudio/KugelAudio",
536
+ directory: "sdks/js"
537
+ },
538
+ homepage: "https://kugelaudio.com",
539
+ bugs: {
540
+ url: "https://github.com/Kugelaudio/KugelAudio/issues"
541
+ },
542
+ devDependencies: {
543
+ "@types/node": "^25.3.2",
544
+ tsup: "^8.0.0",
545
+ typescript: "^6.0.2",
546
+ vitest: "^4.0.18"
547
+ },
548
+ engines: {
549
+ node: ">=18.0.0"
550
+ },
551
+ dependencies: {
552
+ tsx: "^4.21.0",
553
+ ws: "^8.18.0"
554
+ }
555
+ };
556
+
477
557
  // src/client.ts
478
558
  var DEFAULT_API_URL = "https://api.kugelaudio.com";
479
559
  var EU_API_URL = "https://api.eu.kugelaudio.com";
480
560
  var SUPPORTED_REGIONS = ["eu", "us", "global"];
561
+ var SDK_NAME = "js";
562
+ var SDK_VERSION = package_default.version;
481
563
  var REGION_PREFIXES = ["eu-", "us-", "global-"];
482
564
  function parseApiKey(apiKey) {
483
565
  for (const prefix of REGION_PREFIXES) {
@@ -487,6 +569,16 @@ function parseApiKey(apiKey) {
487
569
  }
488
570
  return { cleanKey: apiKey };
489
571
  }
572
/**
 * Build the HTTP headers that identify this SDK to the API.
 *
 * @returns {object} A fresh object holding the `X-KugelAudio-SDK` and
 *   `X-KugelAudio-SDK-Version` headers, taken from `SDK_NAME` and
 *   `SDK_VERSION`.
 */
function sdkHeaders() {
  const identification = {};
  identification["X-KugelAudio-SDK"] = SDK_NAME;
  identification["X-KugelAudio-SDK-Version"] = SDK_VERSION;
  return identification;
}
578
/**
 * Append the SDK identification query parameters to a URL string.
 *
 * The parameters are joined with `&` when the URL already contains a `?`,
 * and with `?` otherwise.
 *
 * @param {string} url - URL to extend.
 * @returns {string} `url` followed by `sdk=<SDK_NAME>&sdk_version=<SDK_VERSION>`,
 *   each value passed through `encodeURIComponent`.
 */
function appendSdkQuery(url) {
  const joiner = url.includes("?") ? "&" : "?";
  const pairs = [
    ["sdk", SDK_NAME],
    ["sdk_version", SDK_VERSION]
  ];
  const query = pairs
    .map(([key, value]) => `${key}=${encodeURIComponent(value)}`)
    .join("&");
  return `${url}${joiner}${query}`;
}
490
582
  function createWs(url) {
491
583
  const WS = getWebSocket();
492
584
  return new WS(url);
@@ -855,7 +947,7 @@ var TTSResource = class {
855
947
  if (this.client.orgId !== void 0) {
856
948
  url += `&org_id=${this.client.orgId}`;
857
949
  }
858
- return url;
950
+ return appendSdkQuery(url);
859
951
  }
860
952
  /**
861
953
  * Get or create a WebSocket connection for connection pooling.
@@ -918,7 +1010,8 @@ var TTSResource = class {
918
1010
  durationMs: data.dur_ms,
919
1011
  generationMs: data.gen_ms,
920
1012
  rtf: data.rtf,
921
- error: data.error
1013
+ error: data.error,
1014
+ usage: parseSessionUsage(data) ?? void 0
922
1015
  };
923
1016
  pending.callbacks.onFinal?.(stats);
924
1017
  this.pendingRequests.delete(requestId);
@@ -1003,17 +1096,21 @@ var TTSResource = class {
1003
1096
  callbacks.onOpen?.();
1004
1097
  ws.send(JSON.stringify({
1005
1098
  text: options.text,
1006
- model_id: options.modelId || "kugel-1-turbo",
1099
+ model_id: options.modelId || "kugel-3",
1007
1100
  voice_id: options.voiceId,
1008
1101
  cfg_scale: options.cfgScale ?? 2,
1009
1102
  ...options.temperature !== void 0 && { temperature: options.temperature },
1010
1103
  max_new_tokens: options.maxNewTokens ?? 2048,
1011
1104
  sample_rate: options.sampleRate ?? 24e3,
1105
+ ...options.outputFormat && { output_format: options.outputFormat },
1012
1106
  normalize: options.normalize ?? true,
1013
1107
  ...options.language && { language: options.language },
1014
1108
  ...options.wordTimestamps && { word_timestamps: true },
1015
1109
  ...options.speed !== void 0 && { speed: options.speed },
1016
- ...options.projectId !== void 0 && { project_id: options.projectId }
1110
+ ...options.projectId !== void 0 && { project_id: options.projectId },
1111
+ // [] is meaningful (explicit opt-out) and must be sent; only
1112
+ // undefined (use the project default) is omitted.
1113
+ ...options.dictionaryIds !== void 0 && { dictionary_ids: options.dictionaryIds }
1017
1114
  }));
1018
1115
  });
1019
1116
  }
@@ -1029,16 +1126,20 @@ var TTSResource = class {
1029
1126
  callbacks.onOpen?.();
1030
1127
  ws.send(JSON.stringify({
1031
1128
  text: options.text,
1032
- model_id: options.modelId || "kugel-1-turbo",
1129
+ model_id: options.modelId || "kugel-3",
1033
1130
  voice_id: options.voiceId,
1034
1131
  cfg_scale: options.cfgScale ?? 2,
1035
1132
  max_new_tokens: options.maxNewTokens ?? 2048,
1036
1133
  sample_rate: options.sampleRate ?? 24e3,
1134
+ ...options.outputFormat && { output_format: options.outputFormat },
1037
1135
  normalize: options.normalize ?? true,
1038
1136
  ...options.language && { language: options.language },
1039
1137
  ...options.wordTimestamps && { word_timestamps: true },
1040
1138
  ...options.speed !== void 0 && { speed: options.speed },
1041
- ...options.projectId !== void 0 && { project_id: options.projectId }
1139
+ ...options.projectId !== void 0 && { project_id: options.projectId },
1140
+ // [] is meaningful (explicit opt-out) and must be sent; only
1141
+ // undefined (use the project default) is omitted.
1142
+ ...options.dictionaryIds !== void 0 && { dictionary_ids: options.dictionaryIds }
1042
1143
  }));
1043
1144
  };
1044
1145
  ws.onmessage = (event) => {
@@ -1060,7 +1161,8 @@ var TTSResource = class {
1060
1161
  durationMs: data.dur_ms,
1061
1162
  generationMs: data.gen_ms,
1062
1163
  rtf: data.rtf,
1063
- error: data.error
1164
+ error: data.error,
1165
+ usage: parseSessionUsage(data) ?? void 0
1064
1166
  };
1065
1167
  callbacks.onFinal?.(stats);
1066
1168
  ws.close();
@@ -1230,7 +1332,11 @@ var MultiContextSession = class {
1230
1332
  this.ws = null;
1231
1333
  this.callbacks = {};
1232
1334
  this.contexts = /* @__PURE__ */ new Set();
1335
+ /** Contexts a create message has been sent for (not yet necessarily
1336
+ * confirmed by the server via context_created). */
1337
+ this.requestedContexts = /* @__PURE__ */ new Set();
1233
1338
  this._sessionId = null;
1339
+ this._contextUsage = /* @__PURE__ */ new Map();
1234
1340
  this.isStarted = false;
1235
1341
  this.config = config || {};
1236
1342
  }
@@ -1240,6 +1346,18 @@ var MultiContextSession = class {
1240
1346
  get sessionId() {
1241
1347
  return this._sessionId;
1242
1348
  }
1349
+ /**
1350
+ * Per-context usage (audio time + amount charged) for a closed context, or
1351
+ * null if that context hasn't closed yet. Each context is its own
1352
+ * conversation — use this to bill per conversation. See {@link SessionUsage}.
1353
+ */
1354
+ usageFor(contextId) {
1355
+ return this._contextUsage.get(contextId) ?? null;
1356
+ }
1357
+ /** Map of context_id → per-context usage for all closed contexts. */
1358
+ get contextUsage() {
1359
+ return new Map(this._contextUsage);
1360
+ }
1243
1361
  /**
1244
1362
  * Connect to the multi-context WebSocket endpoint.
1245
1363
  *
@@ -1259,7 +1377,7 @@ var MultiContextSession = class {
1259
1377
  } else {
1260
1378
  authParam = "api_key";
1261
1379
  }
1262
- const url = `${wsUrl}/ws/tts/multi?${authParam}=${this.client.apiKey}`;
1380
+ const url = appendSdkQuery(`${wsUrl}/ws/tts/multi?${authParam}=${this.client.apiKey}`);
1263
1381
  this.ws = createWs(url);
1264
1382
  const ws = this.ws;
1265
1383
  ws.onmessage = (event) => {
@@ -1293,12 +1411,19 @@ var MultiContextSession = class {
1293
1411
  };
1294
1412
  this.callbacks.onChunk?.(chunk);
1295
1413
  }
1414
+ if (data.final && data.context_id) {
1415
+ this.callbacks.onFinal?.(data.context_id);
1416
+ }
1296
1417
  if (data.context_closed) {
1297
1418
  this.contexts.delete(data.context_id);
1298
- this.callbacks.onContextClosed?.(data.context_id);
1419
+ this.requestedContexts.delete(data.context_id);
1420
+ const ctxUsage = parseSessionUsage(data) ?? void 0;
1421
+ if (ctxUsage) this._contextUsage.set(data.context_id, ctxUsage);
1422
+ this.callbacks.onContextClosed?.(data.context_id, ctxUsage);
1299
1423
  }
1300
1424
  if (data.context_timeout) {
1301
1425
  this.contexts.delete(data.context_id);
1426
+ this.requestedContexts.delete(data.context_id);
1302
1427
  this.callbacks.onContextTimeout?.(data.context_id);
1303
1428
  }
1304
1429
  if (data.session_closed) {
@@ -1338,6 +1463,7 @@ var MultiContextSession = class {
1338
1463
  this.ws = null;
1339
1464
  this.isStarted = false;
1340
1465
  this.contexts.clear();
1466
+ this.requestedContexts.clear();
1341
1467
  };
1342
1468
  });
1343
1469
  }
@@ -1348,6 +1474,7 @@ var MultiContextSession = class {
1348
1474
  if (!this.ws || this.ws.readyState !== WS_OPEN) {
1349
1475
  throw new KugelAudioError("WebSocket not connected");
1350
1476
  }
1477
+ this.requestedContexts.add(contextId);
1351
1478
  const msg = {
1352
1479
  text: " ",
1353
1480
  context_id: contextId
@@ -1355,23 +1482,27 @@ var MultiContextSession = class {
1355
1482
  if (!this.isStarted) {
1356
1483
  warnIfNoLanguage(this.config.language, this.config.normalize);
1357
1484
  if (this.config.sampleRate) msg.sample_rate = this.config.sampleRate;
1485
+ if (this.config.outputFormat) msg.output_format = this.config.outputFormat;
1358
1486
  if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
1359
1487
  if (this.config.temperature !== void 0) msg.temperature = this.config.temperature;
1360
1488
  if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
1361
1489
  if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
1362
1490
  if (this.config.language) msg.language = this.config.language;
1491
+ if (this.config.dictionaryIds !== void 0) msg.dictionary_ids = this.config.dictionaryIds;
1363
1492
  if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
1364
1493
  }
1494
+ const voiceSettings = {};
1365
1495
  const voiceId = options?.voiceId || this.config.defaultVoiceId;
1366
- if (voiceId) msg.voice_id = voiceId;
1496
+ if (voiceId) voiceSettings.voice_id = voiceId;
1367
1497
  if (options?.voiceSettings) {
1368
- msg.voice_settings = {
1369
- stability: options.voiceSettings.stability,
1370
- similarity_boost: options.voiceSettings.similarityBoost,
1371
- style: options.voiceSettings.style,
1372
- use_speaker_boost: options.voiceSettings.useSpeakerBoost,
1373
- speed: options.voiceSettings.speed
1374
- };
1498
+ voiceSettings.stability = options.voiceSettings.stability;
1499
+ voiceSettings.similarity_boost = options.voiceSettings.similarityBoost;
1500
+ voiceSettings.style = options.voiceSettings.style;
1501
+ voiceSettings.use_speaker_boost = options.voiceSettings.useSpeakerBoost;
1502
+ voiceSettings.speed = options.voiceSettings.speed;
1503
+ }
1504
+ if (Object.keys(voiceSettings).length > 0) {
1505
+ msg.voice_settings = voiceSettings;
1375
1506
  }
1376
1507
  this.ws.send(JSON.stringify(msg));
1377
1508
  }
@@ -1382,7 +1513,7 @@ var MultiContextSession = class {
1382
1513
  if (!this.ws || this.ws.readyState !== WS_OPEN) {
1383
1514
  throw new KugelAudioError("WebSocket not connected");
1384
1515
  }
1385
- if (!this.contexts.has(contextId) && !this.isStarted) {
1516
+ if (!this.requestedContexts.has(contextId) && !this.contexts.has(contextId)) {
1386
1517
  this.createContext(contextId);
1387
1518
  }
1388
1519
  this.ws.send(JSON.stringify({
@@ -1440,6 +1571,7 @@ var MultiContextSession = class {
1440
1571
  this.ws = null;
1441
1572
  this.isStarted = false;
1442
1573
  this.contexts.clear();
1574
+ this.requestedContexts.clear();
1443
1575
  }
1444
1576
  /**
1445
1577
  * Get active context IDs.
@@ -1458,10 +1590,19 @@ var StreamingSession = class {
1458
1590
  constructor(client, config, callbacks) {
1459
1591
  this.ws = null;
1460
1592
  this.configSent = false;
1593
+ this._lastUsage = null;
1461
1594
  this.client = client;
1462
1595
  this.config = config;
1463
1596
  this.callbacks = callbacks;
1464
1597
  }
1598
+ /**
1599
+ * Per-session usage from the most recently closed session, or null before
1600
+ * the first session closes. Use this to bill your own customers per
1601
+ * conversation. See {@link SessionUsage}.
1602
+ */
1603
+ get lastUsage() {
1604
+ return this._lastUsage;
1605
+ }
1465
1606
  /**
1466
1607
  * Open the WebSocket connection and authenticate.
1467
1608
  *
@@ -1480,7 +1621,7 @@ var StreamingSession = class {
1480
1621
  } else {
1481
1622
  authParam = "api_key";
1482
1623
  }
1483
- const url = `${wsUrl}/ws/tts/stream?${authParam}=${this.client.apiKey}`;
1624
+ const url = appendSdkQuery(`${wsUrl}/ws/tts/stream?${authParam}=${this.client.apiKey}`);
1484
1625
  this.ws = createWs(url);
1485
1626
  const ws = this.ws;
1486
1627
  ws.onmessage = (event) => {
@@ -1525,7 +1666,15 @@ var StreamingSession = class {
1525
1666
  if (data.interrupted) {
1526
1667
  this.callbacks.onInterrupted?.();
1527
1668
  }
1669
+ if (data.final) {
1670
+ this.callbacks.onFinal?.(
1671
+ data.total_audio_seconds ?? 0,
1672
+ data.total_text_chunks ?? 0,
1673
+ data.total_audio_chunks ?? 0
1674
+ );
1675
+ }
1528
1676
  if (data.session_closed) {
1677
+ this._lastUsage = parseSessionUsage(data);
1529
1678
  this.callbacks.onSessionClosed?.(
1530
1679
  data.total_audio_seconds ?? 0,
1531
1680
  data.total_text_chunks ?? 0,
@@ -1593,6 +1742,7 @@ var StreamingSession = class {
1593
1742
  if (this.config.temperature !== void 0) msg.temperature = this.config.temperature;
1594
1743
  if (this.config.maxNewTokens !== void 0) msg.max_new_tokens = this.config.maxNewTokens;
1595
1744
  if (this.config.sampleRate !== void 0) msg.sample_rate = this.config.sampleRate;
1745
+ if (this.config.outputFormat !== void 0) msg.output_format = this.config.outputFormat;
1596
1746
  if (this.config.flushTimeoutMs !== void 0) msg.flush_timeout_ms = this.config.flushTimeoutMs;
1597
1747
  if (this.config.maxBufferLength !== void 0) msg.max_buffer_length = this.config.maxBufferLength;
1598
1748
  if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
@@ -1601,6 +1751,7 @@ var StreamingSession = class {
1601
1751
  if (this.config.autoMode !== void 0) msg.auto_mode = this.config.autoMode;
1602
1752
  if (this.config.chunkLengthSchedule?.length) msg.chunk_length_schedule = this.config.chunkLengthSchedule;
1603
1753
  if (this.config.speed !== void 0) msg.speed = this.config.speed;
1754
+ if (this.config.dictionaryIds !== void 0) msg.dictionary_ids = this.config.dictionaryIds;
1604
1755
  this.configSent = true;
1605
1756
  }
1606
1757
  this.ws.send(JSON.stringify(msg));
@@ -1884,7 +2035,8 @@ var KugelAudio = class _KugelAudio {
1884
2035
  const headers = {
1885
2036
  "Content-Type": "application/json",
1886
2037
  "X-API-Key": this._apiKey,
1887
- "Authorization": `Bearer ${this._apiKey}`
2038
+ "Authorization": `Bearer ${this._apiKey}`,
2039
+ ...sdkHeaders()
1888
2040
  };
1889
2041
  const controller = new AbortController();
1890
2042
  const timeoutId = setTimeout(() => controller.abort(), this._timeout);
@@ -1924,7 +2076,8 @@ var KugelAudio = class _KugelAudio {
1924
2076
  const url = `${this._apiUrl}${path}`;
1925
2077
  const headers = {
1926
2078
  "X-API-Key": this._apiKey,
1927
- "Authorization": `Bearer ${this._apiKey}`
2079
+ "Authorization": `Bearer ${this._apiKey}`,
2080
+ ...sdkHeaders()
1928
2081
  };
1929
2082
  const controller = new AbortController();
1930
2083
  const timeoutId = setTimeout(() => controller.abort(), this._timeout);
@@ -1977,5 +2130,6 @@ export {
1977
2130
  classifyWsHandshakeError,
1978
2131
  createWavBlob,
1979
2132
  createWavFile,
1980
- decodePCM16
2133
+ decodePCM16,
2134
+ parseSessionUsage
1981
2135
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "kugelaudio",
3
- "version": "0.6.1",
3
+ "version": "0.8.0",
4
4
  "description": "Official JavaScript/TypeScript SDK for KugelAudio TTS API",
5
5
  "main": "dist/index.js",
6
6
  "module": "dist/index.mjs",