@tiktool/live 2.6.2 → 2.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -567,6 +567,9 @@ declare class TikTokCaptions extends EventEmitter {
567
567
  private readonly _diarization;
568
568
  private readonly _maxDurationMinutes;
569
569
  private _language;
570
+ private streamAbortController;
571
+ private flvExtractor;
572
+ private streamUrl;
570
573
  constructor(options: TikTokCaptionsOptions);
571
574
  /**
572
575
  * Start real-time captions for the configured TikTok user.
@@ -598,6 +601,11 @@ declare class TikTokCaptions extends EventEmitter {
598
601
  private buildWsUrl;
599
602
  private send;
600
603
  private handleMessage;
604
+ /**
605
+ * Connect to the TikTok FLV stream and extract audio.
606
+ * Sends binary audio buffers to the server via WebSocket.
607
+ */
608
+ private connectToStream;
601
609
  }
602
610
 
603
611
  interface GetRanklistOptions {
package/dist/index.d.ts CHANGED
@@ -567,6 +567,9 @@ declare class TikTokCaptions extends EventEmitter {
567
567
  private readonly _diarization;
568
568
  private readonly _maxDurationMinutes;
569
569
  private _language;
570
+ private streamAbortController;
571
+ private flvExtractor;
572
+ private streamUrl;
570
573
  constructor(options: TikTokCaptionsOptions);
571
574
  /**
572
575
  * Start real-time captions for the configured TikTok user.
@@ -598,6 +601,11 @@ declare class TikTokCaptions extends EventEmitter {
598
601
  private buildWsUrl;
599
602
  private send;
600
603
  private handleMessage;
604
+ /**
605
+ * Connect to the TikTok FLV stream and extract audio.
606
+ * Sends binary audio buffers to the server via WebSocket.
607
+ */
608
+ private connectToStream;
601
609
  }
602
610
 
603
611
  interface GetRanklistOptions {
package/dist/index.js CHANGED
@@ -1173,6 +1173,86 @@ var TikTokLive = class extends import_events.EventEmitter {
1173
1173
  // src/captions.ts
1174
1174
  var import_events2 = require("events");
1175
1175
  var import_ws2 = __toESM(require("ws"));
// FLV container constants used by FlvAudioExtractor (sizes are in bytes).
var FLV_TAG_AUDIO = 8; // tag type value that marks an audio tag
var FLV_HEADER_SIZE = 9; // fixed-size part of the FLV file header
var FLV_PREV_TAG_SIZE = 4; // PreviousTagSize field following the header and every tag

/**
 * Incremental FLV demuxer that extracts AAC audio and re-frames it as ADTS.
 *
 * Feed arbitrary byte chunks to `push()`. Every complete AAC raw-data tag is
 * prefixed with a 7-byte ADTS header and handed to the `onAudio` callback as a
 * freshly allocated Uint8Array. Non-audio tags and non-AAC audio are skipped.
 * Raw frames are only emitted after an AAC sequence header has been seen, since
 * the ADTS header fields are taken from it.
 */
var FlvAudioExtractor = class {
  // Bytes received but not yet consumed (partial header or partial tag).
  buffer = new Uint8Array(0);
  // True once the FLV file header has been skipped.
  headerParsed = false;
  // Callback invoked with each complete ADTS frame.
  onAudio;
  // Values below are placeholders until parseASC() overwrites them; they are
  // never used for output before that because emission is gated on ascParsed.
  aacProfile = 2;
  sampleRateIndex = 4;
  channelConfig = 2;
  ascParsed = false;

  /**
   * @param onAudio Function called with one Uint8Array (ADTS header + raw AAC
   *   frame) per extracted audio frame.
   */
  constructor(onAudio) {
    this.onAudio = onAudio;
  }

  /**
   * Read the first two bytes of an AudioSpecificConfig.
   * Layout used: 5 bits object type, 4 bits sampling-frequency index,
   * 4 bits channel configuration. Inputs shorter than 2 bytes are ignored.
   */
  parseASC(asc) {
    if (asc.length < 2) return;
    this.aacProfile = (asc[0] >> 3) & 31;
    this.sampleRateIndex = ((asc[0] & 7) << 1) | ((asc[1] >> 7) & 1);
    this.channelConfig = (asc[1] >> 3) & 15;
    this.ascParsed = true;
  }

  /**
   * Build the 7-byte ADTS header for a raw AAC frame of `frameLength` bytes.
   * The length field written into the header covers header + payload.
   * Note: the profile is masked to 2 bits, so object types above 4 cannot be
   * represented faithfully.
   */
  buildAdtsHeader(frameLength) {
    const adts = new Uint8Array(7);
    const fullLength = frameLength + 7;
    const profile = this.aacProfile - 1;
    adts[0] = 255; // syncword high bits
    adts[1] = 241; // syncword low bits + flags (0xF1)
    adts[2] = ((profile & 3) << 6) | ((this.sampleRateIndex & 15) << 2) | ((this.channelConfig >> 2) & 1);
    adts[3] = ((this.channelConfig & 3) << 6) | ((fullLength >> 11) & 3);
    adts[4] = (fullLength >> 3) & 255;
    adts[5] = ((fullLength & 7) << 5) | 31;
    adts[6] = 252;
    return adts;
  }

  /**
   * Append a chunk of FLV bytes and emit every audio frame that is now complete.
   * Incomplete trailing data is kept for the next call.
   *
   * @param chunk Uint8Array (or array-like of bytes) read from the stream.
   */
  push(chunk) {
    const merged = new Uint8Array(this.buffer.length + chunk.length);
    merged.set(this.buffer, 0);
    merged.set(chunk, this.buffer.length);
    this.buffer = merged;

    if (!this.headerParsed) {
      if (this.buffer.length < FLV_HEADER_SIZE + FLV_PREV_TAG_SIZE) return;
      // Signature bytes are ASCII "FLV".
      const hasSignature = this.buffer[0] === 70 && this.buffer[1] === 76 && this.buffer[2] === 86;
      if (!hasSignature) {
        // Not an FLV stream: drop what was buffered instead of retaining every
        // subsequent chunk forever.
        this.buffer = new Uint8Array(0);
        return;
      }
      const view = new DataView(this.buffer.buffer, this.buffer.byteOffset, this.buffer.byteLength);
      const dataOffset = view.getUint32(5);
      const bodyStart = dataOffset + FLV_PREV_TAG_SIZE;
      // Wait until the whole header (it may be longer than the fixed 9 bytes)
      // plus the first PreviousTagSize has arrived; slicing early would leave
      // the tag parser misaligned for the rest of the stream.
      if (this.buffer.length < bodyStart) return;
      this.buffer = this.buffer.subarray(bodyStart);
      this.headerParsed = true;
    }

    const TAG_HEADER_SIZE = 11;
    while (this.buffer.length >= TAG_HEADER_SIZE) {
      const tagType = this.buffer[0] & 31;
      const dataSize = (this.buffer[1] << 16) | (this.buffer[2] << 8) | this.buffer[3];
      const totalTagSize = TAG_HEADER_SIZE + dataSize + FLV_PREV_TAG_SIZE;
      if (this.buffer.length < totalTagSize) break; // tag not complete yet

      if (tagType === FLV_TAG_AUDIO) {
        this.handleAudioPayload(this.buffer.subarray(TAG_HEADER_SIZE, TAG_HEADER_SIZE + dataSize));
      }
      this.buffer = this.buffer.subarray(totalTagSize);
    }
  }

  /**
   * Process the payload of one audio tag: remember the codec configuration from
   * a sequence header (packet type 0) or emit an ADTS frame for raw data
   * (packet type 1). Anything that is not sound format 10 is ignored.
   */
  handleAudioPayload(audioData) {
    if (audioData.length <= 2) return;
    const soundFormat = (audioData[0] >> 4) & 15;
    if (soundFormat !== 10) return;

    const aacPacketType = audioData[1];
    if (aacPacketType === 0) {
      this.parseASC(audioData.subarray(2));
      return;
    }
    if (aacPacketType === 1 && this.ascParsed) {
      const rawFrame = audioData.subarray(2);
      const adtsHeader = this.buildAdtsHeader(rawFrame.length);
      const adtsFrame = new Uint8Array(adtsHeader.length + rawFrame.length);
      adtsFrame.set(adtsHeader, 0);
      adtsFrame.set(rawFrame, adtsHeader.length);
      this.onAudio(adtsFrame);
    }
  }

  /** Discard buffered bytes and forget the header and codec configuration. */
  reset() {
    this.buffer = new Uint8Array(0);
    this.headerParsed = false;
    this.ascParsed = false;
  }
};
1176
1256
  var DEFAULT_CAPTIONS_SERVER = "wss://api.tik.tools";
1177
1257
  var TikTokCaptions = class extends import_events2.EventEmitter {
1178
1258
  ws = null;
@@ -1189,6 +1269,9 @@ var TikTokCaptions = class extends import_events2.EventEmitter {
1189
1269
  _diarization;
1190
1270
  _maxDurationMinutes;
1191
1271
  _language;
1272
+ streamAbortController = null;
1273
+ flvExtractor = null;
1274
+ streamUrl = null;
1192
1275
  constructor(options) {
1193
1276
  super();
1194
1277
  this.uniqueId = options.uniqueId.replace(/^@/, "");
@@ -1247,6 +1330,14 @@ var TikTokCaptions = class extends import_events2.EventEmitter {
1247
1330
  */
1248
1331
  stop() {
1249
1332
  this.intentionalClose = true;
1333
+ if (this.streamAbortController) {
1334
+ this.streamAbortController.abort();
1335
+ this.streamAbortController = null;
1336
+ }
1337
+ if (this.flvExtractor) {
1338
+ this.flvExtractor.reset();
1339
+ this.flvExtractor = null;
1340
+ }
1250
1341
  if (this.ws) {
1251
1342
  this.send({ action: "stop" });
1252
1343
  this.ws.close(1e3);
@@ -1309,6 +1400,10 @@ var TikTokCaptions = class extends import_events2.EventEmitter {
1309
1400
  try {
1310
1401
  const msg = JSON.parse(raw);
1311
1402
  switch (msg.type) {
1403
+ case "stream_info":
1404
+ if (this.debug) console.log(`[Captions] Received stream_info: flv=${!!msg.flvUrl}, hls=${!!msg.hlsUrl}, ao=${!!msg.audioOnlyUrl}`);
1405
+ this.connectToStream(msg);
1406
+ break;
1312
1407
  case "caption":
1313
1408
  this.emit("caption", {
1314
1409
  text: msg.text,
@@ -1359,6 +1454,43 @@ var TikTokCaptions = class extends import_events2.EventEmitter {
1359
1454
  message: msg.message
1360
1455
  });
1361
1456
  break;
1457
+ // Handle interim/final captions from server (sentence-level accumulation)
1458
+ case "interim":
1459
+ this.emit("caption", {
1460
+ text: msg.text,
1461
+ language: msg.language,
1462
+ isFinal: false,
1463
+ confidence: msg.confidence || 0,
1464
+ speaker: msg.speaker
1465
+ });
1466
+ break;
1467
+ case "final":
1468
+ this.emit("caption", {
1469
+ text: msg.text,
1470
+ language: msg.language,
1471
+ isFinal: true,
1472
+ confidence: msg.confidence || 0,
1473
+ speaker: msg.speaker
1474
+ });
1475
+ break;
1476
+ case "translation_interim":
1477
+ this.emit("translation", {
1478
+ text: msg.text,
1479
+ language: msg.language,
1480
+ isFinal: false,
1481
+ confidence: msg.confidence || 0,
1482
+ speaker: msg.speaker
1483
+ });
1484
+ break;
1485
+ case "translation_final":
1486
+ this.emit("translation", {
1487
+ text: msg.text,
1488
+ language: msg.language,
1489
+ isFinal: true,
1490
+ confidence: msg.confidence || 0,
1491
+ speaker: msg.speaker
1492
+ });
1493
+ break;
1362
1494
  default:
1363
1495
  if (this.debug) {
1364
1496
  console.log(`[Captions] Unknown message type: ${msg.type}`, msg);
@@ -1368,6 +1500,115 @@ var TikTokCaptions = class extends import_events2.EventEmitter {
1368
1500
  if (this.debug) console.error("[Captions] Failed to parse message:", raw);
1369
1501
  }
1370
1502
  }
1503
+ /**
1504
+ * Connect to the TikTok FLV stream and extract audio.
1505
+ * Sends binary audio buffers to the server via WebSocket.
1506
+ */
1507
+ async connectToStream(streamInfo) {
1508
+ const url = streamInfo.audioOnlyUrl || streamInfo.flvUrl;
1509
+ if (!url) {
1510
+ this.emit("error", { code: "NO_STREAM_URL", message: "Server did not provide a usable stream URL" });
1511
+ return;
1512
+ }
1513
+ this.streamUrl = url;
1514
+ if (this.debug) console.log(`[Captions] connectToStream: URL selected: ${url.substring(0, 80)}...`);
1515
+ if (this.streamAbortController) {
1516
+ this.streamAbortController.abort();
1517
+ }
1518
+ this.streamAbortController = new AbortController();
1519
+ let audioFramesSent = 0;
1520
+ let audioBytesSent = 0;
1521
+ this.flvExtractor = new FlvAudioExtractor((adtsFrame) => {
1522
+ if (this.ws?.readyState === import_ws2.default.OPEN) {
1523
+ this.ws.send(adtsFrame);
1524
+ audioFramesSent++;
1525
+ audioBytesSent += adtsFrame.length;
1526
+ if (this.debug && (audioFramesSent <= 3 || audioFramesSent % 100 === 0)) {
1527
+ console.log(`[Captions] Audio frame #${audioFramesSent}: ${adtsFrame.length}b (total: ${audioBytesSent}b)`);
1528
+ }
1529
+ } else if (this.debug && audioFramesSent === 0) {
1530
+ console.log(`[Captions] WARNING: WS not open (readyState=${this.ws?.readyState}), cannot send audio`);
1531
+ }
1532
+ });
1533
+ try {
1534
+ if (this.debug) console.log(`[Captions] connectToStream: calling fetch()...`);
1535
+ const resp = await fetch(url, {
1536
+ signal: this.streamAbortController.signal,
1537
+ headers: {
1538
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
1539
+ }
1540
+ });
1541
+ if (this.debug) console.log(`[Captions] connectToStream: fetch returned status=${resp.status}, hasBody=${!!resp.body}`);
1542
+ if (!resp.ok || !resp.body) {
1543
+ throw new Error(`FLV stream HTTP ${resp.status}`);
1544
+ }
1545
+ if (this.debug) console.log(`[Captions] FLV stream connected (${resp.status})`);
1546
+ const reader = resp.body.getReader ? resp.body.getReader() : null;
1547
+ if (this.debug) console.log(`[Captions] connectToStream: hasReader=${!!reader}, hasAsyncIterator=${typeof resp.body[Symbol.asyncIterator] === "function"}`);
1548
+ if (reader) {
1549
+ const processStream = async () => {
1550
+ let chunks = 0;
1551
+ try {
1552
+ while (true) {
1553
+ const { done, value } = await reader.read();
1554
+ if (done || this.intentionalClose) {
1555
+ if (this.debug) console.log(`[Captions] FLV stream ended (done=${done}, intentionalClose=${this.intentionalClose}), chunks=${chunks}, audioFrames=${audioFramesSent}`);
1556
+ break;
1557
+ }
1558
+ chunks++;
1559
+ if (value && this.flvExtractor) {
1560
+ this.flvExtractor.push(value);
1561
+ }
1562
+ if (this.debug && chunks <= 3) {
1563
+ console.log(`[Captions] FLV chunk #${chunks}: ${value?.length || 0}b`);
1564
+ }
1565
+ }
1566
+ } catch (err) {
1567
+ if (err.name !== "AbortError" && !this.intentionalClose) {
1568
+ if (this.debug) console.error("[Captions] FLV stream read error:", err.message);
1569
+ this.emit("error", { code: "STREAM_READ_ERROR", message: err.message });
1570
+ } else if (this.debug) {
1571
+ console.log(`[Captions] FLV stream aborted after ${chunks} chunks, ${audioFramesSent} audio frames`);
1572
+ }
1573
+ }
1574
+ };
1575
+ processStream();
1576
+ } else if (typeof resp.body[Symbol.asyncIterator] === "function") {
1577
+ const processNodeStream = async () => {
1578
+ let chunks = 0;
1579
+ try {
1580
+ for await (const chunk of resp.body) {
1581
+ if (this.intentionalClose) break;
1582
+ chunks++;
1583
+ const u8 = chunk instanceof Uint8Array ? chunk : new Uint8Array(chunk);
1584
+ if (this.flvExtractor) {
1585
+ this.flvExtractor.push(u8);
1586
+ }
1587
+ if (this.debug && chunks <= 3) {
1588
+ console.log(`[Captions] FLV chunk #${chunks}: ${u8.length}b`);
1589
+ }
1590
+ }
1591
+ if (this.debug) console.log(`[Captions] Node stream ended, chunks=${chunks}, audioFrames=${audioFramesSent}`);
1592
+ } catch (err) {
1593
+ if (err.name !== "AbortError" && !this.intentionalClose) {
1594
+ if (this.debug) console.error("[Captions] FLV stream read error:", err.message);
1595
+ this.emit("error", { code: "STREAM_READ_ERROR", message: err.message });
1596
+ } else if (this.debug) {
1597
+ console.log(`[Captions] FLV node stream aborted after ${chunks} chunks, ${audioFramesSent} audio frames`);
1598
+ }
1599
+ }
1600
+ };
1601
+ processNodeStream();
1602
+ } else {
1603
+ if (this.debug) console.error(`[Captions] ERROR: resp.body has no getReader() and no asyncIterator!`);
1604
+ }
1605
+ } catch (err) {
1606
+ if (err.name !== "AbortError" && !this.intentionalClose) {
1607
+ console.error("[Captions] FLV stream connect error:", err.message);
1608
+ this.emit("error", { code: "STREAM_CONNECT_ERROR", message: err.message });
1609
+ }
1610
+ }
1611
+ }
1371
1612
  };
1372
1613
 
1373
1614
  // src/api.ts