@volley/recognition-client-sdk 0.1.211 → 0.1.254

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
- export { f as AudioEncoding, h as ControlSignal, G as GameContextV1, c as RealTimeTwoWayWebSocketRecognitionClient, a as RealTimeTwoWayWebSocketRecognitionClientConfig, g as RecognitionContextTypeV1, d as TranscriptionResult } from './browser-C4ZssGoU.js';
1
+ export { h as AudioEncoding, k as ControlSignal, G as GameContextV1, e as RealTimeTwoWayWebSocketRecognitionClient, c as RealTimeTwoWayWebSocketRecognitionClientConfig, j as RecognitionContextTypeV1, f as TranscriptionResult } from './browser-BZs4BL_w.js';
2
2
  import 'zod';
@@ -19,6 +19,11 @@ var RecognitionMode;
19
19
  RecognitionMode2["STREAMING"] = "streaming";
20
20
  RecognitionMode2["BATCH"] = "batch";
21
21
  })(RecognitionMode || (RecognitionMode = {}));
22
/**
 * Kind of ASR provider API a job used.
 * - STREAMING  ("streaming"):  real-time streaming APIs
 * - FILE_BASED ("file-based"): file upload / batch APIs
 */
var ASRApiType;
{
  // Reuse an existing object if one was already assigned, otherwise create it.
  const apiTypes = ASRApiType || (ASRApiType = {});
  apiTypes.STREAMING = "streaming";
  apiTypes.FILE_BASED = "file-based";
}
22
27
  var DeepgramModel;
23
28
  (function(DeepgramModel2) {
24
29
  DeepgramModel2["NOVA_2"] = "nova-2";
@@ -82,6 +87,8 @@ var MetadataResultSchemaV1 = z.object({
82
87
  accumulatedAudioTimeMs: z.number().optional(),
83
88
  // Cost Information
84
89
  costInUSD: z.number().default(0).optional(),
90
+ // ASR API Type
91
+ apiType: z.nativeEnum(ASRApiType).optional(),
85
92
  // ASR configuration as JSON string (no type validation)
86
93
  asrConfig: z.string().optional(),
87
94
  // Raw ASR metadata payload as provided by the provider (stringified if needed)
@@ -94,6 +101,7 @@ var ErrorTypeV1;
94
101
  ErrorTypeV12["PROVIDER_ERROR"] = "provider_error";
95
102
  ErrorTypeV12["TIMEOUT_ERROR"] = "timeout_error";
96
103
  ErrorTypeV12["QUOTA_EXCEEDED"] = "quota_exceeded";
104
+ ErrorTypeV12["CONNECTION_ERROR"] = "connection_error";
97
105
  ErrorTypeV12["UNKNOWN_ERROR"] = "unknown_error";
98
106
  })(ErrorTypeV1 || (ErrorTypeV1 = {}));
99
107
  var ErrorResultSchemaV1 = z.object({
@@ -214,7 +222,19 @@ var TranscriptMessageSchema = z.object({
214
222
  * Whether this transcript is finalized (won't change)
215
223
  * @example true
216
224
  */
217
- is_final: z.boolean()
225
+ is_final: z.boolean(),
226
+ /**
227
+ * Accumulated confirmed transcript (all finalized text received so far)
228
+ * Contains only the completed/finalized portions
229
+ * @example "hello world how are you"
230
+ */
231
+ confirmedTranscript: z.string().optional(),
232
+ /**
233
+ * New pending transcript (current interim text since last confirmation)
234
+ * Contains only the unconfirmed interim text
235
+ * @example "I'm doing"
236
+ */
237
+ newPendingTranscript: z.string().optional()
218
238
  });
219
239
  var VADEndSignalSchema = z.object({
220
240
  type: z.literal(ProviderMessageType.VAD_END_SIGNAL),
@@ -302,7 +322,20 @@ var TimerSchema = z.object({
302
322
  * Total duration of all audio chunks sent to this provider session
303
323
  * @example 2500 (2.5 seconds of audio has been sent)
304
324
  */
305
- accumulatedAudioTimeMs: z.number().optional()
325
+ accumulatedAudioTimeMs: z.number().optional(),
326
+ /**
327
+ * Estimated cost in USD for this session
328
+ * Calculated by the job based on audio duration and provider pricing
329
+ * @example 0.0025 (quarter of a cent)
330
+ */
331
+ costInUSD: z.number().optional().default(0),
332
+ /**
333
+ * ASR API type from the job
334
+ * - STREAMING: Real-time streaming APIs (Deepgram, AssemblyAI, Google)
335
+ * - FILE_BASED: File upload/batch APIs (OpenAI Batch, Gemini Batch)
336
+ * @example ASRApiType.STREAMING
337
+ */
338
+ apiType: z.nativeEnum(ASRApiType).optional()
306
339
  });
307
340
  var RawMessageSchema = z.object({
308
341
  type: z.literal(ProviderMessageType.RAW),
@@ -391,6 +424,16 @@ var QuotaExceededExceptionSchema = BaseRecognitionExceptionSchema.extend({
391
424
  /** How long to wait in seconds before retry */
392
425
  retryAfterSeconds: z.number().optional()
393
426
  });
427
/**
 * Exception schema for connection failures.
 *
 * Extends the base recognition exception with the CONNECTION_ERROR
 * discriminator and optional diagnostics about the failed connection.
 * `isImmediatelyAvailable` is pinned to the literal `true`.
 */
var ConnectionExceptionSchema = BaseRecognitionExceptionSchema.extend({
  // Discriminator value for this exception variant.
  errorType: z.literal(ErrorTypeV1.CONNECTION_ERROR),
  isImmediatelyAvailable: z.literal(true),
  // How many connection attempts were made.
  attempts: z.number().optional(),
  // The URL that could not be connected to.
  url: z.string().optional(),
  // Message of the underlying error.
  underlyingError: z.string().optional()
});
394
437
  var UnknownExceptionSchema = BaseRecognitionExceptionSchema.extend({
395
438
  errorType: z.literal(ErrorTypeV1.UNKNOWN_ERROR),
396
439
  isImmediatelyAvailable: z.literal(false),
@@ -405,6 +448,7 @@ z.discriminatedUnion("errorType", [
405
448
  ProviderExceptionSchema,
406
449
  TimeoutExceptionSchema,
407
450
  QuotaExceededExceptionSchema,
451
+ ConnectionExceptionSchema,
408
452
  UnknownExceptionSchema
409
453
  ]);
410
454
  var RecognitionContextTypeV1;
@@ -1178,8 +1222,16 @@ __name(getRecognitionServiceBase, "getRecognitionServiceBase");
1178
1222
 
1179
1223
  // src/utils/url-builder.ts
1180
1224
  function buildWebSocketUrl(config) {
1181
- const defaultBase = getRecognitionServiceBase("production");
1182
- const baseUrl = config.url || `${defaultBase.wsBase}/ws/v1/recognize`;
1225
+ let baseUrl;
1226
+ if (config.url) {
1227
+ baseUrl = config.url;
1228
+ } else if (config.stage) {
1229
+ const stageBase = getRecognitionServiceBase(config.stage);
1230
+ baseUrl = `${stageBase.wsBase}/ws/v1/recognize`;
1231
+ } else {
1232
+ const defaultBase = getRecognitionServiceBase("production");
1233
+ baseUrl = `${defaultBase.wsBase}/ws/v1/recognize`;
1234
+ }
1183
1235
  const url = new URL(baseUrl);
1184
1236
  url.searchParams.set("audioUtteranceId", config.audioUtteranceId);
1185
1237
  if (config.callbackUrls && config.callbackUrls.length > 0) {
@@ -1435,6 +1487,41 @@ var MessageHandler = class {
1435
1487
  }
1436
1488
  };
1437
1489
 
1490
+ // src/errors.ts
1491
/**
 * Base class for errors raised by the recognition client.
 *
 * Carries an error category and the creation time in addition to the
 * standard `Error` fields.
 */
var RecognitionError = class extends Error {
  static {
    __name(this, "RecognitionError");
  }
  /** Error category supplied by the caller (subclasses pass an ErrorTypeV1 value). */
  errorType;
  /** Creation time in epoch milliseconds, taken from Date.now(). */
  timestamp;
  /**
   * @param {string} errorType - Error category.
   * @param {string} message - Human-readable description.
   * @param {{ cause?: unknown }} [options] - Standard Error options; lets callers
   *   chain the originating failure via `cause`. Omitting it keeps the previous
   *   two-argument behavior.
   */
  constructor(errorType, message, options) {
    super(message, options);
    this.name = "RecognitionError";
    this.errorType = errorType;
    this.timestamp = Date.now();
    // captureStackTrace is V8-specific; when present, drop constructor frames from the stack.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, this.constructor);
    }
  }
};
1507
/**
 * Error describing a failure to establish the connection.
 *
 * Always uses the CONNECTION_ERROR category and records how many attempts
 * were made, the target URL and, when supplied, the underlying failure.
 */
var ConnectionError = class extends RecognitionError {
  static {
    __name(this, "ConnectionError");
  }
  /** Number of connection attempts made. */
  attempts;
  /** URL that failed to connect. */
  url;
  /** Underlying failure, if one was supplied. */
  underlyingError;
  /**
   * @param {string} message - Human-readable description.
   * @param {number} attempts - Number of connection attempts made.
   * @param {string} url - URL that failed to connect.
   * @param {unknown} [underlyingError] - Failure from the last attempt.
   */
  constructor(message, attempts, url, underlyingError) {
    super(ErrorTypeV1.CONNECTION_ERROR, message);
    this.name = "ConnectionError";
    this.attempts = attempts;
    this.url = url;
    if (underlyingError !== void 0) {
      this.underlyingError = underlyingError;
      // Also expose it as the standard `cause` so generic error tooling can
      // follow the chain without knowing about `underlyingError`.
      this.cause = underlyingError;
    }
  }
};
1524
+
1438
1525
  // src/recognition-client.ts
1439
1526
  async function blobToArrayBuffer(blob) {
1440
1527
  if (typeof blob.arrayBuffer === "function") {
@@ -1472,6 +1559,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
1472
1559
  ...config.url && {
1473
1560
  url: config.url
1474
1561
  },
1562
+ ...config.stage && {
1563
+ stage: config.stage
1564
+ },
1475
1565
  ...config.callbackUrls && {
1476
1566
  callbackUrls: config.callbackUrls
1477
1567
  },
@@ -1502,6 +1592,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
1502
1592
  highWM: config.highWaterMark ?? 512e3,
1503
1593
  lowWM: config.lowWaterMark ?? 128e3
1504
1594
  });
1595
+ const retryConfig = config.connectionRetry || {};
1596
+ const maxAttempts = Math.max(1, Math.min(5, retryConfig.maxAttempts ?? 4));
1597
+ const delayMs = retryConfig.delayMs ?? 200;
1505
1598
  this.config = {
1506
1599
  url,
1507
1600
  audioUtteranceId,
@@ -1530,6 +1623,10 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
1530
1623
  lowWaterMark: config.lowWaterMark ?? 128e3,
1531
1624
  maxBufferDurationSec: config.maxBufferDurationSec ?? 60,
1532
1625
  chunksPerSecond: config.chunksPerSecond ?? 100,
1626
+ connectionRetry: {
1627
+ maxAttempts,
1628
+ delayMs
1629
+ },
1533
1630
  ...config.logger && {
1534
1631
  logger: config.logger
1535
1632
  }
@@ -1587,9 +1684,8 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
1587
1684
  // ==========================================================================
1588
1685
  async connect() {
1589
1686
  if (this.connectionPromise) {
1590
- this.log("debug", "Returning existing connection promise", {
1591
- state: this.state,
1592
- hasPromise: true
1687
+ this.log("debug", "Returning existing connection promise (already connecting)", {
1688
+ state: this.state
1593
1689
  });
1594
1690
  return this.connectionPromise;
1595
1691
  }
@@ -1599,43 +1695,97 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
1599
1695
  });
1600
1696
  return Promise.resolve();
1601
1697
  }
1602
- this.log("debug", "Creating new connection to WebSocket", {
1603
- url: this.config.url
1604
- });
1605
- this.state = ClientState.CONNECTING;
1606
- const connectionStartTime = Date.now();
1607
- this.connectionPromise = new Promise((resolve, reject) => {
1608
- const timeout = setTimeout(() => {
1609
- this.log("warn", "Connection timeout", {
1610
- timeout: 1e4
1611
- });
1612
- this.state = ClientState.FAILED;
1613
- reject(new Error("Timeout"));
1614
- }, 1e4);
1615
- const originalOnConnected = this.onConnected.bind(this);
1616
- this.onConnected = () => {
1617
- clearTimeout(timeout);
1618
- const connectionTime = Date.now() - connectionStartTime;
1619
- this.log("debug", "Connection established successfully", {
1620
- connectionTimeMs: connectionTime,
1621
- url: this.config.url
1622
- });
1623
- this.state = ClientState.CONNECTED;
1624
- originalOnConnected();
1625
- resolve();
1626
- };
1627
- const originalOnError = this.onError.bind(this);
1628
- this.onError = (error) => {
1629
- clearTimeout(timeout);
1630
- this.log("warn", "Connection error", error);
1631
- this.state = ClientState.FAILED;
1632
- originalOnError(error);
1633
- reject(error);
1634
- };
1635
- super.connect();
1636
- });
1698
+ this.connectionPromise = this.connectWithRetry();
1637
1699
  return this.connectionPromise;
1638
1700
  }
1701
+ /**
1702
+ * Attempt to connect with retry logic
1703
+ * Only retries on initial connection establishment, not mid-stream interruptions
1704
+ */
1705
+ async connectWithRetry() {
1706
+ const { maxAttempts, delayMs } = this.config.connectionRetry;
1707
+ const connectionTimeout = 1e4;
1708
+ let lastError;
1709
+ const originalOnConnected = this.config.onConnected;
1710
+ const originalOnError = this.config.onError;
1711
+ try {
1712
+ for (let attempt = 1; attempt <= maxAttempts; attempt++) {
1713
+ const attemptLogLevel = attempt === 1 ? "debug" : "info";
1714
+ this.log(attemptLogLevel, `Connection attempt ${attempt}/${maxAttempts}`, {
1715
+ url: this.config.url,
1716
+ delayMs: attempt > 1 ? delayMs : 0
1717
+ });
1718
+ this.state = ClientState.CONNECTING;
1719
+ const connectionStartTime = Date.now();
1720
+ try {
1721
+ await new Promise((resolve, reject) => {
1722
+ let settled = false;
1723
+ const timeout = setTimeout(() => {
1724
+ if (settled) return;
1725
+ settled = true;
1726
+ this.log("warn", "Connection timeout", {
1727
+ timeout: connectionTimeout,
1728
+ attempt
1729
+ });
1730
+ this.state = ClientState.FAILED;
1731
+ reject(new Error(`Connection timeout after ${connectionTimeout}ms`));
1732
+ }, connectionTimeout);
1733
+ this.onConnected = () => {
1734
+ if (settled) return;
1735
+ settled = true;
1736
+ clearTimeout(timeout);
1737
+ const connectionTime = Date.now() - connectionStartTime;
1738
+ this.log("debug", "Connection established successfully", {
1739
+ connectionTimeMs: connectionTime,
1740
+ url: this.config.url,
1741
+ attempt
1742
+ });
1743
+ this.state = ClientState.CONNECTED;
1744
+ originalOnConnected();
1745
+ resolve();
1746
+ };
1747
+ this.onError = (error) => {
1748
+ if (settled) return;
1749
+ settled = true;
1750
+ clearTimeout(timeout);
1751
+ this.log("warn", "Connection error", {
1752
+ error,
1753
+ attempt
1754
+ });
1755
+ this.state = ClientState.FAILED;
1756
+ reject(error);
1757
+ };
1758
+ super.connect();
1759
+ });
1760
+ const successLogLevel = attempt === 1 ? "debug" : "info";
1761
+ this.log(successLogLevel, `Connection successful on attempt ${attempt}`, {
1762
+ totalAttempts: attempt
1763
+ });
1764
+ return;
1765
+ } catch (error) {
1766
+ lastError = error;
1767
+ if (attempt < maxAttempts) {
1768
+ const logLevel = attempt < 3 ? "info" : "warn";
1769
+ this.log(logLevel, `Connection attempt ${attempt} failed, retrying after ${delayMs}ms`, {
1770
+ error: lastError.message,
1771
+ nextAttempt: attempt + 1
1772
+ });
1773
+ this.state = ClientState.INITIAL;
1774
+ await new Promise((resolve) => setTimeout(resolve, delayMs));
1775
+ } else {
1776
+ this.log("warn", `All ${maxAttempts} connection attempts failed`, {
1777
+ error: lastError.message
1778
+ });
1779
+ }
1780
+ }
1781
+ }
1782
+ throw new ConnectionError(`Failed to establish connection after ${maxAttempts} attempts`, maxAttempts, this.config.url, lastError);
1783
+ } finally {
1784
+ this.config.onConnected = originalOnConnected;
1785
+ this.config.onError = originalOnError;
1786
+ this.connectionPromise = void 0;
1787
+ }
1788
+ }
1639
1789
  sendAudio(audioData) {
1640
1790
  if (audioData instanceof Blob) {
1641
1791
  blobToArrayBuffer(audioData).then((arrayBuffer) => {
@@ -1710,6 +1860,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
1710
1860
  getAudioUtteranceId() {
1711
1861
  return this.config.audioUtteranceId;
1712
1862
  }
1863
+ getUrl() {
1864
+ return this.config.url;
1865
+ }
1713
1866
  getState() {
1714
1867
  return this.state;
1715
1868
  }