@volley/recognition-client-sdk 0.1.211 → 0.1.255

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,2 +1,2 @@
1
- export { f as AudioEncoding, h as ControlSignal, G as GameContextV1, c as RealTimeTwoWayWebSocketRecognitionClient, a as RealTimeTwoWayWebSocketRecognitionClientConfig, g as RecognitionContextTypeV1, d as TranscriptionResult } from './browser-C4ZssGoU.js';
1
+ export { h as AudioEncoding, k as ControlSignal, G as GameContextV1, e as RealTimeTwoWayWebSocketRecognitionClient, c as RealTimeTwoWayWebSocketRecognitionClientConfig, j as RecognitionContextTypeV1, f as TranscriptionResult } from './browser-BZs4BL_w.js';
2
2
  import 'zod';
@@ -19,6 +19,11 @@ var RecognitionMode;
19
19
  RecognitionMode2["STREAMING"] = "streaming";
20
20
  RecognitionMode2["BATCH"] = "batch";
21
21
  })(RecognitionMode || (RecognitionMode = {}));
22
/**
 * Kind of API an ASR provider exposes for a recognition job.
 * - STREAMING ("streaming"): real-time streaming APIs
 * - FILE_BASED ("file-based"): file upload / batch APIs
 */
var ASRApiType;
(function(ASRApiType2) {
  ASRApiType2["STREAMING"] = "streaming";
  ASRApiType2["FILE_BASED"] = "file-based";
})(ASRApiType || (ASRApiType = {}));
22
27
  var DeepgramModel;
23
28
  (function(DeepgramModel2) {
24
29
  DeepgramModel2["NOVA_2"] = "nova-2";
@@ -82,6 +87,8 @@ var MetadataResultSchemaV1 = z.object({
82
87
  accumulatedAudioTimeMs: z.number().optional(),
83
88
  // Cost Information
84
89
  costInUSD: z.number().default(0).optional(),
90
+ // ASR API Type
91
+ apiType: z.nativeEnum(ASRApiType).optional(),
85
92
  // ASR configuration as JSON string (no type validation)
86
93
  asrConfig: z.string().optional(),
87
94
  // Raw ASR metadata payload as provided by the provider (stringified if needed)
@@ -94,6 +101,7 @@ var ErrorTypeV1;
94
101
  ErrorTypeV12["PROVIDER_ERROR"] = "provider_error";
95
102
  ErrorTypeV12["TIMEOUT_ERROR"] = "timeout_error";
96
103
  ErrorTypeV12["QUOTA_EXCEEDED"] = "quota_exceeded";
104
+ ErrorTypeV12["CONNECTION_ERROR"] = "connection_error";
97
105
  ErrorTypeV12["UNKNOWN_ERROR"] = "unknown_error";
98
106
  })(ErrorTypeV1 || (ErrorTypeV1 = {}));
99
107
  var ErrorResultSchemaV1 = z.object({
@@ -214,7 +222,27 @@ var TranscriptMessageSchema = z.object({
214
222
  * Whether this transcript is finalized (won't change)
215
223
  * @example true
216
224
  */
217
- is_final: z.boolean()
225
+ is_final: z.boolean(),
226
+ /**
227
+ * Accumulated confirmed transcript (all finalized text received so far)
228
+ * Contains only the completed/finalized portions
229
+ * @example "hello world how are you"
230
+ */
231
+ confirmedTranscript: z.string().optional(),
232
+ /**
233
+ * New pending transcript (current interim text since last confirmation)
234
+ * Contains only the unconfirmed interim text
235
+ * @example "I'm doing"
236
+ */
237
+ newPendingTranscript: z.string().optional(),
238
+ /**
239
+ * Whether this is a fallback transcript (forced due to timeout)
240
+ * True when provider didn't send is_final=true within expected timeframe
241
+ * Used for monitoring/debugging fallback scenarios
242
+ * @example true
243
+ * @default false
244
+ */
245
+ is_fallback: z.boolean().optional()
218
246
  });
219
247
  var VADEndSignalSchema = z.object({
220
248
  type: z.literal(ProviderMessageType.VAD_END_SIGNAL),
@@ -302,7 +330,20 @@ var TimerSchema = z.object({
302
330
  * Total duration of all audio chunks sent to this provider session
303
331
  * @example 2500 (2.5 seconds of audio has been sent)
304
332
  */
305
- accumulatedAudioTimeMs: z.number().optional()
333
+ accumulatedAudioTimeMs: z.number().optional(),
334
+ /**
335
+ * Estimated cost in USD for this session
336
+ * Calculated by the job based on audio duration and provider pricing
337
+ * @example 0.0025 (quarter of a cent)
338
+ */
339
+ costInUSD: z.number().optional().default(0),
340
+ /**
341
+ * ASR API type from the job
342
+ * - STREAMING: Real-time streaming APIs (Deepgram, AssemblyAI, Google)
343
+ * - FILE_BASED: File upload/batch APIs (OpenAI Batch, Gemini Batch)
344
+ * @example ASRApiType.STREAMING
345
+ */
346
+ apiType: z.nativeEnum(ASRApiType).optional()
306
347
  });
307
348
  var RawMessageSchema = z.object({
308
349
  type: z.literal(ProviderMessageType.RAW),
@@ -391,6 +432,16 @@ var QuotaExceededExceptionSchema = BaseRecognitionExceptionSchema.extend({
391
432
  /** How long to wait in seconds before retry */
392
433
  retryAfterSeconds: z.number().optional()
393
434
  });
435
/**
 * Exception payload reported when a connection could not be established.
 * Extends the base exception schema and is selected by
 * `errorType === ErrorTypeV1.CONNECTION_ERROR`.
 */
var ConnectionExceptionSchema = BaseRecognitionExceptionSchema.extend({
  errorType: z.literal(ErrorTypeV1.CONNECTION_ERROR),
  // NOTE(review): always `true` for this exception type; presumably tells the
  // caller the service can be retried right away — confirm against the base schema.
  isImmediatelyAvailable: z.literal(true),
  /** Number of connection attempts made */
  attempts: z.number().optional(),
  /** URL that failed to connect */
  url: z.string().optional(),
  /** Underlying error message */
  underlyingError: z.string().optional()
});
394
445
  var UnknownExceptionSchema = BaseRecognitionExceptionSchema.extend({
395
446
  errorType: z.literal(ErrorTypeV1.UNKNOWN_ERROR),
396
447
  isImmediatelyAvailable: z.literal(false),
@@ -405,6 +456,7 @@ z.discriminatedUnion("errorType", [
405
456
  ProviderExceptionSchema,
406
457
  TimeoutExceptionSchema,
407
458
  QuotaExceededExceptionSchema,
459
+ ConnectionExceptionSchema,
408
460
  UnknownExceptionSchema
409
461
  ]);
410
462
  var RecognitionContextTypeV1;
@@ -1178,8 +1230,16 @@ __name(getRecognitionServiceBase, "getRecognitionServiceBase");
1178
1230
 
1179
1231
  // src/utils/url-builder.ts
1180
1232
  function buildWebSocketUrl(config) {
1181
- const defaultBase = getRecognitionServiceBase("production");
1182
- const baseUrl = config.url || `${defaultBase.wsBase}/ws/v1/recognize`;
1233
+ let baseUrl;
1234
+ if (config.url) {
1235
+ baseUrl = config.url;
1236
+ } else if (config.stage) {
1237
+ const stageBase = getRecognitionServiceBase(config.stage);
1238
+ baseUrl = `${stageBase.wsBase}/ws/v1/recognize`;
1239
+ } else {
1240
+ const defaultBase = getRecognitionServiceBase("production");
1241
+ baseUrl = `${defaultBase.wsBase}/ws/v1/recognize`;
1242
+ }
1183
1243
  const url = new URL(baseUrl);
1184
1244
  url.searchParams.set("audioUtteranceId", config.audioUtteranceId);
1185
1245
  if (config.callbackUrls && config.callbackUrls.length > 0) {
@@ -1435,6 +1495,41 @@ var MessageHandler = class {
1435
1495
  }
1436
1496
  };
1437
1497
 
1498
+ // src/errors.ts
1499
/**
 * Base class for errors raised by the recognition client.
 *
 * Records the error category and the creation time (epoch milliseconds) on
 * every instance.
 *
 * @param {string} errorType - Error category (subclasses pass an ErrorTypeV1 value).
 * @param {string} message - Human-readable description.
 * @param {{ cause?: unknown }} [options] - Standard Error options; forwarded to
 *   the Error constructor so the original failure can be chained via `cause`.
 */
var RecognitionError = class extends Error {
  static {
    __name(this, "RecognitionError");
  }
  errorType;
  timestamp;
  constructor(errorType, message, options) {
    super(message, options);
    this.name = "RecognitionError";
    this.errorType = errorType;
    this.timestamp = Date.now();
    // Non-standard V8 API: keep the constructor frames out of the stack trace.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, this.constructor);
    }
  }
};

/**
 * Raised when a connection could not be established.
 *
 * @param {string} message - Human-readable description.
 * @param {number} attempts - Number of connection attempts made.
 * @param {string} url - URL that failed to connect.
 * @param {unknown} [underlyingError] - The failure that triggered this error.
 *   Exposed as `underlyingError` and, when provided, also as the standard
 *   `cause` so generic tooling can follow the chain.
 */
var ConnectionError = class extends RecognitionError {
  static {
    __name(this, "ConnectionError");
  }
  attempts;
  url;
  underlyingError;
  constructor(message, attempts, url, underlyingError) {
    // Only pass `cause` when there is one; `{ cause: undefined }` would still
    // create an own `cause` property on the instance.
    super(
      ErrorTypeV1.CONNECTION_ERROR,
      message,
      underlyingError !== void 0 ? { cause: underlyingError } : void 0
    );
    this.name = "ConnectionError";
    this.attempts = attempts;
    this.url = url;
    // The field declaration above already initialises this to undefined, so a
    // plain assignment is equivalent to the guarded one.
    this.underlyingError = underlyingError;
  }
};
1532
+
1438
1533
  // src/recognition-client.ts
1439
1534
  async function blobToArrayBuffer(blob) {
1440
1535
  if (typeof blob.arrayBuffer === "function") {
@@ -1472,6 +1567,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
1472
1567
  ...config.url && {
1473
1568
  url: config.url
1474
1569
  },
1570
+ ...config.stage && {
1571
+ stage: config.stage
1572
+ },
1475
1573
  ...config.callbackUrls && {
1476
1574
  callbackUrls: config.callbackUrls
1477
1575
  },
@@ -1502,6 +1600,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
1502
1600
  highWM: config.highWaterMark ?? 512e3,
1503
1601
  lowWM: config.lowWaterMark ?? 128e3
1504
1602
  });
1603
+ const retryConfig = config.connectionRetry || {};
1604
+ const maxAttempts = Math.max(1, Math.min(5, retryConfig.maxAttempts ?? 4));
1605
+ const delayMs = retryConfig.delayMs ?? 200;
1505
1606
  this.config = {
1506
1607
  url,
1507
1608
  audioUtteranceId,
@@ -1530,6 +1631,10 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
1530
1631
  lowWaterMark: config.lowWaterMark ?? 128e3,
1531
1632
  maxBufferDurationSec: config.maxBufferDurationSec ?? 60,
1532
1633
  chunksPerSecond: config.chunksPerSecond ?? 100,
1634
+ connectionRetry: {
1635
+ maxAttempts,
1636
+ delayMs
1637
+ },
1533
1638
  ...config.logger && {
1534
1639
  logger: config.logger
1535
1640
  }
@@ -1587,9 +1692,8 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
1587
1692
  // ==========================================================================
1588
1693
  async connect() {
1589
1694
  if (this.connectionPromise) {
1590
- this.log("debug", "Returning existing connection promise", {
1591
- state: this.state,
1592
- hasPromise: true
1695
+ this.log("debug", "Returning existing connection promise (already connecting)", {
1696
+ state: this.state
1593
1697
  });
1594
1698
  return this.connectionPromise;
1595
1699
  }
@@ -1599,43 +1703,97 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
1599
1703
  });
1600
1704
  return Promise.resolve();
1601
1705
  }
1602
- this.log("debug", "Creating new connection to WebSocket", {
1603
- url: this.config.url
1604
- });
1605
- this.state = ClientState.CONNECTING;
1606
- const connectionStartTime = Date.now();
1607
- this.connectionPromise = new Promise((resolve, reject) => {
1608
- const timeout = setTimeout(() => {
1609
- this.log("warn", "Connection timeout", {
1610
- timeout: 1e4
1611
- });
1612
- this.state = ClientState.FAILED;
1613
- reject(new Error("Timeout"));
1614
- }, 1e4);
1615
- const originalOnConnected = this.onConnected.bind(this);
1616
- this.onConnected = () => {
1617
- clearTimeout(timeout);
1618
- const connectionTime = Date.now() - connectionStartTime;
1619
- this.log("debug", "Connection established successfully", {
1620
- connectionTimeMs: connectionTime,
1621
- url: this.config.url
1622
- });
1623
- this.state = ClientState.CONNECTED;
1624
- originalOnConnected();
1625
- resolve();
1626
- };
1627
- const originalOnError = this.onError.bind(this);
1628
- this.onError = (error) => {
1629
- clearTimeout(timeout);
1630
- this.log("warn", "Connection error", error);
1631
- this.state = ClientState.FAILED;
1632
- originalOnError(error);
1633
- reject(error);
1634
- };
1635
- super.connect();
1636
- });
1706
+ this.connectionPromise = this.connectWithRetry();
1637
1707
  return this.connectionPromise;
1638
1708
  }
1709
+ /**
1710
+ * Attempt to connect with retry logic
1711
+ * Only retries on initial connection establishment, not mid-stream interruptions
1712
+ */
1713
+ async connectWithRetry() {
1714
+ const { maxAttempts, delayMs } = this.config.connectionRetry;
1715
+ const connectionTimeout = 1e4;
1716
+ let lastError;
1717
+ const originalOnConnected = this.onConnected.bind(this);
1718
+ const originalOnError = this.onError.bind(this);
1719
+ try {
1720
+ for (let attempt = 1; attempt <= maxAttempts; attempt++) {
1721
+ const attemptLogLevel = attempt === 1 ? "debug" : "info";
1722
+ this.log(attemptLogLevel, `Connection attempt ${attempt}/${maxAttempts}`, {
1723
+ url: this.config.url,
1724
+ delayMs: attempt > 1 ? delayMs : 0
1725
+ });
1726
+ this.state = ClientState.CONNECTING;
1727
+ const connectionStartTime = Date.now();
1728
+ try {
1729
+ await new Promise((resolve, reject) => {
1730
+ let settled = false;
1731
+ const timeout = setTimeout(() => {
1732
+ if (settled) return;
1733
+ settled = true;
1734
+ this.log("warn", "Connection timeout", {
1735
+ timeout: connectionTimeout,
1736
+ attempt
1737
+ });
1738
+ this.state = ClientState.FAILED;
1739
+ reject(new Error(`Connection timeout after ${connectionTimeout}ms`));
1740
+ }, connectionTimeout);
1741
+ this.onConnected = () => {
1742
+ if (settled) return;
1743
+ settled = true;
1744
+ clearTimeout(timeout);
1745
+ const connectionTime = Date.now() - connectionStartTime;
1746
+ this.log("debug", "Connection established successfully", {
1747
+ connectionTimeMs: connectionTime,
1748
+ url: this.config.url,
1749
+ attempt
1750
+ });
1751
+ this.state = ClientState.CONNECTED;
1752
+ originalOnConnected();
1753
+ resolve();
1754
+ };
1755
+ this.onError = (error) => {
1756
+ if (settled) return;
1757
+ settled = true;
1758
+ clearTimeout(timeout);
1759
+ this.log("warn", "Connection error", {
1760
+ error,
1761
+ attempt
1762
+ });
1763
+ this.state = ClientState.FAILED;
1764
+ reject(error);
1765
+ };
1766
+ super.connect();
1767
+ });
1768
+ const successLogLevel = attempt === 1 ? "debug" : "info";
1769
+ this.log(successLogLevel, `Connection successful on attempt ${attempt}`, {
1770
+ totalAttempts: attempt
1771
+ });
1772
+ return;
1773
+ } catch (error) {
1774
+ lastError = error;
1775
+ if (attempt < maxAttempts) {
1776
+ const logLevel = attempt < 3 ? "info" : "warn";
1777
+ this.log(logLevel, `Connection attempt ${attempt} failed, retrying after ${delayMs}ms`, {
1778
+ error: lastError.message,
1779
+ nextAttempt: attempt + 1
1780
+ });
1781
+ this.state = ClientState.INITIAL;
1782
+ await new Promise((resolve) => setTimeout(resolve, delayMs));
1783
+ } else {
1784
+ this.log("warn", `All ${maxAttempts} connection attempts failed`, {
1785
+ error: lastError.message
1786
+ });
1787
+ }
1788
+ }
1789
+ }
1790
+ throw new ConnectionError(`Failed to establish connection after ${maxAttempts} attempts`, maxAttempts, this.config.url, lastError);
1791
+ } finally {
1792
+ this.onConnected = originalOnConnected;
1793
+ this.onError = originalOnError;
1794
+ this.connectionPromise = void 0;
1795
+ }
1796
+ }
1639
1797
  sendAudio(audioData) {
1640
1798
  if (audioData instanceof Blob) {
1641
1799
  blobToArrayBuffer(audioData).then((arrayBuffer) => {
@@ -1710,6 +1868,9 @@ var RealTimeTwoWayWebSocketRecognitionClient = class _RealTimeTwoWayWebSocketRec
1710
1868
  getAudioUtteranceId() {
1711
1869
  return this.config.audioUtteranceId;
1712
1870
  }
1871
+ getUrl() {
1872
+ return this.config.url;
1873
+ }
1713
1874
  getState() {
1714
1875
  return this.state;
1715
1876
  }