@aithos/sdk 0.1.0-alpha.43 → 0.1.0-alpha.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -55,6 +55,40 @@ const reply = await sdk.compute.invokeBedrock({
55
55
  console.log(reply.content);
56
56
  ```
57
57
 
58
+ ## Transcribing audio → text
59
+
60
+ `sdk.compute.invokeTranscribe` turns an audio `Blob` into text through AWS
61
+ Transcribe. It does one thing — audio → text — and **stores nothing**: it
62
+ returns the transcript and you decide what to do with it (write it to an
63
+ ethos, a PDS, your own database, email it, or throw it away).
64
+
65
+ ```ts
66
+ // Browser: a Blob from MediaRecorder; backend: a Blob from a Buffer.
67
+ const result = await sdk.compute.invokeTranscribe({
68
+ audio: blob, // Blob/File (Node 18+ has global Blob)
69
+ model: "transcribe:aws-fr-standard", // default; also aws-en-standard
70
+ languageCode: "fr-FR", // optional
71
+ // durationSecOverride: 127, // REQUIRED on backends (no DOM probe)
72
+ onProgress: (s) => console.log(s.phase), // uploading → starting → processing → completed
73
+ });
74
+
75
+ console.log(result.text); // "Bonjour, je voulais te dire que…"
76
+ console.log(result.segments); // [{ start_sec, end_sec, text }]
77
+ console.log(result.creditsCharged);
78
+
79
+ // Then YOU choose where it goes — the compute has no opinion:
80
+ await myEthos.addRevision(result.text); // or PDS, DB, email, nothing…
81
+ ```
82
+
83
+ The core is isomorphic (Node + browser) and depends only on `Blob`, `fetch`/`XMLHttpRequest`
84
+ and timers. Browser-only resilience is opt-in and framework-agnostic:
85
+ `sdk.compute.transcribeDraft` (IndexedDB queue of recordings) and
86
+ `sdk.compute.listLocalPendingTranscribes()` /
87
+ `subscribeLocalPendingTranscribes()` / `resumeTranscribe(jobId)` recover jobs
88
+ across reloads. React users get `useAithosTranscribePendingJobs(sdk.compute)`
89
+ from `@aithos/sdk/react`. Advanced callers can drive the flow manually with
90
+ `prepareTranscribe` / `startTranscribe` / `getTranscribeStatus`.
91
+
58
92
  ## Delegating compute to an agent — opt-in token spending
59
93
 
60
94
  To let an agent (or another user, or a third-party app) invoke Bedrock
@@ -1,5 +1,6 @@
1
1
  import type { AithosAuth } from "./auth.js";
2
2
  import { type AithosSdkEndpoints } from "./endpoints.js";
3
+ import { type LocalPendingEntry, type TranscribeDraftMeta, type TranscribeDraftRecord } from "./transcribe-resilience.js";
3
4
  export interface ComputeMessage {
4
5
  readonly role: "user" | "assistant";
5
6
  readonly content: string;
@@ -222,6 +223,138 @@ export interface InvokeSegmentationResult {
222
223
  readonly walletBalance: number;
223
224
  readonly auditId: string;
224
225
  }
226
+ /**
227
+ * Stable cross-provider transcription model ids. The `transcribe:` prefix
228
+ * is part of the wire contract (mirrors `image:` for image models). New
229
+ * models can be added server-side without an SDK release; the union here
230
+ * gives autocomplete + type-checking for the common ones.
231
+ */
232
+ export type TranscribeModelId = "transcribe:aws-fr-standard" | "transcribe:aws-en-standard";
233
+ /** Progress callback states emitted by {@link ComputeNamespace.invokeTranscribe}. */
234
+ export type TranscribeProgressState = {
235
+ readonly phase: "queued";
236
+ } | {
237
+ readonly phase: "uploading";
238
+ readonly bytesUploaded: number;
239
+ readonly totalBytes: number;
240
+ } | {
241
+ readonly phase: "starting";
242
+ } | {
243
+ readonly phase: "processing";
244
+ readonly elapsedSec: number;
245
+ } | {
246
+ readonly phase: "completed";
247
+ };
248
+ export interface TranscribeSegment {
249
+ readonly start_sec: number;
250
+ readonly end_sec: number;
251
+ readonly text: string;
252
+ readonly speaker_label?: string;
253
+ }
254
+ export interface TranscribeWord {
255
+ readonly start_sec: number;
256
+ readonly end_sec: number;
257
+ readonly content: string;
258
+ readonly confidence: number;
259
+ }
260
+ /** High-level args for the one-call {@link ComputeNamespace.invokeTranscribe}. */
261
+ export interface InvokeTranscribeArgs {
262
+ /** Mandate id — optional for owner sessions, required for delegate sessions. */
263
+ readonly mandateId?: string;
264
+ /** The audio to transcribe. `Blob` is supported in both Node 18+ and browsers. */
265
+ readonly audio: Blob;
266
+ /** Model alias. Default `"transcribe:aws-fr-standard"`. */
267
+ readonly model?: TranscribeModelId;
268
+ /** AWS language code override (e.g. `"fr-FR"`). Defaults to the model alias's language. */
269
+ readonly languageCode?: string;
270
+ /** Speaker diarization. Default `false`. */
271
+ readonly diarization?: boolean;
272
+ /**
273
+ * Audio duration in seconds. REQUIRED on backends / non-browser runtimes
274
+ * (used for the wallet pre-debit estimate). In a browser it is probed
275
+ * automatically from the Blob when omitted; if probing fails, the SDK
276
+ * falls back to an estimate of 0, which the server reconciles on completion.
277
+ */
278
+ readonly durationSecOverride?: number;
279
+ /** Idempotency key for replay-safe retries (generated if omitted). */
280
+ readonly idempotencyKey?: string;
281
+ /** Progress callback (upload bytes, processing elapsed, …). */
282
+ readonly onProgress?: (state: TranscribeProgressState) => void;
283
+ /** Abort signal — cancels upload + polling. */
284
+ readonly signal?: AbortSignal;
285
+ /**
286
+ * Polling cadence override (ms) for the status loop. Defaults to an
287
+ * exponential backoff 2s → 15s. Mainly for tests.
288
+ */
289
+ readonly pollIntervalMs?: number;
290
+ }
291
+ export interface InvokeTranscribeResult {
292
+ readonly text: string;
293
+ readonly segments: readonly TranscribeSegment[];
294
+ readonly words: readonly TranscribeWord[];
295
+ readonly durationSec: number;
296
+ readonly languageCode: string;
297
+ readonly creditsCharged: number;
298
+ readonly walletBalance: number;
299
+ readonly auditId: string;
300
+ readonly jobId: string;
301
+ readonly fundedBy?: "sponsored" | "grant" | "purchase";
302
+ readonly sponsoredBy?: string;
303
+ readonly receiptId?: string;
304
+ }
305
+ export interface PrepareTranscribeArgs {
306
+ readonly contentType: string;
307
+ readonly durationSecEstimate?: number;
308
+ /** Selects the delegate signer for delegate sessions (not sent on the wire). */
309
+ readonly mandateId?: string;
310
+ readonly signal?: AbortSignal;
311
+ }
312
+ export interface PrepareTranscribeResult {
313
+ readonly jobId: string;
314
+ readonly uploadUrl: string;
315
+ readonly s3ObjectKey: string;
316
+ readonly expiresAt: number;
317
+ }
318
+ export interface StartTranscribeArgs {
319
+ readonly jobId: string;
320
+ readonly mandateId?: string;
321
+ readonly model: TranscribeModelId | string;
322
+ readonly durationSec: number;
323
+ readonly languageCode?: string;
324
+ readonly diarization?: boolean;
325
+ readonly idempotencyKey?: string;
326
+ readonly signal?: AbortSignal;
327
+ }
328
+ export interface StartTranscribeResult {
329
+ readonly jobId: string;
330
+ readonly status: "running";
331
+ readonly estimatedCredits: number;
332
+ readonly walletBalance: number;
333
+ readonly fundedBy?: "sponsored" | "grant" | "purchase";
334
+ readonly receiptId?: string;
335
+ }
336
+ export type TranscribeStatusResult = {
337
+ readonly jobId: string;
338
+ readonly status: "running";
339
+ readonly elapsedSec: number;
340
+ } | ({
341
+ readonly jobId: string;
342
+ readonly status: "completed";
343
+ } & InvokeTranscribeResult) | {
344
+ readonly jobId: string;
345
+ readonly status: "failed";
346
+ readonly error: {
347
+ readonly code: string;
348
+ readonly message: string;
349
+ };
350
+ };
351
+ export interface TranscribeJobSummary {
352
+ readonly jobId: string;
353
+ readonly status: "prepared" | "running" | "completed" | "failed";
354
+ readonly createdAt: number;
355
+ readonly estimatedCredits?: number;
356
+ readonly creditsCharged?: number;
357
+ }
225
358
  export interface ComputeNamespaceDeps {
226
359
  readonly auth: AithosAuth;
227
360
  readonly appDid: string;
@@ -309,5 +442,90 @@ export declare class ComputeNamespace {
309
442
  * Pricing: flat 5 000 mc per call (~$0.005 — Florence-2 is cheap).
310
443
  */
311
444
  invokeSegmentation(args: InvokeSegmentationArgs): Promise<InvokeSegmentationResult>;
445
+ /**
446
+ * Provision a transcription job and get a pre-signed S3 URL to PUT the
447
+ * audio to. No wallet debit. `mandateId` only selects the delegate
448
+ * signer (it is not part of the wire params for prepare).
449
+ */
450
+ prepareTranscribe(args: PrepareTranscribeArgs): Promise<PrepareTranscribeResult>;
451
+ /**
452
+ * Verify the uploaded audio, pre-debit the wallet, and launch the AWS
453
+ * Transcribe job. Returns immediately with `status: "running"`.
454
+ */
455
+ startTranscribe(args: StartTranscribeArgs): Promise<StartTranscribeResult>;
456
+ /**
457
+ * Poll a job's status. On completion the server finalises (reconcile +
458
+ * audit) and returns the transcript; a resumed poll after reconnect
459
+ * re-reads the transcript while it's still in the 24h output window.
460
+ */
461
+ getTranscribeStatus(args: {
462
+ readonly jobId: string;
463
+ readonly mandateId?: string;
464
+ readonly signal?: AbortSignal;
465
+ }): Promise<TranscribeStatusResult>;
466
+ /**
467
+ * List the caller's transcription jobs. Excludes terminal `completed`
468
+ * jobs unless `includeCompleted` is set — the resilience "what's still
469
+ * pending server-side" query.
470
+ */
471
+ listPendingTranscribes(args?: {
472
+ readonly includeCompleted?: boolean;
473
+ readonly mandateId?: string;
474
+ readonly signal?: AbortSignal;
475
+ }): Promise<{
476
+ readonly jobs: readonly TranscribeJobSummary[];
477
+ }>;
478
+ /**
479
+ * Transcribe an audio Blob to text in one call. Chains the four
480
+ * low-level steps: prepare → direct S3 upload → start → poll. Returns
481
+ * the transcript and stores NOTHING server-side beyond the ephemeral
482
+ * job — the consumer decides what to do with the result.
483
+ *
484
+ * Isomorphic: depends only on `Blob`, `fetch`/`XMLHttpRequest` and
485
+ * timers. On a backend, pass `durationSecOverride` (no Blob duration
486
+ * probing is possible without a DOM); in a browser the duration is
487
+ * probed automatically when omitted.
488
+ *
489
+ * Resilience: the job id is recorded in a localStorage tracker (browser)
490
+ * before upload, so `listLocalPendingTranscribes()` / `resumeTranscribe()`
491
+ * can recover a job whose result never arrived. In Node the tracker is a
492
+ * harmless in-memory no-op.
493
+ */
494
+ invokeTranscribe(args: InvokeTranscribeArgs): Promise<InvokeTranscribeResult>;
495
+ /**
496
+ * Resume polling an in-flight job by id — for recovery after a reload or
497
+ * crash. Returns the final result and clears the job from the local
498
+ * pending tracker. Throws if the job has already failed.
499
+ */
500
+ resumeTranscribe(jobId: string, opts?: {
501
+ readonly mandateId?: string;
502
+ readonly onProgress?: (state: TranscribeProgressState) => void;
503
+ readonly signal?: AbortSignal;
504
+ readonly pollIntervalMs?: number;
505
+ }): Promise<InvokeTranscribeResult>;
506
+ /** Snapshot of locally-tracked in-flight jobs (stable ref between mutations). */
507
+ listLocalPendingTranscribes(): readonly LocalPendingEntry[];
508
+ /** Stable snapshot for `useSyncExternalStore`-style consumers. */
509
+ getLocalPendingTranscribesSnapshot(): readonly LocalPendingEntry[];
510
+ /**
511
+ * Subscribe to changes in the local pending-jobs registry. Returns an
512
+ * unsubscribe function. Framework-agnostic: wrap it in a React
513
+ * `useSyncExternalStore`, a Vue effect, a Svelte store, etc.
514
+ */
515
+ subscribeLocalPendingTranscribes(listener: () => void): () => void;
516
+ /**
517
+ * IndexedDB-backed draft queue: persist a recording before any network
518
+ * call, upload it when the user confirms. Browser-only (methods reject
519
+ * with TranscribeDraftUnavailableError when IndexedDB is absent).
520
+ */
521
+ get transcribeDraft(): {
522
+ readonly save: (blob: Blob, meta?: TranscribeDraftMeta) => Promise<{
523
+ readonly draftId: string;
524
+ }>;
525
+ readonly list: () => Promise<readonly TranscribeDraftRecord[]>;
526
+ readonly get: (draftId: string) => Promise<TranscribeDraftRecord | null>;
527
+ readonly delete: (draftId: string) => Promise<void>;
528
+ readonly upload: (draftId: string, args: Omit<InvokeTranscribeArgs, "audio">) => Promise<InvokeTranscribeResult>;
529
+ };
312
530
  }
313
531
  //# sourceMappingURL=compute.d.ts.map