@aithos/sdk 0.1.0-alpha.43 → 0.1.0-alpha.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/dist/src/compute.d.ts +218 -0
- package/dist/src/compute.js +457 -0
- package/dist/src/index.d.ts +4 -2
- package/dist/src/index.js +2 -1
- package/dist/src/react/index.d.ts +1 -0
- package/dist/src/react/index.js +1 -0
- package/dist/src/react/use-transcribe-pending.d.ts +21 -0
- package/dist/src/react/use-transcribe-pending.js +47 -0
- package/dist/src/transcribe-resilience.d.ts +57 -0
- package/dist/src/transcribe-resilience.js +203 -0
- package/dist/test/transcribe-invoke.test.d.ts +2 -0
- package/dist/test/transcribe-invoke.test.js +204 -0
- package/dist/test/transcribe.test.d.ts +2 -0
- package/dist/test/transcribe.test.js +186 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -55,6 +55,40 @@ const reply = await sdk.compute.invokeBedrock({
|
|
|
55
55
|
console.log(reply.content);
|
|
56
56
|
```
|
|
57
57
|
|
|
58
|
+
## Transcribing audio → text
|
|
59
|
+
|
|
60
|
+
`sdk.compute.invokeTranscribe` turns an audio `Blob` into text through AWS
|
|
61
|
+
Transcribe. It does one thing — audio → text — and **stores nothing**: it
|
|
62
|
+
returns the transcript and you decide what to do with it (write it to an
|
|
63
|
+
ethos, a PDS, your own database, email it, or throw it away).
|
|
64
|
+
|
|
65
|
+
```ts
|
|
66
|
+
// Browser: a Blob from MediaRecorder; backend: a Blob from a Buffer.
|
|
67
|
+
const result = await sdk.compute.invokeTranscribe({
|
|
68
|
+
audio: blob, // Blob/File (Node 18+ has global Blob)
|
|
69
|
+
model: "transcribe:aws-fr-standard", // default; also aws-en-standard
|
|
70
|
+
languageCode: "fr-FR", // optional
|
|
71
|
+
// durationSecOverride: 127, // REQUIRED on backends (no DOM probe)
|
|
72
|
+
onProgress: (s) => console.log(s.phase), // uploading → starting → processing → completed
|
|
73
|
+
});
|
|
74
|
+
|
|
75
|
+
console.log(result.text); // "Bonjour, je voulais te dire que…"
|
|
76
|
+
console.log(result.segments); // [{ start_sec, end_sec, text }]
|
|
77
|
+
console.log(result.creditsCharged);
|
|
78
|
+
|
|
79
|
+
// Then YOU choose where it goes — the compute has no opinion:
|
|
80
|
+
await myEthos.addRevision(result.text); // or PDS, DB, email, nothing…
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
The core is isomorphic (Node + browser) and depends only on `Blob`, `fetch`/`XMLHttpRequest`
|
|
84
|
+
and timers. Browser-only resilience is opt-in and framework-agnostic:
|
|
85
|
+
`sdk.compute.transcribeDraft` (IndexedDB queue of recordings) and
|
|
86
|
+
`sdk.compute.listLocalPendingTranscribes()` /
|
|
87
|
+
`subscribeLocalPendingTranscribes()` / `resumeTranscribe(jobId)` recover jobs
|
|
88
|
+
across reloads. React users get `useAithosTranscribePendingJobs(sdk.compute)`
|
|
89
|
+
from `@aithos/sdk/react`. Advanced callers can drive the flow manually with
|
|
90
|
+
`prepareTranscribe` / `startTranscribe` / `getTranscribeStatus`.
|
|
91
|
+
|
|
58
92
|
## Delegating compute to an agent — opt-in token spending
|
|
59
93
|
|
|
60
94
|
To let an agent (or another user, or a third-party app) invoke Bedrock
|
package/dist/src/compute.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { AithosAuth } from "./auth.js";
|
|
2
2
|
import { type AithosSdkEndpoints } from "./endpoints.js";
|
|
3
|
+
import { type LocalPendingEntry, type TranscribeDraftMeta, type TranscribeDraftRecord } from "./transcribe-resilience.js";
|
|
3
4
|
export interface ComputeMessage {
|
|
4
5
|
readonly role: "user" | "assistant";
|
|
5
6
|
readonly content: string;
|
|
@@ -222,6 +223,138 @@ export interface InvokeSegmentationResult {
|
|
|
222
223
|
readonly walletBalance: number;
|
|
223
224
|
readonly auditId: string;
|
|
224
225
|
}
|
|
226
|
+
/**
|
|
227
|
+
* Stable cross-provider transcription model ids. The `transcribe:` prefix
|
|
228
|
+
* is part of the wire contract (mirrors `image:` for image models). New
|
|
229
|
+
* models can be added server-side without an SDK release; the union here
|
|
230
|
+
* gives autocomplete + type-checking for the common ones.
|
|
231
|
+
*/
|
|
232
|
+
export type TranscribeModelId = "transcribe:aws-fr-standard" | "transcribe:aws-en-standard";
|
|
233
|
+
/** Progress callback states emitted by {@link ComputeNamespace.invokeTranscribe}. */
|
|
234
|
+
export type TranscribeProgressState = {
|
|
235
|
+
readonly phase: "queued";
|
|
236
|
+
} | {
|
|
237
|
+
readonly phase: "uploading";
|
|
238
|
+
readonly bytesUploaded: number;
|
|
239
|
+
readonly totalBytes: number;
|
|
240
|
+
} | {
|
|
241
|
+
readonly phase: "starting";
|
|
242
|
+
} | {
|
|
243
|
+
readonly phase: "processing";
|
|
244
|
+
readonly elapsedSec: number;
|
|
245
|
+
} | {
|
|
246
|
+
readonly phase: "completed";
|
|
247
|
+
};
|
|
248
|
+
export interface TranscribeSegment {
|
|
249
|
+
readonly start_sec: number;
|
|
250
|
+
readonly end_sec: number;
|
|
251
|
+
readonly text: string;
|
|
252
|
+
readonly speaker_label?: string;
|
|
253
|
+
}
|
|
254
|
+
export interface TranscribeWord {
|
|
255
|
+
readonly start_sec: number;
|
|
256
|
+
readonly end_sec: number;
|
|
257
|
+
readonly content: string;
|
|
258
|
+
readonly confidence: number;
|
|
259
|
+
}
|
|
260
|
+
/** High-level args for the one-call {@link ComputeNamespace.invokeTranscribe}. */
|
|
261
|
+
export interface InvokeTranscribeArgs {
|
|
262
|
+
/** Mandate id — optional for owner sessions, required for delegate sessions. */
|
|
263
|
+
readonly mandateId?: string;
|
|
264
|
+
/** The audio to transcribe. `Blob` is supported in both Node 18+ and browsers. */
|
|
265
|
+
readonly audio: Blob;
|
|
266
|
+
/** Model alias. Default `"transcribe:aws-fr-standard"`. */
|
|
267
|
+
readonly model?: TranscribeModelId;
|
|
268
|
+
/** AWS language code override (e.g. `"fr-FR"`). Defaults to the model alias's language. */
|
|
269
|
+
readonly languageCode?: string;
|
|
270
|
+
/** Speaker diarization. Default `false`. */
|
|
271
|
+
readonly diarization?: boolean;
|
|
272
|
+
/**
|
|
273
|
+
* Audio duration in seconds. REQUIRED on backends / non-browser runtimes
|
|
274
|
+
* (used for the wallet pre-debit estimate). In a browser it is probed
|
|
275
|
+
* automatically from the Blob when omitted; if probing fails the SDK
|
|
276
|
+
* falls back to an estimate of 0, which the server reconciles on completion.
|
|
277
|
+
*/
|
|
278
|
+
readonly durationSecOverride?: number;
|
|
279
|
+
/** Idempotency key for replay-safe retries (generated if omitted). */
|
|
280
|
+
readonly idempotencyKey?: string;
|
|
281
|
+
/** Progress callback (upload bytes, processing elapsed, …). */
|
|
282
|
+
readonly onProgress?: (state: TranscribeProgressState) => void;
|
|
283
|
+
/** Abort signal — cancels upload + polling. */
|
|
284
|
+
readonly signal?: AbortSignal;
|
|
285
|
+
/**
|
|
286
|
+
* Polling cadence override (ms) for the status loop. Defaults to an
|
|
287
|
+
* exponential backoff 2s → 15s. Mainly for tests.
|
|
288
|
+
*/
|
|
289
|
+
readonly pollIntervalMs?: number;
|
|
290
|
+
}
|
|
291
|
+
export interface InvokeTranscribeResult {
|
|
292
|
+
readonly text: string;
|
|
293
|
+
readonly segments: readonly TranscribeSegment[];
|
|
294
|
+
readonly words: readonly TranscribeWord[];
|
|
295
|
+
readonly durationSec: number;
|
|
296
|
+
readonly languageCode: string;
|
|
297
|
+
readonly creditsCharged: number;
|
|
298
|
+
readonly walletBalance: number;
|
|
299
|
+
readonly auditId: string;
|
|
300
|
+
readonly jobId: string;
|
|
301
|
+
readonly fundedBy?: "sponsored" | "grant" | "purchase";
|
|
302
|
+
readonly sponsoredBy?: string;
|
|
303
|
+
readonly receiptId?: string;
|
|
304
|
+
}
|
|
305
|
+
export interface PrepareTranscribeArgs {
|
|
306
|
+
readonly contentType: string;
|
|
307
|
+
readonly durationSecEstimate?: number;
|
|
308
|
+
/** Selects the delegate signer for delegate sessions (not sent on the wire). */
|
|
309
|
+
readonly mandateId?: string;
|
|
310
|
+
readonly signal?: AbortSignal;
|
|
311
|
+
}
|
|
312
|
+
export interface PrepareTranscribeResult {
|
|
313
|
+
readonly jobId: string;
|
|
314
|
+
readonly uploadUrl: string;
|
|
315
|
+
readonly s3ObjectKey: string;
|
|
316
|
+
readonly expiresAt: number;
|
|
317
|
+
}
|
|
318
|
+
export interface StartTranscribeArgs {
|
|
319
|
+
readonly jobId: string;
|
|
320
|
+
readonly mandateId?: string;
|
|
321
|
+
readonly model: TranscribeModelId | string;
|
|
322
|
+
readonly durationSec: number;
|
|
323
|
+
readonly languageCode?: string;
|
|
324
|
+
readonly diarization?: boolean;
|
|
325
|
+
readonly idempotencyKey?: string;
|
|
326
|
+
readonly signal?: AbortSignal;
|
|
327
|
+
}
|
|
328
|
+
export interface StartTranscribeResult {
|
|
329
|
+
readonly jobId: string;
|
|
330
|
+
readonly status: "running";
|
|
331
|
+
readonly estimatedCredits: number;
|
|
332
|
+
readonly walletBalance: number;
|
|
333
|
+
readonly fundedBy?: "sponsored" | "grant" | "purchase";
|
|
334
|
+
readonly receiptId?: string;
|
|
335
|
+
}
|
|
336
|
+
export type TranscribeStatusResult = {
|
|
337
|
+
readonly jobId: string;
|
|
338
|
+
readonly status: "running";
|
|
339
|
+
readonly elapsedSec: number;
|
|
340
|
+
} | ({
|
|
341
|
+
readonly jobId: string;
|
|
342
|
+
readonly status: "completed";
|
|
343
|
+
} & InvokeTranscribeResult) | {
|
|
344
|
+
readonly jobId: string;
|
|
345
|
+
readonly status: "failed";
|
|
346
|
+
readonly error: {
|
|
347
|
+
readonly code: string;
|
|
348
|
+
readonly message: string;
|
|
349
|
+
};
|
|
350
|
+
};
|
|
351
|
+
export interface TranscribeJobSummary {
|
|
352
|
+
readonly jobId: string;
|
|
353
|
+
readonly status: "prepared" | "running" | "completed" | "failed";
|
|
354
|
+
readonly createdAt: number;
|
|
355
|
+
readonly estimatedCredits?: number;
|
|
356
|
+
readonly creditsCharged?: number;
|
|
357
|
+
}
|
|
225
358
|
export interface ComputeNamespaceDeps {
|
|
226
359
|
readonly auth: AithosAuth;
|
|
227
360
|
readonly appDid: string;
|
|
@@ -309,5 +442,90 @@ export declare class ComputeNamespace {
|
|
|
309
442
|
* Pricing: flat 5 000 mc per call (~$0.005 — Florence-2 is cheap).
|
|
310
443
|
*/
|
|
311
444
|
invokeSegmentation(args: InvokeSegmentationArgs): Promise<InvokeSegmentationResult>;
|
|
445
|
+
/**
|
|
446
|
+
* Provision a transcription job and get a pre-signed S3 URL to PUT the
|
|
447
|
+
* audio to. No wallet debit. `mandateId` only selects the delegate
|
|
448
|
+
* signer (it is not part of the wire params for prepare).
|
|
449
|
+
*/
|
|
450
|
+
prepareTranscribe(args: PrepareTranscribeArgs): Promise<PrepareTranscribeResult>;
|
|
451
|
+
/**
|
|
452
|
+
* Verify the uploaded audio, pre-debit the wallet, and launch the AWS
|
|
453
|
+
* Transcribe job. Returns immediately with `status: "running"`.
|
|
454
|
+
*/
|
|
455
|
+
startTranscribe(args: StartTranscribeArgs): Promise<StartTranscribeResult>;
|
|
456
|
+
/**
|
|
457
|
+
* Poll a job's status. On completion the server finalises (reconcile +
|
|
458
|
+
* audit) and returns the transcript; a resumed poll after reconnect
|
|
459
|
+
* re-reads the transcript while it's still in the 24h output window.
|
|
460
|
+
*/
|
|
461
|
+
getTranscribeStatus(args: {
|
|
462
|
+
readonly jobId: string;
|
|
463
|
+
readonly mandateId?: string;
|
|
464
|
+
readonly signal?: AbortSignal;
|
|
465
|
+
}): Promise<TranscribeStatusResult>;
|
|
466
|
+
/**
|
|
467
|
+
* List the caller's transcription jobs. Excludes terminal `completed`
|
|
468
|
+
* jobs unless `includeCompleted` is set — the resilience "what's still
|
|
469
|
+
* pending server-side" query.
|
|
470
|
+
*/
|
|
471
|
+
listPendingTranscribes(args?: {
|
|
472
|
+
readonly includeCompleted?: boolean;
|
|
473
|
+
readonly mandateId?: string;
|
|
474
|
+
readonly signal?: AbortSignal;
|
|
475
|
+
}): Promise<{
|
|
476
|
+
readonly jobs: readonly TranscribeJobSummary[];
|
|
477
|
+
}>;
|
|
478
|
+
/**
|
|
479
|
+
* Transcribe an audio Blob to text in one call. Composes the four
|
|
480
|
+
* low-level steps: prepare → direct S3 upload → start → poll. Returns
|
|
481
|
+
* the transcript and stores NOTHING server-side beyond the ephemeral
|
|
482
|
+
* job — the consumer decides what to do with the result.
|
|
483
|
+
*
|
|
484
|
+
* Isomorphic: depends only on `Blob`, `fetch`/`XMLHttpRequest` and
|
|
485
|
+
* timers. On a backend, pass `durationSecOverride` (no Blob duration
|
|
486
|
+
* probing is possible without a DOM); in a browser the duration is
|
|
487
|
+
* probed automatically when omitted.
|
|
488
|
+
*
|
|
489
|
+
* Resilience: the job id is recorded in a localStorage tracker (browser)
|
|
490
|
+
* before upload, so `listLocalPendingTranscribes()` / `resumeTranscribe()`
|
|
491
|
+
* can recover a job whose result never arrived. In Node the tracker is a
|
|
492
|
+
* harmless in-memory no-op.
|
|
493
|
+
*/
|
|
494
|
+
invokeTranscribe(args: InvokeTranscribeArgs): Promise<InvokeTranscribeResult>;
|
|
495
|
+
/**
|
|
496
|
+
* Resume polling an in-flight job by id — for recovery after a reload or
|
|
497
|
+
* crash. Returns the final result and clears the job from the local
|
|
498
|
+
* pending tracker. Throws if the job has already failed.
|
|
499
|
+
*/
|
|
500
|
+
resumeTranscribe(jobId: string, opts?: {
|
|
501
|
+
readonly mandateId?: string;
|
|
502
|
+
readonly onProgress?: (state: TranscribeProgressState) => void;
|
|
503
|
+
readonly signal?: AbortSignal;
|
|
504
|
+
readonly pollIntervalMs?: number;
|
|
505
|
+
}): Promise<InvokeTranscribeResult>;
|
|
506
|
+
/** Snapshot of locally tracked in-flight jobs (stable ref between mutations). */
|
|
507
|
+
listLocalPendingTranscribes(): readonly LocalPendingEntry[];
|
|
508
|
+
/** Stable snapshot for `useSyncExternalStore`-style consumers. */
|
|
509
|
+
getLocalPendingTranscribesSnapshot(): readonly LocalPendingEntry[];
|
|
510
|
+
/**
|
|
511
|
+
* Subscribe to changes in the local pending-jobs registry. Returns an
|
|
512
|
+
* unsubscribe function. Framework-agnostic: wrap it in a React
|
|
513
|
+
* `useSyncExternalStore`, a Vue effect, a Svelte store, etc.
|
|
514
|
+
*/
|
|
515
|
+
subscribeLocalPendingTranscribes(listener: () => void): () => void;
|
|
516
|
+
/**
|
|
517
|
+
* IndexedDB-backed draft queue: persist a recording before any network
|
|
518
|
+
* call, upload it when the user confirms. Browser-only (methods reject
|
|
519
|
+
* with `TranscribeDraftUnavailableError` when IndexedDB is absent).
|
|
520
|
+
*/
|
|
521
|
+
get transcribeDraft(): {
|
|
522
|
+
readonly save: (blob: Blob, meta?: TranscribeDraftMeta) => Promise<{
|
|
523
|
+
readonly draftId: string;
|
|
524
|
+
}>;
|
|
525
|
+
readonly list: () => Promise<readonly TranscribeDraftRecord[]>;
|
|
526
|
+
readonly get: (draftId: string) => Promise<TranscribeDraftRecord | null>;
|
|
527
|
+
readonly delete: (draftId: string) => Promise<void>;
|
|
528
|
+
readonly upload: (draftId: string, args: Omit<InvokeTranscribeArgs, "audio">) => Promise<InvokeTranscribeResult>;
|
|
529
|
+
};
|
|
312
530
|
}
|
|
313
531
|
//# sourceMappingURL=compute.d.ts.map
|