npm - @arcote.tech/arc-ai-voice - Versions diffs - 0.7.22 → 0.7.24 - Mend

@arcote.tech/arc-ai-voice 0.7.22 → 0.7.24

This diff shows the differences between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their public registry.

Files changed (7):

package/package.json +4 -4
package/src/adapters/whisper.ts +17 -3
package/src/index.ts +6 -1
package/src/react/use-voice-recorder.ts +9 -1
package/src/routes/transcribe-route.ts +89 -3
package/src/types.ts +18 -3
package/src/voice-builder.ts +8 -0

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@arcote.tech/arc-ai-voice",
   "type": "module",
-  "version": "0.7.22",
+  "version": "0.7.24",
   "private": false,
   "description": "Voice input + transcription standard for Arc — provider abstraction (Whisper, ...) + React VoiceTextInput/Textarea/ContentEditable components",
   "main": "./src/index.ts",
@@ -10,9 +10,9 @@
     "type-check": "tsc --noEmit"
   },
   "peerDependencies": {
-    "@arcote.tech/arc": "^0.7.22",
-    "@arcote.tech/arc-ds": "^0.7.22",
-    "@arcote.tech/platform": "^0.7.22",
+    "@arcote.tech/arc": "^0.7.24",
+    "@arcote.tech/arc-ds": "^0.7.24",
+    "@arcote.tech/platform": "^0.7.24",
     "react": "^18.0.0 || ^19.0.0",
     "lucide-react": ">=0.400.0",
     "typescript": "^5.0.0"

package/src/adapters/whisper.ts CHANGED Viewed

@@ -27,7 +27,14 @@ export function whisper(config: WhisperConfig): TranscriptionProvider {
       const ext = mimeToExt(audio.type);
       formData.append("file", audio, `audio.${ext}`);
       formData.append("model", model);
-      formData.append("response_format", "text");
+      // `verbose_json` niesie pole `duration` (sekundy audio) potrzebne do
+      // rozliczenia per-minuta. Wspiera je `whisper-1`; nowsze modele
+      // transkrypcji (gpt-4o-transcribe) akceptują tylko `json`/`text`, więc
+      // dla nich prosimy o `json` (bez duration → billing pominięty).
+      const responseFormat = model.startsWith("whisper")
+        ? "verbose_json"
+        : "json";
+      formData.append("response_format", responseFormat);
       if (options.language) formData.append("language", options.language);
       const response = await fetch(`${baseUrl}/audio/transcriptions`, {
@@ -43,8 +50,15 @@ export function whisper(config: WhisperConfig): TranscriptionProvider {
         );
       }
-      // response_format=text → plain string body, nie JSON.
-      return (await response.text()).trim();
+      const data = (await response.json()) as {
+        text?: string;
+        duration?: number;
+      };
+      return {
+        text: (data.text ?? "").trim(),
+        durationSeconds:
+          typeof data.duration === "number" ? data.duration : undefined,
+      };
     },
   };
 }

package/src/index.ts CHANGED Viewed

@@ -1,5 +1,9 @@
 // Provider abstrakcja + adaptery
-export type { TranscriptionOptions, TranscriptionProvider } from "./types";
+export type {
+  TranscriptionOptions,
+  TranscriptionProvider,
+  TranscriptionResult,
+} from "./types";
 export { whisper, type WhisperConfig } from "./adapters/whisper";
 // Server-side: builder modułu + route
@@ -7,6 +11,7 @@ export { voice, type VoiceConfig } from "./voice-builder";
 export {
   createTranscribeRoute,
   type TranscribeRouteConfig,
+  type VoiceBillingConfig,
 } from "./routes/transcribe-route";
 // React: hook + komponenty UI

package/src/react/use-voice-recorder.ts CHANGED Viewed

@@ -166,7 +166,15 @@ export function useVoiceRecorder(
         if (language) form.append("language", language);
         const res = await fetch(apiUrl, { method: "POST", body: form });
         if (!res.ok) {
-          throw new Error(`transcribe failed: ${res.status} ${await res.text().catch(() => "")}`);
+          const body = await res.text().catch(() => "");
+          // Brak kredytów (402) — typowany błąd, by UI pokazało komunikat + CTA
+          // „Dokup kredyty" zamiast generycznego błędu transkrypcji.
+          if (res.status === 402 || body.includes("insufficient_credits")) {
+            const err = new Error("insufficient_credits");
+            (err as any).code = "insufficient_credits";
+            throw err;
+          }
+          throw new Error(`transcribe failed: ${res.status} ${body}`);
         }
         const text = await res.text();
         setState("idle");

package/src/routes/transcribe-route.ts CHANGED Viewed

@@ -2,6 +2,41 @@
 import { route, type ArcTokenAny } from "@arcote.tech/arc";
 import type { TranscriptionProvider } from "../types";
+/**
+ * Rozliczanie transkrypcji. Whisper nie ma tokenów — koszt liczymy z długości
+ * audio (`durationSeconds` z `verbose_json`) razy `pricePerMinuteCents`,
+ * i emitujemy do tego samego `creditLedger` co czaty przez `ai.recordCost`.
+ */
+export interface VoiceBillingConfig {
+  /** `ai.recordCost` z fabryki `ai()` arc-ai. */
+  recordCost: (
+    ctx: any,
+    params: {
+      scopeId: string;
+      alias: string;
+      model: string;
+      costCents: number;
+      metadata?: Record<string, unknown>;
+    },
+  ) => Promise<void>;
+  /**
+   * Element rejestru usage (`ai.usageRegistry.Registry`) — dopisywany do
+   * `.mutate([...])` route'a, żeby `ctx.mutate(registry)` w `recordCost`
+   * miał ten agregat w zasięgu.
+   */
+  registryElement: any;
+  /** Token-params → billing scopeId (np. `byAccountId`). */
+  billTo: (tokenParams: any) => string;
+  /** Cena za minutę audio w jednostce ledgera (centy USD). */
+  pricePerMinuteCents: number;
+  /**
+   * Pre-flight gate (`ai.assertCredits`) — wołane PRZED transkrypcją. Rzuca
+   * błąd z `code === "insufficient_credits"` gdy scope nie ma kredytów; route
+   * zwraca wtedy 402. No-op gdy undefined.
+   */
+  assertCredits?: (ctx: any, scopeId: string) => Promise<void>;
+}
 export interface TranscribeRouteConfig {
   provider: TranscriptionProvider;
   /** Domyślny język gdy klient nie przekaże `language` w form-data. */
@@ -17,6 +52,12 @@ export interface TranscribeRouteConfig {
     token: ArcTokenAny;
     check?: (params: any) => boolean | object;
   };
+  /**
+   * Opcjonalne rozliczanie kosztu transkrypcji. Wymaga `protectBy` (scopeId
+   * bierzemy z `$auth.params` przez `billTo`). Bez tego transkrypcja działa
+   * jak dotąd, bez debetu salda.
+   */
+  billing?: VoiceBillingConfig;
 }
 const MAX_AUDIO_BYTES = 25 * 1024 * 1024; // Whisper limit = 25MB
@@ -41,8 +82,14 @@ export function createTranscribeRoute(config: TranscribeRouteConfig) {
       )
     : base.public();
-  return gated.handle({
-    POST: async (_ctx, req: Request) => {
+  // Gdy billing wpięty — rejestr usage musi być w `.mutate([...])` route'a,
+  // inaczej `ctx.mutate(registry)` w `recordCost` nie zadziała.
+  const withDeps: any = config.billing
+    ? (gated as any).mutate([config.billing.registryElement])
+    : gated;
+  return withDeps.handle({
+    POST: async (ctx: any, req: Request) => {
       if (!ONLY_SERVER) {
         return new Response("server only", { status: 500 });
       }
@@ -68,11 +115,50 @@ export function createTranscribeRoute(config: TranscribeRouteConfig) {
       const language =
         (form.get("language") as string | null) ?? config.defaultLanguage;
+      // Pre-flight gate — PRZED wywołaniem providera (zanim poniesiemy koszt
+      // API). Brak kredytów → 402, by front pokazał komunikat + CTA.
+      if (config.billing?.assertCredits) {
+        const scopeId = config.billing.billTo(ctx?.$auth?.params ?? {});
+        if (scopeId) {
+          try {
+            await config.billing.assertCredits(ctx, scopeId);
+          } catch (e) {
+            if ((e as any)?.code === "insufficient_credits") {
+              return jsonError(402, "insufficient_credits");
+            }
+            throw e;
+          }
+        }
+      }
       try {
-        const text = await config.provider.transcribe(
+        const { text, durationSeconds } = await config.provider.transcribe(
           audio,
           language ? { language } : undefined,
         );
+        // Rozliczenie — best-effort, nigdy nie wywala transkrypcji. Wymaga
+        // znanej długości audio (Whisper `verbose_json`) i scopeId z auth.
+        const billing = config.billing;
+        if (billing && typeof durationSeconds === "number" && durationSeconds > 0) {
+          try {
+            const scopeId = billing.billTo(ctx?.$auth?.params ?? {});
+            if (scopeId) {
+              const costCents =
+                (durationSeconds / 60) * billing.pricePerMinuteCents;
+              await billing.recordCost(ctx, {
+                scopeId,
+                alias: "voice-transcription",
+                model: config.provider.name,
+                costCents,
+                metadata: { durationSeconds },
+              });
+            }
+          } catch (err) {
+            console.error("[voice:transcribe] recordCost failed:", err);
+          }
+        }
         return new Response(text, {
           status: 200,
           headers: { "Content-Type": "text/plain; charset=utf-8" },

package/src/types.ts CHANGED Viewed

@@ -12,12 +12,27 @@ export interface TranscriptionOptions {
   language?: string;
 }
+export interface TranscriptionResult {
+  /** Rozpoznany tekst. */
+  text: string;
+  /**
+   * Długość audio w sekundach, jeśli provider ją raportuje (Whisper przez
+   * `verbose_json`). Używana do rozliczenia kosztu per-minuta. `undefined`
+   * gdy provider jej nie zwraca — billing wtedy pomijany (best-effort).
+   */
+  durationSeconds?: number;
+}
 export interface TranscriptionProvider {
   /** Identyfikator providera — używany w logach i diagnozie. */
   name: string;
   /**
-   * Transkrybuje audio (webm/opus, mp4, wav, mp3...) na tekst.
-   * Powinien rzucić błąd przy nieprawidłowym formacie / awarii API.
+   * Transkrybuje audio (webm/opus, mp4, wav, mp3...) na tekst + opcjonalnie
+   * długość audio (do rozliczenia). Powinien rzucić błąd przy nieprawidłowym
+   * formacie / awarii API.
    */
-  transcribe(audio: Blob, options?: TranscriptionOptions): Promise<string>;
+  transcribe(
+    audio: Blob,
+    options?: TranscriptionOptions,
+  ): Promise<TranscriptionResult>;
 }

package/src/voice-builder.ts CHANGED Viewed

@@ -23,6 +23,14 @@ export interface VoiceConfig extends TranscribeRouteConfig {}
  * przyjmują go z propsa, więc konsumer ustawia go per-komponent.
  */
 export function voice(config: VoiceConfig) {
+  // Billing potrzebuje scopeId z `$auth.params` (przez `billTo`), a auth jest
+  // tylko gdy endpoint jest chroniony. Bez `protectBy` scopeId byłby pusty i
+  // koszt nigdy by się nie zapisał — sygnalizujemy błąd konfiguracji wcześnie.
+  if (config.billing && !config.protectBy) {
+    throw new Error(
+      "voice: billing wymaga protectBy (scopeId pochodzi z params chronionego tokenu)",
+    );
+  }
   const transcribeRoute = createTranscribeRoute(config);
   return {
     route: transcribeRoute,