@arcote.tech/arc-ai-voice 0.7.22 → 0.7.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -4
- package/src/adapters/whisper.ts +17 -3
- package/src/index.ts +6 -1
- package/src/react/use-voice-recorder.ts +9 -1
- package/src/routes/transcribe-route.ts +89 -3
- package/src/types.ts +18 -3
- package/src/voice-builder.ts +8 -0
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@arcote.tech/arc-ai-voice",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.7.22",
|
|
4
|
+
"version": "0.7.24",
|
|
5
5
|
"private": false,
|
|
6
6
|
"description": "Voice input + transcription standard for Arc — provider abstraction (Whisper, ...) + React VoiceTextInput/Textarea/ContentEditable components",
|
|
7
7
|
"main": "./src/index.ts",
|
|
@@ -10,9 +10,9 @@
|
|
|
10
10
|
"type-check": "tsc --noEmit"
|
|
11
11
|
},
|
|
12
12
|
"peerDependencies": {
|
|
13
|
-
"@arcote.tech/arc": "^0.7.
|
|
14
|
-
"@arcote.tech/arc-ds": "^0.7.
|
|
15
|
-
"@arcote.tech/platform": "^0.7.
|
|
13
|
+
"@arcote.tech/arc": "^0.7.24",
|
|
14
|
+
"@arcote.tech/arc-ds": "^0.7.24",
|
|
15
|
+
"@arcote.tech/platform": "^0.7.24",
|
|
16
16
|
"react": "^18.0.0 || ^19.0.0",
|
|
17
17
|
"lucide-react": ">=0.400.0",
|
|
18
18
|
"typescript": "^5.0.0"
|
package/src/adapters/whisper.ts
CHANGED
|
@@ -27,7 +27,14 @@ export function whisper(config: WhisperConfig): TranscriptionProvider {
|
|
|
27
27
|
const ext = mimeToExt(audio.type);
|
|
28
28
|
formData.append("file", audio, `audio.${ext}`);
|
|
29
29
|
formData.append("model", model);
|
|
30
|
-
|
|
30
|
+
// `verbose_json` niesie pole `duration` (sekundy audio) potrzebne do
|
|
31
|
+
// rozliczenia per-minuta. Wspiera je `whisper-1`; nowsze modele
|
|
32
|
+
// transkrypcji (gpt-4o-transcribe) akceptują tylko `json`/`text`, więc
|
|
33
|
+
// dla nich prosimy o `json` (bez duration → billing pominięty).
|
|
34
|
+
const responseFormat = model.startsWith("whisper")
|
|
35
|
+
? "verbose_json"
|
|
36
|
+
: "json";
|
|
37
|
+
formData.append("response_format", responseFormat);
|
|
31
38
|
if (options.language) formData.append("language", options.language);
|
|
32
39
|
|
|
33
40
|
const response = await fetch(`${baseUrl}/audio/transcriptions`, {
|
|
@@ -43,8 +50,15 @@ export function whisper(config: WhisperConfig): TranscriptionProvider {
|
|
|
43
50
|
);
|
|
44
51
|
}
|
|
45
52
|
|
|
46
|
-
|
|
47
|
-
|
|
53
|
+
const data = (await response.json()) as {
|
|
54
|
+
text?: string;
|
|
55
|
+
duration?: number;
|
|
56
|
+
};
|
|
57
|
+
return {
|
|
58
|
+
text: (data.text ?? "").trim(),
|
|
59
|
+
durationSeconds:
|
|
60
|
+
typeof data.duration === "number" ? data.duration : undefined,
|
|
61
|
+
};
|
|
48
62
|
},
|
|
49
63
|
};
|
|
50
64
|
}
|
package/src/index.ts
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
// Provider abstrakcja + adaptery
|
|
2
|
-
export type {
|
|
2
|
+
export type {
|
|
3
|
+
TranscriptionOptions,
|
|
4
|
+
TranscriptionProvider,
|
|
5
|
+
TranscriptionResult,
|
|
6
|
+
} from "./types";
|
|
3
7
|
export { whisper, type WhisperConfig } from "./adapters/whisper";
|
|
4
8
|
|
|
5
9
|
// Server-side: builder modułu + route
|
|
@@ -7,6 +11,7 @@ export { voice, type VoiceConfig } from "./voice-builder";
|
|
|
7
11
|
export {
|
|
8
12
|
createTranscribeRoute,
|
|
9
13
|
type TranscribeRouteConfig,
|
|
14
|
+
type VoiceBillingConfig,
|
|
10
15
|
} from "./routes/transcribe-route";
|
|
11
16
|
|
|
12
17
|
// React: hook + komponenty UI
|
package/src/react/use-voice-recorder.ts
CHANGED
|
@@ -166,7 +166,15 @@ export function useVoiceRecorder(
|
|
|
166
166
|
if (language) form.append("language", language);
|
|
167
167
|
const res = await fetch(apiUrl, { method: "POST", body: form });
|
|
168
168
|
if (!res.ok) {
|
|
169
|
-
|
|
169
|
+
const body = await res.text().catch(() => "");
|
|
170
|
+
// Brak kredytów (402) — typowany błąd, by UI pokazało komunikat + CTA
|
|
171
|
+
// „Dokup kredyty" zamiast generycznego błędu transkrypcji.
|
|
172
|
+
if (res.status === 402 || body.includes("insufficient_credits")) {
|
|
173
|
+
const err = new Error("insufficient_credits");
|
|
174
|
+
(err as any).code = "insufficient_credits";
|
|
175
|
+
throw err;
|
|
176
|
+
}
|
|
177
|
+
throw new Error(`transcribe failed: ${res.status} ${body}`);
|
|
170
178
|
}
|
|
171
179
|
const text = await res.text();
|
|
172
180
|
setState("idle");
|
package/src/routes/transcribe-route.ts
CHANGED
|
@@ -2,6 +2,41 @@
|
|
|
2
2
|
import { route, type ArcTokenAny } from "@arcote.tech/arc";
|
|
3
3
|
import type { TranscriptionProvider } from "../types";
|
|
4
4
|
|
|
5
|
+
/**
|
|
6
|
+
* Rozliczanie transkrypcji. Whisper nie ma tokenów — koszt liczymy z długości
|
|
7
|
+
* audio (`durationSeconds` z `verbose_json`) razy `pricePerMinuteCents`,
|
|
8
|
+
* i emitujemy do tego samego `creditLedger` co czaty przez `ai.recordCost`.
|
|
9
|
+
*/
|
|
10
|
+
export interface VoiceBillingConfig {
|
|
11
|
+
/** `ai.recordCost` z fabryki `ai()` arc-ai. */
|
|
12
|
+
recordCost: (
|
|
13
|
+
ctx: any,
|
|
14
|
+
params: {
|
|
15
|
+
scopeId: string;
|
|
16
|
+
alias: string;
|
|
17
|
+
model: string;
|
|
18
|
+
costCents: number;
|
|
19
|
+
metadata?: Record<string, unknown>;
|
|
20
|
+
},
|
|
21
|
+
) => Promise<void>;
|
|
22
|
+
/**
|
|
23
|
+
* Element rejestru usage (`ai.usageRegistry.Registry`) — dopisywany do
|
|
24
|
+
* `.mutate([...])` route'a, żeby `ctx.mutate(registry)` w `recordCost`
|
|
25
|
+
* miał ten agregat w zasięgu.
|
|
26
|
+
*/
|
|
27
|
+
registryElement: any;
|
|
28
|
+
/** Token-params → billing scopeId (np. `byAccountId`). */
|
|
29
|
+
billTo: (tokenParams: any) => string;
|
|
30
|
+
/** Cena za minutę audio w jednostce ledgera (centy USD). */
|
|
31
|
+
pricePerMinuteCents: number;
|
|
32
|
+
/**
|
|
33
|
+
* Pre-flight gate (`ai.assertCredits`) — wołane PRZED transkrypcją. Rzuca
|
|
34
|
+
* błąd z `code === "insufficient_credits"` gdy scope nie ma kredytów; route
|
|
35
|
+
* zwraca wtedy 402. No-op gdy undefined.
|
|
36
|
+
*/
|
|
37
|
+
assertCredits?: (ctx: any, scopeId: string) => Promise<void>;
|
|
38
|
+
}
|
|
39
|
+
|
|
5
40
|
export interface TranscribeRouteConfig {
|
|
6
41
|
provider: TranscriptionProvider;
|
|
7
42
|
/** Domyślny język gdy klient nie przekaże `language` w form-data. */
|
|
@@ -17,6 +52,12 @@ export interface TranscribeRouteConfig {
|
|
|
17
52
|
token: ArcTokenAny;
|
|
18
53
|
check?: (params: any) => boolean | object;
|
|
19
54
|
};
|
|
55
|
+
/**
|
|
56
|
+
* Opcjonalne rozliczanie kosztu transkrypcji. Wymaga `protectBy` (scopeId
|
|
57
|
+
* bierzemy z `$auth.params` przez `billTo`). Bez tego transkrypcja działa
|
|
58
|
+
* jak dotąd, bez debetu salda.
|
|
59
|
+
*/
|
|
60
|
+
billing?: VoiceBillingConfig;
|
|
20
61
|
}
|
|
21
62
|
|
|
22
63
|
const MAX_AUDIO_BYTES = 25 * 1024 * 1024; // Whisper limit = 25MB
|
|
@@ -41,8 +82,14 @@ export function createTranscribeRoute(config: TranscribeRouteConfig) {
|
|
|
41
82
|
)
|
|
42
83
|
: base.public();
|
|
43
84
|
|
|
44
|
-
|
|
45
|
-
|
|
85
|
+
// Gdy billing wpięty — rejestr usage musi być w `.mutate([...])` route'a,
|
|
86
|
+
// inaczej `ctx.mutate(registry)` w `recordCost` nie zadziała.
|
|
87
|
+
const withDeps: any = config.billing
|
|
88
|
+
? (gated as any).mutate([config.billing.registryElement])
|
|
89
|
+
: gated;
|
|
90
|
+
|
|
91
|
+
return withDeps.handle({
|
|
92
|
+
POST: async (ctx: any, req: Request) => {
|
|
46
93
|
if (!ONLY_SERVER) {
|
|
47
94
|
return new Response("server only", { status: 500 });
|
|
48
95
|
}
|
|
@@ -68,11 +115,50 @@ export function createTranscribeRoute(config: TranscribeRouteConfig) {
|
|
|
68
115
|
const language =
|
|
69
116
|
(form.get("language") as string | null) ?? config.defaultLanguage;
|
|
70
117
|
|
|
118
|
+
// Pre-flight gate — PRZED wywołaniem providera (zanim poniesiemy koszt
|
|
119
|
+
// API). Brak kredytów → 402, by front pokazał komunikat + CTA.
|
|
120
|
+
if (config.billing?.assertCredits) {
|
|
121
|
+
const scopeId = config.billing.billTo(ctx?.$auth?.params ?? {});
|
|
122
|
+
if (scopeId) {
|
|
123
|
+
try {
|
|
124
|
+
await config.billing.assertCredits(ctx, scopeId);
|
|
125
|
+
} catch (e) {
|
|
126
|
+
if ((e as any)?.code === "insufficient_credits") {
|
|
127
|
+
return jsonError(402, "insufficient_credits");
|
|
128
|
+
}
|
|
129
|
+
throw e;
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
71
134
|
try {
|
|
72
|
-
const text = await config.provider.transcribe(
|
|
135
|
+
const { text, durationSeconds } = await config.provider.transcribe(
|
|
73
136
|
audio,
|
|
74
137
|
language ? { language } : undefined,
|
|
75
138
|
);
|
|
139
|
+
|
|
140
|
+
// Rozliczenie — best-effort, nigdy nie wywala transkrypcji. Wymaga
|
|
141
|
+
// znanej długości audio (Whisper `verbose_json`) i scopeId z auth.
|
|
142
|
+
const billing = config.billing;
|
|
143
|
+
if (billing && typeof durationSeconds === "number" && durationSeconds > 0) {
|
|
144
|
+
try {
|
|
145
|
+
const scopeId = billing.billTo(ctx?.$auth?.params ?? {});
|
|
146
|
+
if (scopeId) {
|
|
147
|
+
const costCents =
|
|
148
|
+
(durationSeconds / 60) * billing.pricePerMinuteCents;
|
|
149
|
+
await billing.recordCost(ctx, {
|
|
150
|
+
scopeId,
|
|
151
|
+
alias: "voice-transcription",
|
|
152
|
+
model: config.provider.name,
|
|
153
|
+
costCents,
|
|
154
|
+
metadata: { durationSeconds },
|
|
155
|
+
});
|
|
156
|
+
}
|
|
157
|
+
} catch (err) {
|
|
158
|
+
console.error("[voice:transcribe] recordCost failed:", err);
|
|
159
|
+
}
|
|
160
|
+
}
|
|
161
|
+
|
|
76
162
|
return new Response(text, {
|
|
77
163
|
status: 200,
|
|
78
164
|
headers: { "Content-Type": "text/plain; charset=utf-8" },
|
package/src/types.ts
CHANGED
|
@@ -12,12 +12,27 @@ export interface TranscriptionOptions {
|
|
|
12
12
|
language?: string;
|
|
13
13
|
}
|
|
14
14
|
|
|
15
|
+
export interface TranscriptionResult {
|
|
16
|
+
/** Rozpoznany tekst. */
|
|
17
|
+
text: string;
|
|
18
|
+
/**
|
|
19
|
+
* Długość audio w sekundach, jeśli provider ją raportuje (Whisper przez
|
|
20
|
+
* `verbose_json`). Używana do rozliczenia kosztu per-minuta. `undefined`
|
|
21
|
+
* gdy provider jej nie zwraca — billing wtedy pomijany (best-effort).
|
|
22
|
+
*/
|
|
23
|
+
durationSeconds?: number;
|
|
24
|
+
}
|
|
25
|
+
|
|
15
26
|
export interface TranscriptionProvider {
|
|
16
27
|
/** Identyfikator providera — używany w logach i diagnozie. */
|
|
17
28
|
name: string;
|
|
18
29
|
/**
|
|
19
|
-
* Transkrybuje audio (webm/opus, mp4, wav, mp3...) na tekst
|
|
20
|
-
* Powinien rzucić błąd przy nieprawidłowym
|
|
30
|
+
* Transkrybuje audio (webm/opus, mp4, wav, mp3...) na tekst + opcjonalnie
|
|
31
|
+
* długość audio (do rozliczenia). Powinien rzucić błąd przy nieprawidłowym
|
|
32
|
+
* formacie / awarii API.
|
|
21
33
|
*/
|
|
22
|
-
transcribe(
|
|
34
|
+
transcribe(
|
|
35
|
+
audio: Blob,
|
|
36
|
+
options?: TranscriptionOptions,
|
|
37
|
+
): Promise<TranscriptionResult>;
|
|
23
38
|
}
|
package/src/voice-builder.ts
CHANGED
|
@@ -23,6 +23,14 @@ export interface VoiceConfig extends TranscribeRouteConfig {}
|
|
|
23
23
|
* przyjmują go z propsa, więc konsumer ustawia go per-komponent.
|
|
24
24
|
*/
|
|
25
25
|
export function voice(config: VoiceConfig) {
|
|
26
|
+
// Billing potrzebuje scopeId z `$auth.params` (przez `billTo`), a auth jest
|
|
27
|
+
// tylko gdy endpoint jest chroniony. Bez `protectBy` scopeId byłby pusty i
|
|
28
|
+
// koszt nigdy by się nie zapisał — sygnalizujemy błąd konfiguracji wcześnie.
|
|
29
|
+
if (config.billing && !config.protectBy) {
|
|
30
|
+
throw new Error(
|
|
31
|
+
"voice: billing wymaga protectBy (scopeId pochodzi z params chronionego tokenu)",
|
|
32
|
+
);
|
|
33
|
+
}
|
|
26
34
|
const transcribeRoute = createTranscribeRoute(config);
|
|
27
35
|
return {
|
|
28
36
|
route: transcribeRoute,
|