@arcote.tech/arc-ai-voice 0.7.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,23 @@
1
+ {
2
+ "name": "@arcote.tech/arc-ai-voice",
3
+ "type": "module",
4
+ "version": "0.7.9",
5
+ "private": false,
6
+ "description": "Voice input + transcription standard for Arc — provider abstraction (Whisper, ...) + React VoiceTextInput/Textarea/ContentEditable components",
7
+ "main": "./src/index.ts",
8
+ "types": "./src/index.ts",
9
+ "scripts": {
10
+ "type-check": "tsc --noEmit"
11
+ },
12
+ "peerDependencies": {
13
+ "@arcote.tech/arc": "^0.7.9",
14
+ "@arcote.tech/arc-ds": "^0.7.9",
15
+ "@arcote.tech/platform": "^0.7.9",
16
+ "react": "^18.0.0 || ^19.0.0",
17
+ "lucide-react": ">=0.400.0",
18
+ "typescript": "^5.0.0"
19
+ },
20
+ "devDependencies": {
21
+ "@types/bun": "latest"
22
+ }
23
+ }
@@ -0,0 +1,74 @@
1
+ import type { TranscriptionOptions, TranscriptionProvider } from "../types";
2
+
3
/** Settings accepted by the `whisper()` adapter factory. */
export interface WhisperConfig {
  /** OpenAI API key; sent as the `Authorization: Bearer …` header. */
  apiKey: string;
  /** Model name; `"whisper-1"` when omitted. Newer models such as `gpt-4o-transcribe` may be passed too. */
  model?: string;
  /** Custom base URL (proxy / Azure OpenAI / self-hosted); the public OpenAI API when omitted. */
  baseUrl?: string;
}
11
+
12
/**
 * OpenAI Whisper adapter.
 *
 * Posts the recording as multipart form data (field `file`) to
 * `<baseUrl>/audio/transcriptions` and returns the trimmed plain-text body.
 * The original author notes that `whisper-1` is proven, economical and
 * handles Polish as well as most other languages.
 *
 * @param config API key plus optional model / base URL overrides.
 * @returns A provider whose `transcribe` rejects with `Error("Whisper API <status>: …")`
 *          when the HTTP response is not OK.
 */
export function whisper(config: WhisperConfig): TranscriptionProvider {
  // Drop trailing slashes so a configured "https://host/v1/" does not yield
  // "https://host/v1//audio/transcriptions".
  const baseUrl = (config.baseUrl ?? "https://api.openai.com/v1").replace(
    /\/+$/,
    "",
  );
  const model = config.model ?? "whisper-1";

  return {
    name: "whisper",
    async transcribe(audio: Blob, options: TranscriptionOptions = {}) {
      const formData = new FormData();
      // The API detects the audio format from the file-name extension —
      // the Blob's Content-Type alone is not sufficient.
      const ext = mimeToExt(audio.type);
      formData.append("file", audio, `audio.${ext}`);
      formData.append("model", model);
      formData.append("response_format", "text");
      if (options.language) formData.append("language", options.language);

      const response = await fetch(`${baseUrl}/audio/transcriptions`, {
        method: "POST",
        headers: { Authorization: `Bearer ${config.apiKey}` },
        body: formData,
      });

      if (!response.ok) {
        // Best effort: include the error body when it can be read.
        const detail = await response.text().catch(() => "");
        throw new Error(
          `Whisper API ${response.status}: ${detail || response.statusText}`,
        );
      }

      // response_format=text → the body is a plain string, not JSON.
      return (await response.text()).trim();
    },
  };
}
51
+
52
/**
 * Maps a MIME type to the file extension used for the uploaded file name.
 *
 * Parameters after `;` are ignored (`audio/webm;codecs=opus` → `"webm"`) and
 * the comparison is case-insensitive. Unknown or empty types fall back to
 * `"webm"`, preserving the original default.
 */
function mimeToExt(mime: string): string {
  const base = (mime.split(";")[0] ?? "").trim().toLowerCase();
  switch (base) {
    case "audio/webm":
      return "webm";
    case "audio/mp4":
    case "audio/m4a":
    case "audio/x-m4a":
      return "m4a";
    case "audio/mpeg":
    case "audio/mp3":
      return "mp3";
    case "audio/wav":
    case "audio/x-wav":
    case "audio/wave":
    case "audio/vnd.wave":
      return "wav";
    case "audio/ogg":
      return "ogg";
    case "audio/flac":
    case "audio/x-flac":
      return "flac";
    default:
      // Fallback kept from the original: webm is the recorder's preferred format.
      return "webm";
  }
}
package/src/arc.d.ts ADDED
@@ -0,0 +1,6 @@
1
+ declare const BROWSER: boolean;
2
+ declare const NOT_ON_BROWSER: boolean;
3
+ declare const ONLY_BROWSER: boolean;
4
+ declare const SERVER: boolean;
5
+ declare const NOT_ON_SERVER: boolean;
6
+ declare const ONLY_SERVER: boolean;
package/src/index.ts ADDED
@@ -0,0 +1,31 @@
1
+ // Provider abstrakcja + adaptery
2
+ export type { TranscriptionOptions, TranscriptionProvider } from "./types";
3
+ export { whisper, type WhisperConfig } from "./adapters/whisper";
4
+
5
+ // Server-side: builder modułu + route
6
+ export { voice, type VoiceConfig } from "./voice-builder";
7
+ export {
8
+ createTranscribeRoute,
9
+ type TranscribeRouteConfig,
10
+ } from "./routes/transcribe-route";
11
+
12
+ // React: hook + komponenty UI
13
+ export {
14
+ useVoiceRecorder,
15
+ type UseVoiceRecorderOptions,
16
+ type UseVoiceRecorderResult,
17
+ type VoiceRecorderState,
18
+ } from "./react/use-voice-recorder";
19
+ export { VoiceButton, type VoiceButtonProps } from "./react/voice-button";
20
+ export {
21
+ VoiceTextInput,
22
+ type VoiceTextInputProps,
23
+ } from "./react/voice-text-input";
24
+ export {
25
+ VoiceTextarea,
26
+ type VoiceTextareaProps,
27
+ } from "./react/voice-textarea";
28
+ export {
29
+ VoiceContentEditable,
30
+ type VoiceContentEditableProps,
31
+ } from "./react/voice-content-editable";
@@ -0,0 +1,255 @@
1
+ import { useCallback, useEffect, useRef, useState } from "react";
2
+
3
/**
 * Low-level audio recorder. Kept as a plain class (not a hook) because
 * MediaRecorder has its own lifecycle, orthogonal to React's. Per the
 * original note it is modelled on a legacy, production-proven
 * `voice-recorder.ts` utility.
 */
class Recorder {
  private mediaRecorder: MediaRecorder | null = null;
  private stream: MediaStream | null = null;
  private chunks: Blob[] = [];

  /**
   * Requests microphone access and starts recording.
   * Rejects when permission is denied or the recorder cannot be created;
   * in the latter case the already-acquired stream is released first.
   */
  async start(): Promise<void> {
    this.stream = await navigator.mediaDevices.getUserMedia({
      audio: { echoCancellation: true, noiseSuppression: true, sampleRate: 44100 },
    });
    try {
      // Fallback chain: webm/opus → mp4 → browser default.
      let mimeType: string | undefined = "audio/webm;codecs=opus";
      if (!MediaRecorder.isTypeSupported(mimeType)) mimeType = "audio/mp4";
      if (!MediaRecorder.isTypeSupported(mimeType)) mimeType = undefined;

      const recorder = new MediaRecorder(
        this.stream,
        mimeType ? { mimeType } : undefined,
      );
      this.mediaRecorder = recorder;
      this.chunks = [];
      recorder.ondataavailable = (e) => {
        if (e.data.size > 0) this.chunks.push(e.data);
      };
      // start(100) — collect data every 100 ms for better responsiveness on cancel.
      recorder.start(100);
    } catch (e) {
      // Without this the microphone would stay open after a failed start.
      this.cleanup();
      throw e;
    }
  }

  /**
   * Stops recording and resolves with everything captured so far.
   * Rejects with `Error("not recording")` when `start()` has not succeeded,
   * and with `Error("recording error")` when the recorder reports an error.
   */
  stop(): Promise<Blob> {
    return new Promise((resolve, reject) => {
      const mr = this.mediaRecorder;
      if (!mr) return reject(new Error("not recording"));

      const finish = () => {
        const mime = mr.mimeType || "audio/webm";
        const blob = new Blob(this.chunks, { type: mime });
        this.cleanup();
        resolve(blob);
      };

      // An already-inactive recorder never fires another `stop` event, so
      // waiting for it would leave the promise pending forever.
      if (mr.state === "inactive") {
        finish();
        return;
      }

      mr.onstop = finish;
      mr.onerror = () => {
        this.cleanup();
        reject(new Error("recording error"));
      };
      try {
        mr.stop();
      } catch (e) {
        this.cleanup();
        reject(e instanceof Error ? e : new Error(String(e)));
      }
    });
  }

  /** Aborts the recording, discards captured data and releases the microphone. */
  cancel(): void {
    if (this.mediaRecorder?.state === "recording") {
      try {
        this.mediaRecorder.stop();
      } catch {
        // Best effort — the stream is released below regardless.
      }
    }
    this.cleanup();
  }

  /** Stops all stream tracks and resets internal state; safe to call repeatedly. */
  private cleanup(): void {
    if (this.stream) {
      this.stream.getTracks().forEach((t) => t.stop());
      this.stream = null;
    }
    this.mediaRecorder = null;
    this.chunks = [];
  }

  /** True when both `getUserMedia` and `MediaRecorder` exist in this environment. */
  static isSupported(): boolean {
    return (
      typeof navigator !== "undefined" &&
      typeof navigator.mediaDevices?.getUserMedia === "function" &&
      typeof MediaRecorder !== "undefined"
    );
  }
}
74
+
75
/** Lifecycle phase reported by `useVoiceRecorder`. */
export type VoiceRecorderState = "idle" | "recording" | "processing" | "error";
80
+
81
/** Options accepted by `useVoiceRecorder`. */
export interface UseVoiceRecorderOptions {
  /** Endpoint the audio is POSTed to. Defaults to `/route/voice/transcribe`. */
  apiUrl?: string;
  /** ISO 639-1 language code (e.g. "pl"); forwarded as the `language` form field. */
  language?: string;
  /** Maximum recording length in ms — recording auto-stops once exceeded. Defaults to 60_000. */
  maxDurationMs?: number;
  /** Invoked with the trimmed transcript once transcription succeeds. */
  onTranscript: (text: string) => void;
  /** Invoked on every failure (permission, network, API). */
  onError?: (err: Error) => void;
}
93
+
94
/** Value returned by `useVoiceRecorder`. */
export interface UseVoiceRecorderResult {
  /** Current lifecycle phase. */
  state: VoiceRecorderState;
  /** Recording time in ms — grows while `state === "recording"`. */
  elapsedMs: number;
  /** Most recent error (cleared by the next `start()`). */
  error: Error | null;
  /** Begins a recording. */
  start: () => Promise<void>;
  /** Ends the recording and sends it for transcription. */
  stop: () => Promise<void>;
  /** Discards the recording without transcribing it. */
  cancel: () => void;
  /** Whether MediaRecorder and getUserMedia are available. */
  isSupported: boolean;
}
106
+
107
const DEFAULT_API_URL = "/route/voice/transcribe";
const DEFAULT_MAX_MS = 60_000;
// Elapsed-time refresh period — 100 ms is enough for a smooth progress ring
// without flooding React with renders.
const TICK_INTERVAL_MS = 100;

/**
 * React hook driving the record → upload → transcript flow.
 *
 * `start()` opens the microphone, `stop()` uploads the audio to `apiUrl` and
 * hands the trimmed response text to `onTranscript`, `cancel()` discards the
 * recording. Recording auto-stops after `maxDurationMs`. Every failure is
 * stored in `error`, switches `state` to `"error"` and is passed to `onError`.
 */
export function useVoiceRecorder(
  options: UseVoiceRecorderOptions,
): UseVoiceRecorderResult {
  const {
    apiUrl = DEFAULT_API_URL,
    language,
    maxDurationMs = DEFAULT_MAX_MS,
    onTranscript,
    onError,
  } = options;

  const [state, setState] = useState<VoiceRecorderState>("idle");
  const [elapsedMs, setElapsedMs] = useState(0);
  const [error, setError] = useState<Error | null>(null);

  const recorderRef = useRef<Recorder | null>(null);
  const startTimeRef = useRef<number>(0);
  const tickRef = useRef<ReturnType<typeof setInterval> | null>(null);
  const autoStopRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  // True while `rec.start()` is awaiting (e.g. the permission prompt). `state`
  // is still "idle" during that window, so it cannot guard against re-entry.
  const startingRef = useRef(false);
  // Lets an in-flight `start()` notice that the component has gone away.
  const mountedRef = useRef(true);
  // Callbacks live in refs so start/stop do not need them as deps and are
  // not re-created on every consumer render.
  const onTranscriptRef = useRef(onTranscript);
  const onErrorRef = useRef(onError);
  onTranscriptRef.current = onTranscript;
  onErrorRef.current = onError;

  const clearTimers = useCallback(() => {
    if (tickRef.current !== null) {
      clearInterval(tickRef.current);
      tickRef.current = null;
    }
    if (autoStopRef.current !== null) {
      clearTimeout(autoStopRef.current);
      autoStopRef.current = null;
    }
  }, []);

  const handleError = useCallback((e: unknown) => {
    const err = e instanceof Error ? e : new Error(String(e));
    setError(err);
    setState("error");
    onErrorRef.current?.(err);
  }, []);

  const transcribe = useCallback(
    async (audio: Blob) => {
      setState("processing");
      try {
        const form = new FormData();
        form.append("audio", audio);
        if (language) form.append("language", language);
        const res = await fetch(apiUrl, { method: "POST", body: form });
        if (!res.ok) {
          throw new Error(`transcribe failed: ${res.status} ${await res.text().catch(() => "")}`);
        }
        const text = await res.text();
        setState("idle");
        setElapsedMs(0);
        onTranscriptRef.current(text.trim());
      } catch (e) {
        handleError(e);
      }
    },
    [apiUrl, language, handleError],
  );

  const stop = useCallback(async () => {
    clearTimers();
    const rec = recorderRef.current;
    if (!rec) return;
    recorderRef.current = null;
    try {
      const blob = await rec.stop();
      if (blob.size === 0) {
        // Nothing was captured — return to idle without calling the API.
        setState("idle");
        setElapsedMs(0);
        return;
      }
      await transcribe(blob);
    } catch (e) {
      handleError(e);
    }
  }, [clearTimers, transcribe, handleError]);

  // The auto-stop timer reads `stop` through a ref so it always uses the
  // latest apiUrl/language instead of the closure captured at start time.
  const stopRef = useRef(stop);
  stopRef.current = stop;

  const start = useCallback(async () => {
    if (startingRef.current || recorderRef.current !== null) return;
    if (state === "recording" || state === "processing") return;
    setError(null);
    setElapsedMs(0);
    if (!Recorder.isSupported()) {
      handleError(new Error("Voice recording not supported in this browser"));
      return;
    }
    const rec = new Recorder();
    startingRef.current = true;
    try {
      await rec.start();
    } catch (e) {
      handleError(e);
      return;
    } finally {
      startingRef.current = false;
    }
    if (!mountedRef.current) {
      // Unmounted while waiting for the microphone — release it right away.
      rec.cancel();
      return;
    }
    recorderRef.current = rec;
    startTimeRef.current = Date.now();
    setState("recording");
    tickRef.current = setInterval(() => {
      setElapsedMs(Date.now() - startTimeRef.current);
    }, TICK_INTERVAL_MS);
    autoStopRef.current = setTimeout(() => {
      // Max duration reached — auto-stop and transcribe.
      void stopRef.current();
    }, maxDurationMs);
  }, [state, maxDurationMs, handleError]);

  const cancel = useCallback(() => {
    clearTimers();
    const rec = recorderRef.current;
    recorderRef.current = null;
    rec?.cancel();
    setState("idle");
    setElapsedMs(0);
    setError(null);
  }, [clearTimers]);

  // Cleanup when the component disappears mid-recording (e.g. navigation).
  useEffect(() => {
    mountedRef.current = true;
    return () => {
      mountedRef.current = false;
      clearTimers();
      recorderRef.current?.cancel();
      recorderRef.current = null;
    };
  }, [clearTimers]);

  return {
    state,
    elapsedMs,
    error,
    start,
    stop,
    cancel,
    isSupported: Recorder.isSupported(),
  };
}
@@ -0,0 +1,154 @@
1
+ import { Mic, MicOff, Loader2, Square } from "lucide-react";
2
+ import type { VoiceRecorderState } from "./use-voice-recorder";
3
+
4
/** Props of the presentational `VoiceButton`. */
export interface VoiceButtonProps {
  /** Recorder phase — selects which button variant is rendered. */
  state: VoiceRecorderState;
  /** Recording time in ms — drives the progress ring and the M:SS label. */
  elapsedMs: number;
  /** Maximum in ms — the reference for the ring fill (0% → 100%). */
  maxDurationMs: number;
  /** Main click handler: start while idle, stop while recording. */
  onClick: () => void;
  /** Cancel handler — rendered as a separate button next to the ring while recording. */
  onCancel?: () => void;
  /** Error whose message becomes the tooltip of the MicOff icon. */
  error?: Error | null;
  className?: string;
}
18
+
19
// Geometry of the progress ring. The radius is derived so that the stroked
// circle fits exactly inside the SIZE×SIZE SVG viewport.
const SIZE = 32;
const STROKE = 2.5;
const RADIUS = (SIZE - STROKE) / 2;
const CIRCUMFERENCE = 2 * Math.PI * RADIUS;
25
+
26
/**
 * Formats a duration in milliseconds as `M:SS` (minutes are not padded and
 * may exceed 59). Negative and non-finite inputs render as `0:00`.
 */
function formatMmSs(ms: number): string {
  // Guard NaN/Infinity so the label never shows "NaN:NaN".
  const totalSeconds = Number.isFinite(ms) ? Math.max(0, Math.floor(ms / 1000)) : 0;
  const minutes = Math.floor(totalSeconds / 60);
  const seconds = totalSeconds % 60;
  return `${minutes}:${String(seconds).padStart(2, "0")}`;
}
32
+
33
/**
 * Presentational microphone button with four variants chosen by `state`:
 * recording (progress ring + stop square + timer, optional cancel button),
 * processing (disabled spinner), error (retry button with the error message
 * as tooltip) and idle (plain microphone).
 */
export function VoiceButton({
  state,
  elapsedMs,
  maxDurationMs,
  onClick,
  onCancel,
  error,
  className,
}: VoiceButtonProps) {
  const recording = state === "recording";
  const processing = state === "processing";
  const errored = state === "error";

  if (recording) {
    // Clamp to [0, 1]; a zero/negative/NaN maximum counts as "limit reached"
    // instead of producing a NaN dash offset.
    const progress =
      maxDurationMs > 0
        ? Math.min(1, Math.max(0, elapsedMs / maxDurationMs))
        : 1;
    const dashOffset = CIRCUMFERENCE * (1 - progress);
    return (
      <div className={`flex items-center gap-1 ${className ?? ""}`}>
        {onCancel && (
          <button
            type="button"
            onClick={onCancel}
            aria-label="Anuluj nagrywanie"
            title="Anuluj"
            className="flex h-7 w-7 items-center justify-center rounded-full text-muted-foreground/70 transition-colors hover:bg-muted hover:text-foreground"
          >
            <span className="text-xs">✕</span>
          </button>
        )}
        <button
          type="button"
          onClick={onClick}
          aria-label="Zakończ nagrywanie"
          className="relative inline-flex items-center justify-center"
          style={{ width: SIZE, height: SIZE }}
        >
          <svg
            width={SIZE}
            height={SIZE}
            className="absolute inset-0 -rotate-90"
            aria-hidden
          >
            {/* Track — full circle at low opacity */}
            <circle
              cx={SIZE / 2}
              cy={SIZE / 2}
              r={RADIUS}
              fill="none"
              stroke="currentColor"
              strokeWidth={STROKE}
              className="text-muted-foreground/20"
            />
            {/* Progress — fills up as elapsedMs grows */}
            <circle
              cx={SIZE / 2}
              cy={SIZE / 2}
              r={RADIUS}
              fill="none"
              stroke="currentColor"
              strokeWidth={STROKE}
              strokeLinecap="round"
              strokeDasharray={CIRCUMFERENCE}
              strokeDashoffset={dashOffset}
              className="text-primary transition-[stroke-dashoffset] duration-100 ease-linear"
            />
          </svg>
          {/* Pulsing halo — signals that recording is active */}
          <span
            className="absolute inset-0 rounded-full bg-primary/20 animate-ping"
            style={{ animationDuration: "1.5s" }}
            aria-hidden
          />
          {/* Centre — stop square; the timer is rendered next to the button */}
          <Square className="relative h-3 w-3 fill-primary text-primary" />
        </button>
        <span className="text-xs font-medium tabular-nums text-muted-foreground min-w-[2.5rem]">
          {formatMmSs(elapsedMs)}
        </span>
      </div>
    );
  }

  if (processing) {
    return (
      <button
        type="button"
        disabled
        aria-label="Transkrybuję…"
        className={`inline-flex h-7 w-7 items-center justify-center rounded-full text-primary ${className ?? ""}`}
      >
        <Loader2 className="h-4 w-4 animate-spin" />
      </button>
    );
  }

  if (errored) {
    return (
      <button
        type="button"
        onClick={onClick}
        aria-label="Spróbuj ponownie nagrać"
        title={error?.message ?? "Błąd nagrywania"}
        className={`inline-flex h-7 w-7 items-center justify-center rounded-full text-destructive transition-colors hover:bg-destructive/10 ${className ?? ""}`}
      >
        <MicOff className="h-4 w-4" />
      </button>
    );
  }

  // idle
  return (
    <button
      type="button"
      onClick={onClick}
      aria-label="Nagraj głosowo"
      title="Nagraj głosowo"
      className={`inline-flex h-7 w-7 items-center justify-center rounded-full text-muted-foreground/70 transition-colors hover:bg-muted hover:text-foreground ${className ?? ""}`}
    >
      <Mic className="h-4 w-4" />
    </button>
  );
}
@@ -0,0 +1,130 @@
1
+ import { useEffect, useRef } from "react";
2
+ import { useI18n } from "@arcote.tech/platform";
3
+ import { useVoiceRecorder } from "./use-voice-recorder";
4
+ import { VoiceButton } from "./voice-button";
5
+
6
/** Props of `VoiceContentEditable` (controlled plain-text value). */
export interface VoiceContentEditableProps {
  value: string;
  onChange: (value: string) => void;
  placeholder?: string;
  className?: string;
  /** ISO 639-1 code (e.g. "pl"). Falls back to the language derived from `useI18n().locale`. */
  language?: string;
  /** Maximum recording length in ms. Defaults to 60_000. */
  maxDurationMs?: number;
  /** Overrides the transcription endpoint. Defaults to `/route/voice/transcribe`. */
  transcribeUrl?: string;
}
18
+
19
const DEFAULT_MAX_MS = 60_000;

/**
 * Auto-growing contentEditable field with a voice button.
 *
 * There is no inner scroll — the height follows the content, which suits
 * "describe a longer thought" inputs. The voice button is absolutely
 * positioned in the top-right corner and the text gets right padding so it
 * never runs underneath the button. Transcripts are appended to the current
 * `value`, separated by a single space.
 *
 * Per the original note, the implementation resembles the design-system
 * `TextareaField`, minus `maxHeight` / scrolling.
 */
export function VoiceContentEditable({
  value,
  onChange,
  placeholder,
  className,
  language,
  maxDurationMs = DEFAULT_MAX_MS,
  transcribeUrl,
}: VoiceContentEditableProps) {
  const ref = useRef<HTMLDivElement>(null);
  const composingRef = useRef(false);
  const locale = useLocaleLanguage();

  const { state, elapsedMs, error, start, stop, cancel } = useVoiceRecorder({
    apiUrl: transcribeUrl,
    language: language ?? locale,
    maxDurationMs,
    onTranscript: (text) => {
      const trimmed = text.trim();
      if (!trimmed) return;
      onChange(value ? `${value.trimEnd()} ${trimmed}` : trimmed);
    },
  });

  // Sync the external value → DOM. Written through innerText (plain text)
  // to avoid HTML escaping issues and caret inconsistencies.
  useEffect(() => {
    if (!ref.current) return;
    if (ref.current.innerText !== (value ?? "")) {
      ref.current.innerText = value ?? "";
    }
  }, [value]);

  const handleInput = () => {
    // Skip intermediate IME composition updates; compositionend re-runs this.
    if (composingRef.current) return;
    const text = ref.current?.innerText ?? "";
    // Browsers commonly leave a lone <br> in an emptied contentEditable,
    // which innerText reports as "\n". Treat that as empty so the
    // placeholder reappears once the user deletes everything.
    onChange(text === "\n" ? "" : text);
  };

  // Paste as plain text — no styles carried over from other apps.
  // NOTE(review): document.execCommand is deprecated but still the simplest
  // way to insert at the caret while keeping the native undo stack.
  const handlePaste = (e: React.ClipboardEvent) => {
    e.preventDefault();
    const text = e.clipboardData.getData("text/plain");
    document.execCommand("insertText", false, text);
  };

  const recording = state === "recording";
  const isEmpty = !value;

  return (
    <div className={`relative ${className ?? ""}`}>
      <div
        ref={ref}
        contentEditable
        role="textbox"
        aria-multiline="true"
        aria-placeholder={placeholder}
        onInput={handleInput}
        onPaste={handlePaste}
        onCompositionStart={() => (composingRef.current = true)}
        onCompositionEnd={() => {
          composingRef.current = false;
          handleInput();
        }}
        suppressContentEditableWarning
        className={
          // `whitespace-pre-wrap` + `break-words` (overflow-wrap: break-word)
          // wraps long words that do not fit on a line. The arbitrary
          // `[overflow-wrap:anywhere]` is more aggressive and also covers
          // edge cases such as long URLs or words glued together.
          "min-h-[6rem] w-full rounded-md border border-input bg-background px-3 py-2 text-sm whitespace-pre-wrap break-words [overflow-wrap:anywhere] focus:outline-none focus:ring-2 focus:ring-ring " +
          (recording ? "pr-28" : "pr-10")
        }
        data-placeholder={placeholder}
      />
      {isEmpty && placeholder && (
        <span
          aria-hidden
          className="pointer-events-none absolute left-3 top-2 text-sm text-muted-foreground"
        >
          {placeholder}
        </span>
      )}
      <div className="absolute top-1.5 right-1.5">
        <VoiceButton
          state={state}
          elapsedMs={elapsedMs}
          maxDurationMs={maxDurationMs}
          error={error}
          onClick={recording ? stop : start}
          onCancel={recording ? cancel : undefined}
        />
      </div>
    </div>
  );
}
122
+
123
/**
 * Derives a lowercase primary language subtag from `useI18n().locale`
 * (`pl-PL`, `pl_PL`, `PL` → `pl`). Returns `undefined` when the locale is
 * missing or empty, or when `useI18n()` throws.
 */
function useLocaleLanguage(): string | undefined {
  try {
    const { locale } = useI18n();
    if (!locale) return undefined;
    // Accept both BCP-47 ("pl-PL") and POSIX-style ("pl_PL") separators.
    const primary = locale.split(/[-_]/)[0]?.trim().toLowerCase();
    return primary || undefined;
  } catch {
    return undefined;
  }
}
@@ -0,0 +1,87 @@
1
+ import { Input } from "@arcote.tech/arc-ds";
2
+ import { useI18n } from "@arcote.tech/platform";
3
+ import type { ComponentType } from "react";
4
+ import { useVoiceRecorder } from "./use-voice-recorder";
5
+ import { VoiceButton } from "./voice-button";
6
+
7
/** Props of `VoiceTextInput` (controlled single-line value). */
export interface VoiceTextInputProps {
  value: string;
  onChange: (value: string) => void;
  placeholder?: string;
  icon?: ComponentType<{ className?: string }>;
  size?: "default" | "sm" | "xs" | "lg";
  className?: string;
  /** ISO 639-1 code (e.g. "pl"). Falls back to `useI18n().locale` (e.g. `pl-PL` → `pl`). */
  language?: string;
  /** Maximum recording length in ms. Defaults to 60_000. */
  maxDurationMs?: number;
  /** Overrides the transcription endpoint. Defaults to `/route/voice/transcribe`. */
  transcribeUrl?: string;
}
21
+
22
const DEFAULT_MAX_MS = 60_000;

/**
 * Single-line `Input` with a voice-recording button on its right side
 * (vertically centred). Each transcript is **appended** to the current
 * `value` (with a space when the value is non-empty), so text can be
 * dictated in pieces.
 */
export function VoiceTextInput({
  value,
  onChange,
  placeholder,
  icon,
  size = "default",
  className,
  language,
  maxDurationMs = DEFAULT_MAX_MS,
  transcribeUrl,
}: VoiceTextInputProps) {
  const locale = useLocaleLanguage();

  // Appends a non-empty transcript to whatever is currently in the field.
  const appendTranscript = (text: string) => {
    const trimmed = text.trim();
    if (!trimmed) return;
    onChange(value ? `${value.trimEnd()} ${trimmed}` : trimmed);
  };

  const { state, elapsedMs, error, start, stop, cancel } = useVoiceRecorder({
    apiUrl: transcribeUrl,
    language: language ?? locale,
    maxDurationMs,
    onTranscript: appendTranscript,
  });

  const recording = state === "recording";
  // Right padding leaves room for the button, plus the timer while recording.
  const inputPadding = recording ? "pr-24" : "pr-10";

  return (
    <div className={`relative ${className ?? ""}`}>
      <Input
        value={value}
        onChange={(e) => onChange(e.target.value)}
        placeholder={placeholder}
        icon={icon}
        size={size}
        className={inputPadding}
      />
      <div className="absolute right-2 top-1/2 -translate-y-1/2">
        <VoiceButton
          state={state}
          elapsedMs={elapsedMs}
          maxDurationMs={maxDurationMs}
          error={error}
          onClick={recording ? stop : start}
          onCancel={recording ? cancel : undefined}
        />
      </div>
    </div>
  );
}
78
+
79
/**
 * Derives a lowercase primary language subtag from `useI18n().locale`
 * (`pl-PL`, `pl_PL`, `PL` → `pl`). The original note says a bare ISO 639-1
 * code is the safer input for Whisper than a full BCP-47 tag. Returns
 * `undefined` when the locale is missing or empty, or when `useI18n()` throws.
 */
function useLocaleLanguage(): string | undefined {
  try {
    const { locale } = useI18n();
    if (!locale) return undefined;
    // Accept both BCP-47 ("pl-PL") and POSIX-style ("pl_PL") separators.
    const primary = locale.split(/[-_]/)[0]?.trim().toLowerCase();
    return primary || undefined;
  } catch {
    return undefined;
  }
}
@@ -0,0 +1,98 @@
1
+ import { TextareaField } from "@arcote.tech/arc-ds";
2
+ import { useI18n } from "@arcote.tech/platform";
3
+ import type { ReactNode } from "react";
4
+ import { useVoiceRecorder } from "./use-voice-recorder";
5
+ import { VoiceButton } from "./voice-button";
6
+
7
/** Props of `VoiceTextarea` (controlled multi-line value). */
export interface VoiceTextareaProps {
  value: string;
  onChange: (value: string) => void;
  placeholder?: string;
  label?: ReactNode;
  rows?: number;
  maxHeight?: number;
  className?: string;
  /** ISO 639-1 code (e.g. "pl"). Falls back to the language derived from `useI18n().locale`. */
  language?: string;
  /** Maximum recording length in ms. Defaults to 60_000. */
  maxDurationMs?: number;
  /** Overrides the transcription endpoint. Defaults to `/route/voice/transcribe`. */
  transcribeUrl?: string;
}
22
+
23
const DEFAULT_MAX_MS = 60_000;

/**
 * Multi-line `TextareaField` editor with a recording button in the top-right
 * corner. Each transcript is **appended** to the current `value`.
 * For chat-style usage pass `rows={1}` (the default is 4); per the original
 * note, `TextareaField` then grows with its content.
 */
export function VoiceTextarea({
  value,
  onChange,
  placeholder,
  label,
  rows = 4,
  maxHeight,
  className,
  language,
  maxDurationMs = DEFAULT_MAX_MS,
  transcribeUrl,
}: VoiceTextareaProps) {
  const locale = useLocaleLanguage();

  // Appends a non-empty transcript to whatever is currently in the field.
  const appendTranscript = (text: string) => {
    const trimmed = text.trim();
    if (!trimmed) return;
    onChange(value ? `${value.trimEnd()} ${trimmed}` : trimmed);
  };

  const { state, elapsedMs, error, start, stop, cancel } = useVoiceRecorder({
    apiUrl: transcribeUrl,
    language: language ?? locale,
    maxDurationMs,
    onTranscript: appendTranscript,
  });

  const recording = state === "recording";

  // Single line (e.g. chat input, rows=1) — button is vertically centred.
  // Multi-line — button is anchored top-right, next to the first line.
  let buttonPosition = "top-1.5 right-1.5";
  if (rows <= 1) {
    buttonPosition = "top-1/2 -translate-y-1/2 right-1.5";
  }

  // Right padding on the editable area keeps text from sliding under the
  // absolutely positioned VoiceButton; the recording widget (mic + M:SS
  // timer) is wider, hence the larger padding.
  const inputPadding = recording ? "pr-28" : "pr-10";

  return (
    <div className={`relative ${className ?? ""}`}>
      <TextareaField
        value={value}
        onChange={(val) => onChange(val ?? "")}
        placeholder={placeholder}
        label={label}
        rows={rows}
        maxHeight={maxHeight}
        inputClassName={inputPadding}
      />
      <div className={`absolute ${buttonPosition}`}>
        <VoiceButton
          state={state}
          elapsedMs={elapsedMs}
          maxDurationMs={maxDurationMs}
          error={error}
          onClick={recording ? stop : start}
          onCancel={recording ? cancel : undefined}
        />
      </div>
    </div>
  );
}
90
+
91
/**
 * Derives a lowercase primary language subtag from `useI18n().locale`
 * (`pl-PL`, `pl_PL`, `PL` → `pl`). Returns `undefined` when the locale is
 * missing or empty, or when `useI18n()` throws.
 */
function useLocaleLanguage(): string | undefined {
  try {
    const { locale } = useI18n();
    if (!locale) return undefined;
    // Accept both BCP-47 ("pl-PL") and POSIX-style ("pl_PL") separators.
    const primary = locale.split(/[-_]/)[0]?.trim().toLowerCase();
    return primary || undefined;
  } catch {
    return undefined;
  }
}
@@ -0,0 +1,94 @@
1
+ /// <reference path="../arc.d.ts" />
2
+ import { route, type ArcTokenAny } from "@arcote.tech/arc";
3
+ import type { TranscriptionProvider } from "../types";
4
+
5
/** Configuration of the `/voice/transcribe` route. */
export interface TranscribeRouteConfig {
  /** Provider that performs the actual audio → text transcription. */
  provider: TranscriptionProvider;
  /** Language used when the client sends no `language` form field. */
  defaultLanguage?: string;
  /**
   * Optional endpoint gating. The consumer passes its own token (e.g.
   * `userToken` from arc-auth) and a check function (returning `true` or a
   * `{ ...whereClause }` object). Without it the endpoint is `.public()`,
   * which exposes the transcription API cost to anyone — so the
   * recommendation is to always pass a token.
   */
  protectBy?: {
    token: ArcTokenAny;
    check?: (params: any) => boolean | object;
  };
}
21
+
22
const MAX_AUDIO_BYTES = 25 * 1024 * 1024; // Whisper limit = 25MB
// Allowance for multipart boundaries, part headers and the `language` field
// when judging a request by its declared Content-Length.
const MULTIPART_OVERHEAD_BYTES = 64 * 1024;

/**
 * POST /route/voice/transcribe
 *
 * Multipart body:
 * - `audio` (Blob, required) — webm/opus, mp4, wav, mp3, m4a, ogg
 * - `language` (string, optional) — ISO 639-1 (e.g. "pl"), overrides the default
 *
 * Response: plain text with the transcript (200) or `{ error }` JSON
 * (400 invalid/missing/empty audio, 413 too large, 502 provider failure).
 */
export function createTranscribeRoute(config: TranscribeRouteConfig) {
  const base = route("voiceTranscribe").path("/voice/transcribe");
  // `.public()` and `.protectBy()` return incompatible types (isPublic: true
  // vs. protections in the type), so the branches are kept separate instead
  // of reassigning a `let`.
  const gated = config.protectBy
    ? base.protectBy(
        config.protectBy.token,
        (config.protectBy.check ?? (() => true)) as any,
      )
    : base.public();

  return gated.handle({
    POST: async (_ctx, req: Request) => {
      if (!ONLY_SERVER) {
        return new Response("server only", { status: 500 });
      }

      // Reject obviously oversized uploads before parsing the multipart body.
      // A missing or malformed header simply skips this shortcut; the exact
      // size check on the parsed file below still applies.
      const declaredLength = Number(req.headers.get("content-length"));
      if (
        Number.isFinite(declaredLength) &&
        declaredLength > MAX_AUDIO_BYTES + MULTIPART_OVERHEAD_BYTES
      ) {
        return jsonError(413, `audio exceeds ${MAX_AUDIO_BYTES} bytes`);
      }

      let form: FormData;
      try {
        form = await req.formData();
      } catch {
        return jsonError(400, "invalid multipart body");
      }

      const audio = form.get("audio");
      if (!(audio instanceof Blob)) {
        return jsonError(400, "missing 'audio' field");
      }
      if (audio.size === 0) {
        return jsonError(400, "empty audio");
      }
      if (audio.size > MAX_AUDIO_BYTES) {
        return jsonError(413, `audio exceeds ${MAX_AUDIO_BYTES} bytes`);
      }

      // `form.get` may return a File or an empty string; only a non-blank
      // string counts as an explicit language, otherwise use the default.
      const rawLanguage = form.get("language");
      const requestedLanguage =
        typeof rawLanguage === "string" ? rawLanguage.trim() : "";
      const language = requestedLanguage || config.defaultLanguage;

      try {
        const text = await config.provider.transcribe(
          audio,
          language ? { language } : undefined,
        );
        return new Response(text, {
          status: 200,
          headers: { "Content-Type": "text/plain; charset=utf-8" },
        });
      } catch (e) {
        const msg = e instanceof Error ? e.message : "transcription failed";
        console.error("[voice:transcribe]", msg);
        return jsonError(502, msg);
      }
    },
  });
}
88
+
89
/** Builds a JSON error response of the shape `{ "error": <message> }` with the given HTTP status. */
function jsonError(status: number, error: string): Response {
  const body = JSON.stringify({ error });
  const headers = { "Content-Type": "application/json" };
  return new Response(body, { status, headers });
}
package/src/types.ts ADDED
@@ -0,0 +1,23 @@
1
+ // Abstrakcja providera transkrypcji audio → tekst. Konkretne adaptery
2
+ // (whisper, google-speech itp.) eksportują fabrykę zwracającą instancję
3
+ // tej struktury. `voice({ provider })` wstrzykuje wybranego providera do
4
+ // route handlera `/voice/transcribe`.
5
+
6
/** Per-call options passed to a transcription provider. */
export interface TranscriptionOptions {
  /**
   * Recording language in ISO 639-1 form (e.g. "pl", "en"). Per the original
   * note, most providers can auto-detect it, but supplying it noticeably
   * improves accuracy on short recordings.
   */
  language?: string;
}
14
+
15
/** Contract implemented by every transcription adapter. */
export interface TranscriptionProvider {
  /** Provider identifier — meant for logs and diagnostics. */
  name: string;
  /**
   * Transcribes audio (webm/opus, mp4, wav, mp3…) into text.
   * Implementations should throw on an invalid format or an API failure.
   */
  transcribe(audio: Blob, options?: TranscriptionOptions): Promise<string>;
}
@@ -0,0 +1,36 @@
1
+ import { contextElement, module } from "@arcote.tech/platform";
2
+ import {
3
+ createTranscribeRoute,
4
+ type TranscribeRouteConfig,
5
+ } from "./routes/transcribe-route";
6
+
7
/** Configuration of the voice module — currently identical to the transcribe route config. */
export interface VoiceConfig extends TranscribeRouteConfig {}

/**
 * Creates the `/voice/transcribe` route and exposes a builder for the
 * `voice` module. The consumer calls it once during app initialisation:
 *
 * ```ts
 * voice({
 *   provider: whisper({ apiKey: process.env.OPENAI_API_KEY! }),
 *   defaultLanguage: "pl",
 *   protectBy: { token: userToken },
 * }).build();
 * ```
 *
 * The maximum recording time (`maxDurationMs`) is controlled by the
 * `useVoiceRecorder` hook — `VoiceTextarea` / `VoiceTextInput` /
 * `VoiceContentEditable` take it as a prop, so it is set per component.
 */
export function voice(config: VoiceConfig) {
  const transcribeRoute = createTranscribeRoute(config);

  /** Registers the "voice" module with the transcribe route as its only fragment. */
  const build = () =>
    module("voice")
      .public([contextElement(transcribeRoute)])
      .build();

  return { route: transcribeRoute, build };
}
package/tsconfig.json ADDED
@@ -0,0 +1,4 @@
1
+ {
2
+ "extends": "../../../../tsconfig.json",
3
+ "include": ["src/**/*"]
4
+ }