@apifuse/provider-sdk 2.1.0-beta.3 → 2.1.0-beta.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AUTHORING.md +187 -8
- package/CHANGELOG.md +13 -1
- package/README.md +40 -18
- package/SUBMISSION.md +4 -4
- package/bin/apifuse-dev.ts +12 -5
- package/bin/apifuse-pack-check.ts +9 -2
- package/bin/apifuse-pack-smoke.ts +127 -6
- package/bin/apifuse-perf.ts +76 -31
- package/bin/apifuse-record.ts +148 -94
- package/bin/apifuse-submit-check.ts +243 -7
- package/bin/apifuse.ts +1 -1
- package/package.json +17 -8
- package/src/choice-token.ts +164 -0
- package/src/cli/commands.ts +4 -7
- package/src/cli/create.ts +180 -51
- package/src/cli/templates/provider/.dockerignore.tpl +22 -0
- package/src/cli/templates/provider/.gitignore.tpl +22 -0
- package/src/cli/templates/provider/README.md.tpl +42 -7
- package/src/cli/templates/provider/dev.ts.tpl +1 -1
- package/src/cli/templates/provider/domain/README.md.tpl +3 -0
- package/src/cli/templates/provider/index.ts.tpl +5 -47
- package/src/cli/templates/provider/mappers/README.md.tpl +3 -0
- package/src/cli/templates/provider/meta.ts.tpl +7 -0
- package/src/cli/templates/provider/operations/index.ts.tpl +5 -0
- package/src/cli/templates/provider/operations/ping.ts.tpl +23 -0
- package/src/cli/templates/provider/schemas/ping.ts.tpl +16 -0
- package/src/cli/templates/provider/start.ts.tpl +1 -1
- package/src/cli/templates/provider/upstream/README.md.tpl +3 -0
- package/src/config/loader.ts +1206 -9
- package/src/define.ts +1620 -106
- package/src/errors.ts +12 -0
- package/src/i18n/catalog.ts +121 -0
- package/src/i18n/index.ts +2 -0
- package/src/i18n/keys.ts +64 -0
- package/src/index.ts +149 -8
- package/src/lint.ts +306 -51
- package/src/observability.ts +41 -0
- package/src/provider.ts +60 -3
- package/src/public-schema-field-lint.ts +237 -0
- package/src/runtime/auth-flow.ts +7 -0
- package/src/runtime/browser.ts +77 -21
- package/src/runtime/cache.ts +582 -0
- package/src/runtime/executor.ts +13 -1
- package/src/runtime/http.ts +939 -195
- package/src/runtime/insights.ts +11 -11
- package/src/runtime/instrumentation.ts +12 -4
- package/src/runtime/key-derivation.ts +1 -1
- package/src/runtime/keyring.ts +4 -3
- package/src/runtime/proxy-errors.ts +132 -0
- package/src/runtime/proxy-telemetry.ts +253 -0
- package/src/runtime/request-options.ts +66 -0
- package/src/runtime/state.ts +76 -0
- package/src/runtime/stealth.ts +1145 -0
- package/src/runtime/stt.ts +629 -0
- package/src/runtime/trace.ts +1 -1
- package/src/schema.ts +363 -1
- package/src/server/serve.ts +816 -58
- package/src/server/types.ts +35 -0
- package/src/stream.ts +210 -0
- package/src/testing/run.ts +17 -4
- package/src/types.ts +876 -53
- package/src/runtime/tls.ts +0 -434
- package/src/types/playwright-stealth.d.ts +0 -9
|
@@ -0,0 +1,629 @@
|
|
|
1
|
+
import { ProviderError, TransportError, ValidationError } from "../errors";
|
|
2
|
+
import type {
|
|
3
|
+
Bcp47Locale,
|
|
4
|
+
ProviderSttConfig,
|
|
5
|
+
SttAudioInput,
|
|
6
|
+
SttContext,
|
|
7
|
+
SttPromptPolicy,
|
|
8
|
+
SttSegment,
|
|
9
|
+
SttTranscribeRequest,
|
|
10
|
+
SttTranscript,
|
|
11
|
+
SttUnsupportedOptionPolicy,
|
|
12
|
+
SttVerificationCodeOptions,
|
|
13
|
+
VerificationCodeCandidate,
|
|
14
|
+
VerificationCodeCandidateSource,
|
|
15
|
+
VerificationCodeExtractionResult,
|
|
16
|
+
} from "../types";
|
|
17
|
+
|
|
18
|
+
/** Name of the environment variable that selects the STT backend. */
export const APIFUSE__STT__BACKEND_ENV = "APIFUSE__STT__BACKEND";

/** Name of the environment variable that overrides the STT model. */
export const APIFUSE__STT__MODEL_ENV = "APIFUSE__STT__MODEL";

/** Name of the environment variable holding the Cloudflare account id. */
export const CLOUDFLARE_ACCOUNT_ID_ENV = "APIFUSE__CLOUDFLARE__ACCOUNT_ID";

/** Name of the environment variable holding the Cloudflare API token used for STT. */
export const APIFUSE__STT__CLOUDFLARE_API_TOKEN_ENV =
  "APIFUSE__STT__CLOUDFLARE_API_TOKEN";

/** Backend identifier for the Cloudflare Workers AI STT backend. */
export const CLOUDFLARE_WORKERS_AI_STT_BACKEND = "cloudflare-workers-ai";

/** Workers AI model used when no model override is supplied. */
export const DEFAULT_CLOUDFLARE_WORKERS_AI_STT_MODEL =
  "@cf/openai/whisper-large-v3-turbo";

/** Default cap on decoded audio size (10 MiB) when a request sets no `maxAudioBytes`. */
export const DEFAULT_STT_MAX_AUDIO_BYTES = 10 * 1024 * 1024;

/** Default upstream timeout (30 s) when a request sets no `timeoutMs`. */
export const DEFAULT_STT_TIMEOUT_MS = 30_000;

/** Standard-alphabet base64 with at most two trailing `=` padding characters. */
const BASE64_AUDIO_PATTERN = /^[A-Za-z0-9+/]+={0,2}$/;

/** Prompt sent to the model when the effective prompt policy is "default-hint". */
const DEFAULT_OTP_HINT =
  "Transcribe verification codes using Arabic numerals only. Preserve leading zeros and spacing.";
|
|
32
|
+
|
|
33
|
+
/** Environment-variable map with the same shape as `process.env`. */
type EnvLike = Record<string, string | undefined>;

/** Options accepted by `createCloudflareWorkersAiSttClient`. */
type CloudflareWorkersAiSttClientOptions = {
  /** Cloudflare account id, placed in the request URL. */
  accountId: string;
  /** API token sent as the Bearer credential. */
  apiToken: string;
  /** Model path; the default Workers AI model is used when omitted. */
  model?: string;
  /** Fetch implementation; the global `fetch` is used when omitted. */
  fetch?: typeof fetch;
};

/** Description of the error thrown by a client built with `createErrorSttClient`. */
type ErrorSttClientOptions = {
  /** Error code attached to the thrown error. */
  code: string;
  /** Error message. */
  message: string;
  /** Optional remediation hint attached to the thrown error. */
  fix?: string;
};
|
|
47
|
+
|
|
48
|
+
/**
 * Builds a `ProviderError` from a message and its `code` / optional `fix`.
 *
 * @param message Error message.
 * @param options Code and optional remediation hint, forwarded unchanged.
 * @returns The constructed error (not thrown here).
 */
function providerError(
  message: string,
  options: { code: string; fix?: string },
): ProviderError {
  const error = new ProviderError(message, options);
  return error;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Creates an `SttContext` whose `transcribe` always rejects with a
 * `ProviderError` built from `options`. `extractVerificationCode` is the
 * regular module implementation, so text-only extraction still works.
 *
 * @param options Code, message and optional fix used for the thrown error.
 */
function createErrorSttClient(options: ErrorSttClientOptions): SttContext {
  return {
    async transcribe() {
      // Read the options at call time, then fail unconditionally.
      const { code, fix, message } = options;
      throw providerError(message, { code, fix });
    },
    extractVerificationCode,
  };
}
|
|
66
|
+
|
|
67
|
+
/**
 * Creates an `SttContext` whose `transcribe` always fails with code
 * `STT_UNAVAILABLE`.
 *
 * @param reason Error message; defaults to "STT runtime is not configured".
 */
export function createUnsupportedSttClient(reason?: string): SttContext {
  const message = reason ?? "STT runtime is not configured";
  const fix = `Configure ${APIFUSE__STT__BACKEND_ENV} and the matching backend credentials, or provide a test SttContext override.`;
  return createErrorSttClient({ code: "STT_UNAVAILABLE", message, fix });
}
|
|
74
|
+
|
|
75
|
+
/**
 * Reads `key` from `env` and trims surrounding whitespace.
 *
 * @returns The trimmed value, or `undefined` when the variable is missing
 *   or empty after trimming.
 */
function normalizedEnvValue(env: EnvLike, key: string): string | undefined {
  const trimmed = env[key]?.trim();
  if (!trimmed) return undefined;
  return trimmed;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Builds the STT client for a provider from environment configuration.
 *
 * Resolution order:
 * 1. No `config` -> unsupported client ("Provider does not declare STT capability").
 * 2. Backend variable unset or blank -> unsupported client; the message is
 *    specific when `config.mode === "required"`.
 * 3. Backend other than Cloudflare Workers AI -> client failing with
 *    `UNSUPPORTED_STT_BACKEND`.
 * 4. Missing account id or API token -> unsupported client naming both variables.
 * 5. Otherwise -> a Cloudflare Workers AI client, using the model override
 *    variable when set and the default model otherwise.
 *
 * This function never throws; configuration problems surface when
 * `transcribe` is called on the returned client.
 *
 * @param config The provider's STT declaration, if any.
 * @param env Environment map; defaults to `process.env`.
 */
export function createSttClientFromEnv(
  config: ProviderSttConfig | undefined,
  env: EnvLike = process.env,
): SttContext {
  if (!config) {
    return createUnsupportedSttClient(
      "Provider does not declare STT capability",
    );
  }

  const backend = normalizedEnvValue(env, APIFUSE__STT__BACKEND_ENV);
  if (!backend) {
    return createUnsupportedSttClient(
      config.mode === "required"
        ? `STT is required by this provider but ${APIFUSE__STT__BACKEND_ENV} is not configured`
        : undefined,
    );
  }

  if (backend !== CLOUDFLARE_WORKERS_AI_STT_BACKEND) {
    return createErrorSttClient({
      code: "UNSUPPORTED_STT_BACKEND",
      message: `Unsupported STT backend "${backend}"`,
      fix: `Use ${APIFUSE__STT__BACKEND_ENV}=${CLOUDFLARE_WORKERS_AI_STT_BACKEND} or provide a custom SttContext override.`,
    });
  }

  // Read credentials through the same helper and exported names used in the
  // error message below, so lookup and diagnostics cannot drift apart.
  const accountId = normalizedEnvValue(env, CLOUDFLARE_ACCOUNT_ID_ENV);
  const apiToken = normalizedEnvValue(
    env,
    APIFUSE__STT__CLOUDFLARE_API_TOKEN_ENV,
  );
  if (!accountId || !apiToken) {
    return createUnsupportedSttClient(
      `STT backend ${backend} requires ${CLOUDFLARE_ACCOUNT_ID_ENV} and ${APIFUSE__STT__CLOUDFLARE_API_TOKEN_ENV}`,
    );
  }

  return createCloudflareWorkersAiSttClient({
    accountId,
    apiToken,
    model:
      normalizedEnvValue(env, APIFUSE__STT__MODEL_ENV) ??
      DEFAULT_CLOUDFLARE_WORKERS_AI_STT_MODEL,
  });
}
|
|
123
|
+
|
|
124
|
+
/**
 * Computes the decoded byte count of a base64 string from its length and
 * trailing `=` padding, without decoding it.
 *
 * @param data Base64 text; surrounding whitespace is ignored.
 */
function base64ByteLength(data: string): number {
  const encoded = data.trim();
  let padding = 0;
  if (encoded.endsWith("==")) {
    padding = 2;
  } else if (encoded.endsWith("=")) {
    padding = 1;
  }
  return Math.floor((encoded.length * 3) / 4) - padding;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Validates an STT audio input and returns its decoded size in bytes.
 *
 * @param audio Audio input; only `kind: "base64"` is accepted.
 * @param maxAudioBytes Size cap; `DEFAULT_STT_MAX_AUDIO_BYTES` when undefined.
 * @returns Decoded byte length of the audio payload.
 * @throws ValidationError `UNSUPPORTED_STT_OPTION` for a non-base64 kind,
 *   `INVALID_STT_AUDIO` for empty or malformed base64, and
 *   `STT_AUDIO_TOO_LARGE` when the payload exceeds the cap.
 */
function assertBase64Audio(
  audio: SttAudioInput,
  maxAudioBytes: number | undefined,
): number {
  if (audio.kind !== "base64") {
    throw new ValidationError("Unsupported STT audio input kind", {
      code: "UNSUPPORTED_STT_OPTION",
      fix: 'Use audio: { kind: "base64", data, mediaType } for STT v1.',
    });
  }

  const encoded = audio.data.trim();
  // Non-empty, a whole number of 4-character groups, and alphabet-clean.
  const wellFormed =
    encoded.length > 0 &&
    encoded.length % 4 === 0 &&
    BASE64_AUDIO_PATTERN.test(encoded);
  if (!wellFormed) {
    throw new ValidationError(
      "STT audio.data must be a base64-encoded string",
      { code: "INVALID_STT_AUDIO" },
    );
  }

  const bytes = base64ByteLength(encoded);
  const maxBytes = maxAudioBytes ?? DEFAULT_STT_MAX_AUDIO_BYTES;
  if (bytes <= maxBytes) return bytes;
  throw new ValidationError(
    `STT audio exceeds maxAudioBytes (${bytes} > ${maxBytes})`,
    { code: "STT_AUDIO_TOO_LARGE" },
  );
}
|
|
167
|
+
|
|
168
|
+
/**
 * Resolves the prompt text to send upstream for a transcription request.
 *
 * @returns `undefined` for policy "none", the built-in OTP hint for
 *   "default-hint", and `request.initialPrompt` for any other policy.
 */
export function resolveSttPrompt(
  request: SttTranscribeRequest,
): string | undefined {
  switch (effectivePromptPolicy(request)) {
    case "none":
      return undefined;
    case "default-hint":
      return DEFAULT_OTP_HINT;
    default:
      return request.initialPrompt;
  }
}
|
|
176
|
+
|
|
177
|
+
/**
 * Returns the prompt policy in effect for a request: the explicit
 * `promptPolicy` when set, otherwise "default-hint" for `mode === "otp"`
 * and "none" for everything else.
 */
function effectivePromptPolicy(request: SttTranscribeRequest): SttPromptPolicy {
  const { mode, promptPolicy } = request;
  if (promptPolicy) return promptPolicy;
  if (mode === "otp") return "default-hint";
  return "none";
}
|
|
181
|
+
|
|
182
|
+
/**
 * Applies the request's unsupported-option policy (default "warn").
 *
 * @param request Request whose `unsupportedOptionPolicy` is consulted.
 * @param message Description of the unsupported option.
 * @returns A warning record carrying `message` when the policy is not "error".
 * @throws ProviderError `UNSUPPORTED_STT_OPTION` when the policy is "error".
 */
function warnOrThrowUnsupportedOption(
  request: SttTranscribeRequest,
  message: string,
): { code: "UNSUPPORTED_STT_OPTION"; message: string } | undefined {
  const chosenPolicy: SttUnsupportedOptionPolicy =
    request.unsupportedOptionPolicy ?? "warn";
  if (chosenPolicy !== "error") {
    return { code: "UNSUPPORTED_STT_OPTION", message };
  }
  throw new ProviderError(message, { code: "UNSUPPORTED_STT_OPTION" });
}
|
|
193
|
+
|
|
194
|
+
/**
 * Reduces a locale tag to its lower-cased primary subtag (the part before
 * the first "-"), e.g. "en-US" becomes "en".
 *
 * @returns `undefined` when no language is given.
 */
function normalizeCloudflareLanguage(
  language: Bcp47Locale | undefined,
): string | undefined {
  if (language == null) return undefined;
  const primarySubtag = language.split("-")[0];
  return primarySubtag?.toLowerCase();
}
|
|
199
|
+
|
|
200
|
+
/**
 * Type guard: true when `error` is an `Error` that looks like a timeout.
 * It matches on the name ("AbortError" / "TimeoutError") or on a message
 * containing "timed out", "timeout" or "deadline exceeded" as whole words,
 * case-insensitively.
 */
function isTimeoutLikeError(error: unknown): error is Error {
  if (!(error instanceof Error)) return false;
  if (error.name === "AbortError" || error.name === "TimeoutError") {
    return true;
  }
  return /\b(timed out|timeout|deadline exceeded)\b/i.test(error.message);
}
|
|
208
|
+
|
|
209
|
+
/**
 * Converts a failure raised while calling the STT upstream into a
 * `TransportError`.
 *
 * - An existing `TransportError` is returned unchanged.
 * - A timeout-like error becomes `transport_timeout`.
 * - Anything else becomes `transport_network_error`.
 *
 * Both synthesized errors use status 0 and keep the original `Error`
 * (when there is one) as `cause`.
 */
function toSttTransportError(error: unknown): TransportError {
  if (error instanceof TransportError) {
    return error;
  }

  if (!isTimeoutLikeError(error)) {
    const cause = error instanceof Error ? error : undefined;
    return new TransportError("STT upstream network request failed", {
      code: "transport_network_error",
      status: 0,
      cause,
    });
  }

  return new TransportError("STT upstream request timed out", {
    code: "transport_timeout",
    status: 0,
    cause: error,
  });
}
|
|
224
|
+
|
|
225
|
+
/**
 * Creates an `AbortController` that aborts after `signalTimeoutMs`.
 *
 * The timer is `unref`'d where the runtime supports it. Call `clear` once
 * the guarded work finishes to cancel the pending abort.
 *
 * @param signalTimeoutMs Delay in milliseconds before the controller aborts.
 */
function createTimeoutController(signalTimeoutMs: number): {
  controller: AbortController;
  clear: () => void;
} {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), signalTimeoutMs);
  timer.unref?.();
  const clear = () => clearTimeout(timer);
  return { controller, clear };
}
|
|
234
|
+
|
|
235
|
+
/**
 * Builds the JSON body for the Cloudflare Workers AI run request.
 *
 * `audio` (trimmed base64) and `task: "transcribe"` are always present.
 * `language` and `initial_prompt` are added only when they resolve to a
 * non-empty value.
 */
function toCloudflareInput(
  request: SttTranscribeRequest,
): Record<string, unknown> {
  const body: Record<string, unknown> = {
    audio: request.audio.data.trim(),
    task: "transcribe",
  };

  const language = normalizeCloudflareLanguage(request.language);
  if (language) {
    body.language = language;
  }

  const initialPrompt = resolveSttPrompt(request);
  if (initialPrompt) {
    body.initial_prompt = initialPrompt;
  }

  return body;
}
|
|
248
|
+
|
|
249
|
+
/**
 * Narrows an unknown value to a string-keyed record.
 *
 * @returns A shallow copy of the value's own enumerable string-keyed
 *   entries, or `undefined` for primitives, `null` and arrays.
 */
function unknownRecord(value: unknown): Record<string, unknown> | undefined {
  if (typeof value !== "object" || value === null || Array.isArray(value)) {
    return undefined;
  }
  const entries = Object.entries(value);
  return Object.fromEntries(entries);
}
|
|
255
|
+
|
|
256
|
+
/**
 * Extracts transcript segments from an upstream `segments` value.
 *
 * Items that are not objects, or whose `text` is not a string, are skipped.
 * Timestamps prefer `start` / `end` (multiplied by 1000 and rounded) and
 * fall back to `startMs` / `endMs` used as-is.
 *
 * @returns The parsed segments, or `undefined` when the input is not an
 *   array or yields no usable segment.
 */
function parseSegments(value: unknown): SttSegment[] | undefined {
  if (!Array.isArray(value)) return undefined;

  // Prefer the seconds field (converted to ms); otherwise accept the ms field.
  const toMillis = (seconds: unknown, millis: unknown): number | undefined => {
    if (typeof seconds === "number") return Math.round(seconds * 1000);
    return typeof millis === "number" ? millis : undefined;
  };

  const parsed = value.flatMap((item): SttSegment[] => {
    const record = unknownRecord(item);
    if (!record || typeof record.text !== "string") return [];
    return [
      {
        text: record.text,
        startMs: toMillis(record.start, record.startMs),
        endMs: toMillis(record.end, record.endMs),
        confidence:
          typeof record.confidence === "number" ? record.confidence : undefined,
      },
    ];
  });

  return parsed.length > 0 ? parsed : undefined;
}
|
|
282
|
+
|
|
283
|
+
/**
 * Maps an upstream response payload to an `SttTranscript`.
 *
 * The payload may be a `{ result: {...} }` envelope or the result object
 * itself. Text and language are read from the result first and from
 * `transcription_info` second. Duration is `result.durationMs`, or
 * `transcription_info.duration` multiplied by 1000 and rounded.
 *
 * @param payload Parsed upstream JSON (any shape).
 * @param audioBytes Size of the submitted audio, reported in `usage`.
 * @throws TransportError `STT_UPSTREAM_FAILED` (status 502) when no
 *   non-empty transcript text is present.
 */
function toSttTranscript(payload: unknown, audioBytes: number): SttTranscript {
  const envelope = unknownRecord(payload);
  const result = unknownRecord(envelope?.result) ?? envelope;
  const info = unknownRecord(result?.transcription_info);

  // First candidate that is a string, in priority order.
  const firstString = (...candidates: unknown[]): string | undefined =>
    candidates.find(
      (candidate): candidate is string => typeof candidate === "string",
    );

  const text = firstString(result?.text, info?.text);
  if (!text) {
    throw new TransportError(
      "STT upstream response did not include transcript text",
      { code: "STT_UPSTREAM_FAILED", status: 502 },
    );
  }

  let durationMs: number | undefined;
  if (typeof result?.durationMs === "number") {
    durationMs = result.durationMs;
  } else if (typeof info?.duration === "number") {
    durationMs = Math.round(info.duration * 1000);
  }

  return {
    text,
    language: firstString(result?.language, info?.language),
    durationMs,
    segments: parseSegments(result?.segments),
    usage: {
      audioBytes,
      ...(durationMs ? { audioDurationMs: durationMs } : {}),
    },
  };
}
|
|
321
|
+
|
|
322
|
+
/**
 * Creates an `SttContext` backed by the Cloudflare Workers AI REST API.
 *
 * `transcribe` performs these steps:
 * 1. Records a warning, or throws under the "error" policy, when
 *    `initialPrompt` is supplied but the effective prompt policy is not
 *    "custom-hint".
 * 2. Validates the base64 audio and its size.
 * 3. POSTs the request to `/accounts/{accountId}/ai/run/{model}` with a
 *    timeout of `request.timeoutMs`, or the module default.
 * 4. Maps the response to a transcript. In OTP mode, or when
 *    `request.verificationCode` is given, it also extracts a verification code.
 *
 * @param options Account id, API token, and optional model / fetch override.
 * @throws ValidationError for invalid audio input.
 * @throws TransportError for network failures, timeouts, non-2xx responses,
 *   `success: false` envelopes and responses without transcript text.
 * @throws ProviderError for unsupported options under the "error" policy and
 *   when verification code extraction finds no code or several codes.
 */
export function createCloudflareWorkersAiSttClient(
  options: CloudflareWorkersAiSttClientOptions,
): SttContext {
  const model = options.model ?? DEFAULT_CLOUDFLARE_WORKERS_AI_STT_MODEL;
  const runFetch = options.fetch ?? fetch;
  return {
    async transcribe(request) {
      const warnings: { code: "UNSUPPORTED_STT_OPTION"; message: string }[] =
        [];
      if (
        request.initialPrompt &&
        effectivePromptPolicy(request) !== "custom-hint"
      ) {
        const warning = warnOrThrowUnsupportedOption(
          request,
          "initialPrompt is honored only when promptPolicy is custom-hint",
        );
        if (warning) warnings.push(warning);
      }
      const audioBytes = assertBase64Audio(
        request.audio,
        request.maxAudioBytes,
      );
      const timeout = createTimeoutController(
        request.timeoutMs ?? DEFAULT_STT_TIMEOUT_MS,
      );
      try {
        let response: Response;
        try {
          response = await runFetch(
            // The model path (e.g. "@cf/openai/...") is inserted as-is so its
            // slashes stay path separators.
            `https://api.cloudflare.com/client/v4/accounts/${encodeURIComponent(options.accountId)}/ai/run/${model}`,
            {
              method: "POST",
              headers: {
                Authorization: `Bearer ${options.apiToken}`,
                "Content-Type": "application/json",
              },
              body: JSON.stringify(toCloudflareInput(request)),
              signal: timeout.controller.signal,
            },
          );
        } catch (error) {
          throw toSttTransportError(error);
        }

        let payload: unknown;
        try {
          payload = await response.json();
        } catch (error) {
          // The abort signal also covers body streaming. If it fired while a
          // successful response was still being read, report a timeout rather
          // than falling through to "did not include transcript text".
          if (response.ok && timeout.controller.signal.aborted) {
            throw new TransportError("STT upstream request timed out", {
              code: "transport_timeout",
              status: 0,
              cause: error instanceof Error ? error : undefined,
            });
          }
          // Otherwise an unreadable or non-JSON body is treated as "no
          // payload"; the status and content checks below decide the outcome.
          payload = undefined;
        }

        if (!response.ok) {
          throw new TransportError("STT upstream request failed", {
            code: "STT_UPSTREAM_FAILED",
            status: response.status,
            upstreamStatus: response.status,
          });
        }
        const envelope = unknownRecord(payload);
        if (envelope?.success === false) {
          throw new TransportError("STT upstream returned an error", {
            code: "STT_UPSTREAM_FAILED",
            status: 502,
          });
        }
        const transcript = toSttTranscript(payload, audioBytes);
        const withWarnings =
          warnings.length > 0
            ? {
                ...transcript,
                warnings: [...(transcript.warnings ?? []), ...warnings],
              }
            : transcript;
        if (request.mode === "otp" || request.verificationCode) {
          return {
            ...withWarnings,
            verificationCode: extractVerificationCode(
              withWarnings.text,
              request.verificationCode,
            ),
          };
        }
        return withWarnings;
      } finally {
        // Always cancel the pending abort timer, on success and on failure.
        timeout.clear();
      }
    },
    extractVerificationCode,
  };
}
|
|
405
|
+
|
|
406
|
+
/**
 * Spoken English digit words (lower-case) mapped to their numeral.
 *
 * Built on a null prototype so that looking up an arbitrary transcript word
 * such as "constructor" yields `undefined` instead of a member inherited
 * from `Object.prototype`.
 */
const EN_DIGITS: Record<string, string> = Object.assign(Object.create(null), {
  zero: "0",
  oh: "0",
  o: "0",
  one: "1",
  two: "2",
  three: "3",
  four: "4",
  five: "5",
  six: "6",
  seven: "7",
  eight: "8",
  nine: "9",
});

/**
 * Korean digit syllables mapped to their numeral.
 *
 * Also built on a null prototype: this table is probed with non-Korean
 * tokens too, so inherited members must not be visible.
 */
const KO_DIGITS: Record<string, string> = Object.assign(Object.create(null), {
  공: "0",
  영: "0",
  일: "1",
  이: "2",
  삼: "3",
  사: "4",
  오: "5",
  육: "6",
  륙: "6",
  칠: "7",
  팔: "8",
  구: "9",
});
|
|
435
|
+
|
|
436
|
+
/**
 * Expands the `codeLengths` option into the set of accepted code lengths.
 *
 * - `undefined` gives lengths 4 through 8.
 * - A number gives that single length.
 * - An array gives each listed length.
 * - `{ min, max }` gives every length in the inclusive range, which may
 *   span at most 16.
 *
 * @throws ValidationError `INVALID_STT_VERIFICATION_CODE_OPTIONS` for a
 *   malformed range, `min > max`, an over-wide range, or any length rejected
 *   by `validCodeLength`.
 */
function lengthSet(
  codeLengths: SttVerificationCodeOptions["codeLengths"],
): Set<number> {
  if (codeLengths === undefined) {
    return new Set([4, 5, 6, 7, 8]);
  }
  if (typeof codeLengths === "number") {
    return new Set([validCodeLength(codeLengths)]);
  }
  if (Array.isArray(codeLengths)) {
    return new Set(codeLengths.map((length) => validCodeLength(length)));
  }
  if (!("min" in codeLengths) || !("max" in codeLengths)) {
    throw new ValidationError("STT verification code range is malformed", {
      code: "INVALID_STT_VERIFICATION_CODE_OPTIONS",
    });
  }

  const lower = validCodeLength(codeLengths.min);
  const upper = validCodeLength(codeLengths.max);
  if (lower > upper) {
    throw new ValidationError("STT verification code range min exceeds max", {
      code: "INVALID_STT_VERIFICATION_CODE_OPTIONS",
    });
  }
  const span = upper - lower;
  if (span > 16) {
    throw new ValidationError("STT verification code range is too large", {
      code: "INVALID_STT_VERIFICATION_CODE_OPTIONS",
    });
  }

  // Inclusive range lower..upper, in ascending order.
  return new Set(Array.from({ length: span + 1 }, (_, step) => lower + step));
}
|
|
469
|
+
|
|
470
|
+
/**
 * Validates a verification code length.
 *
 * @returns `value` unchanged when it is an integer in the range 1 to 32.
 * @throws ValidationError `INVALID_STT_VERIFICATION_CODE_OPTIONS` otherwise.
 */
function validCodeLength(value: number): number {
  const acceptable = Number.isInteger(value) && value >= 1 && value <= 32;
  if (acceptable) return value;
  throw new ValidationError(
    "STT verification code length must be an integer between 1 and 32",
    { code: "INVALID_STT_VERIFICATION_CODE_OPTIONS" },
  );
}
|
|
479
|
+
|
|
480
|
+
/** One digit-bearing token found in a transcript. */
type DigitToken = {
  /** The matched transcript text. */
  text: string;
  /** The numerals this token contributes to a code. */
  digits: string;
  /** Whether the token was written as numerals or as spoken digit words. */
  source: "digits" | "spoken_words";
  /** Offset of the token's first character in the transcript. */
  startIndex: number;
  /** Offset just past the token's last character. */
  endIndex: number;
};
|
|
487
|
+
|
|
488
|
+
/**
 * Classifies how a group of tokens was expressed: the shared source when
 * every token agrees, otherwise "mixed". An empty group is also "mixed".
 */
function sourceForGroup(tokens: DigitToken[]): VerificationCodeCandidateSource {
  const distinctSources = new Set<DigitToken["source"]>();
  for (const token of tokens) {
    distinctSources.add(token.source);
  }
  if (distinctSources.size !== 1) return "mixed";
  return tokens[0]?.source ?? "digits";
}
|
|
492
|
+
|
|
493
|
+
/**
 * Converts a word token into digit tokens.
 *
 * Lookup order:
 * 1. The lower-cased token in the English table gives one token.
 * 2. The token as-is in the Korean table gives one token.
 * 3. A multi-character token made only of Korean digit syllables gives one
 *    token per character, with consecutive offsets.
 * 4. Anything else gives no tokens.
 *
 * @param token The matched word.
 * @param startIndex Offset of the word in the transcript.
 */
function emitWordToken(token: string, startIndex: number): DigitToken[] {
  // A single token spanning the whole word.
  const wholeWord = (digits: string): DigitToken[] => [
    {
      text: token,
      digits,
      source: "spoken_words",
      startIndex,
      endIndex: startIndex + token.length,
    },
  ];

  const english = EN_DIGITS[token.toLowerCase()];
  if (english) return wholeWord(english);

  const korean = KO_DIGITS[token];
  if (korean) return wholeWord(korean);

  const chars = [...token];
  if (chars.length <= 1 || !chars.every((char) => KO_DIGITS[char])) {
    return [];
  }

  const perCharacter: DigitToken[] = [];
  let cursor = startIndex;
  for (const char of chars) {
    perCharacter.push({
      text: char,
      digits: KO_DIGITS[char] ?? "",
      source: "spoken_words",
      startIndex: cursor,
      endIndex: cursor + char.length,
    });
    cursor += char.length;
  }
  return perCharacter;
}
|
|
536
|
+
|
|
537
|
+
/**
 * Scans a transcript for runs of ASCII digits, ASCII letters and Hangul
 * syllables, and returns the digit-bearing tokens ordered by position.
 * Words that are not digit words contribute no tokens.
 */
function tokenizeDigits(text: string): DigitToken[] {
  const found: DigitToken[] = [];
  for (const match of text.matchAll(/[0-9]+|[A-Za-z]+|[가-힣]+/gu)) {
    const run = match[0];
    const position = match.index ?? 0;
    const isNumeric = /^[0-9]+$/.test(run);
    if (!isNumeric) {
      found.push(...emitWordToken(run, position));
      continue;
    }
    found.push({
      text: run,
      digits: run,
      source: "digits",
      startIndex: position,
      endIndex: position + run.length,
    });
  }
  return found.sort((a, b) => a.startIndex - b.startIndex);
}
|
|
556
|
+
|
|
557
|
+
/**
 * Reports whether two tokens belong to the same code: the transcript text
 * between them must be empty or consist only of whitespace and the
 * separators `, . : ; - _ /`.
 *
 * @param left Earlier token.
 * @param right Later token.
 * @param text The transcript both tokens were taken from.
 */
function isAdjacent(
  left: DigitToken,
  right: DigitToken,
  text: string,
): boolean {
  const gap = text.slice(left.endIndex, right.startIndex);
  const onlySeparators = /^[\s,.:;\-_/]*$/u.test(gap);
  return onlySeparators;
}
|
|
565
|
+
|
|
566
|
+
/**
 * Groups adjacent digit tokens into runs and returns one candidate for
 * each run whose joined digits have an allowed length.
 *
 * @param text Transcript to scan.
 * @param allowedLengths Accepted code lengths.
 * @returns Candidates in transcript order, with the start offset of the
 *   run's first token and the end offset of its last.
 */
function candidatesFromTokens(
  text: string,
  allowedLengths: Set<number>,
): VerificationCodeCandidate[] {
  // Pass 1: split the token stream into runs of mutually adjacent tokens.
  const runs: DigitToken[][] = [];
  for (const token of tokenizeDigits(text)) {
    const currentRun = runs[runs.length - 1];
    const lastToken = currentRun?.[currentRun.length - 1];
    if (currentRun && lastToken && isAdjacent(lastToken, token, text)) {
      currentRun.push(token);
    } else {
      runs.push([token]);
    }
  }

  // Pass 2: keep the runs whose digit string has an accepted length.
  const candidates: VerificationCodeCandidate[] = [];
  for (const run of runs) {
    const code = run.map((token) => token.digits).join("");
    if (!allowedLengths.has(code.length)) continue;
    candidates.push({
      code,
      source: sourceForGroup(run),
      startIndex: run[0]?.startIndex,
      endIndex: run[run.length - 1]?.endIndex,
    });
  }
  return candidates;
}
|
|
597
|
+
|
|
598
|
+
/**
 * Extracts exactly one verification code from transcript text.
 *
 * Candidates with the same code are de-duplicated, keeping the first
 * occurrence. The result succeeds only when one distinct code remains.
 *
 * @param text Transcript text to scan.
 * @param options Extraction options; `codeLengths` restricts accepted lengths.
 * @returns The code, the candidate list, and the NFKC-normalized transcript.
 * @throws ProviderError `NO_CODE_FOUND` when nothing matches, and
 *   `AMBIGUOUS_CODE` when several distinct codes match.
 * @throws ValidationError when `options.codeLengths` is invalid.
 */
export function extractVerificationCode(
  text: string,
  options: SttVerificationCodeOptions = {},
): VerificationCodeExtractionResult {
  const allowedLengths = lengthSet(options.codeLengths);

  const firstByCode = new Map<string, VerificationCodeCandidate>();
  for (const found of candidatesFromTokens(text, allowedLengths)) {
    if (firstByCode.has(found.code)) continue;
    firstByCode.set(found.code, found);
  }

  const candidates = [...firstByCode.values()];
  if (candidates.length > 1) {
    throw new ProviderError("Multiple verification code candidates found", {
      code: "AMBIGUOUS_CODE",
    });
  }
  if (candidates.length === 0) {
    throw new ProviderError(
      "No verification code candidate found in transcript",
      { code: "NO_CODE_FOUND" },
    );
  }

  const [candidate] = candidates;
  return {
    code: candidate.code,
    candidates,
    normalizedText: text.normalize("NFKC"),
  };
}
|
package/src/runtime/trace.ts
CHANGED