@speakableio/core 1.0.60 → 1.0.62
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.native.d.mts +3 -3
- package/dist/index.native.d.ts +3 -3
- package/dist/index.native.js +38 -135
- package/dist/index.native.js.map +1 -1
- package/dist/index.native.mjs +38 -135
- package/dist/index.native.mjs.map +1 -1
- package/dist/index.web.d.mts +3 -3
- package/dist/index.web.js +38 -135
- package/dist/index.web.js.map +1 -1
- package/package.json +1 -1
package/dist/index.native.d.mts
CHANGED
|
@@ -1690,7 +1690,7 @@ declare function SpeakableProvider({ user, children, queryClient, permissions, f
|
|
|
1690
1690
|
}): react_jsx_runtime.JSX.Element | null;
|
|
1691
1691
|
declare function useSpeakableApi(): FsContext;
|
|
1692
1692
|
|
|
1693
|
-
declare function getTranscript(model: 'gemini' | 'assemblyai' | 'whisper', args: {
|
|
1693
|
+
declare function getTranscript(model: 'gemini' | 'assemblyai' | 'whisper' | 'whisper-3', args: {
|
|
1694
1694
|
language: string;
|
|
1695
1695
|
audioUrl: string;
|
|
1696
1696
|
prompt?: string;
|
|
@@ -1698,7 +1698,7 @@ declare function getTranscript(model: 'gemini' | 'assemblyai' | 'whisper', args:
|
|
|
1698
1698
|
declare function getTranscriptCycle(args: {
|
|
1699
1699
|
audioUrl: string;
|
|
1700
1700
|
language: string;
|
|
1701
|
-
prompt
|
|
1701
|
+
prompt?: string;
|
|
1702
1702
|
}): Promise<{
|
|
1703
1703
|
transcript: string;
|
|
1704
1704
|
success: boolean;
|
|
@@ -2967,7 +2967,7 @@ declare const useOrganizationAccess: () => {
|
|
|
2967
2967
|
|
|
2968
2968
|
declare function useSpeakableTranscript(): {
|
|
2969
2969
|
mutation: _tanstack_react_query.UseMutationResult<string | null, Error, {
|
|
2970
|
-
model: "gemini" | "assemblyai" | "whisper";
|
|
2970
|
+
model: "gemini" | "assemblyai" | "whisper" | "whisper-3";
|
|
2971
2971
|
audioUrl: string;
|
|
2972
2972
|
language: string;
|
|
2973
2973
|
prompt?: string;
|
package/dist/index.native.d.ts
CHANGED
|
@@ -1690,7 +1690,7 @@ declare function SpeakableProvider({ user, children, queryClient, permissions, f
|
|
|
1690
1690
|
}): react_jsx_runtime.JSX.Element | null;
|
|
1691
1691
|
declare function useSpeakableApi(): FsContext;
|
|
1692
1692
|
|
|
1693
|
-
declare function getTranscript(model: 'gemini' | 'assemblyai' | 'whisper', args: {
|
|
1693
|
+
declare function getTranscript(model: 'gemini' | 'assemblyai' | 'whisper' | 'whisper-3', args: {
|
|
1694
1694
|
language: string;
|
|
1695
1695
|
audioUrl: string;
|
|
1696
1696
|
prompt?: string;
|
|
@@ -1698,7 +1698,7 @@ declare function getTranscript(model: 'gemini' | 'assemblyai' | 'whisper', args:
|
|
|
1698
1698
|
declare function getTranscriptCycle(args: {
|
|
1699
1699
|
audioUrl: string;
|
|
1700
1700
|
language: string;
|
|
1701
|
-
prompt
|
|
1701
|
+
prompt?: string;
|
|
1702
1702
|
}): Promise<{
|
|
1703
1703
|
transcript: string;
|
|
1704
1704
|
success: boolean;
|
|
@@ -2967,7 +2967,7 @@ declare const useOrganizationAccess: () => {
|
|
|
2967
2967
|
|
|
2968
2968
|
declare function useSpeakableTranscript(): {
|
|
2969
2969
|
mutation: _tanstack_react_query.UseMutationResult<string | null, Error, {
|
|
2970
|
-
model: "gemini" | "assemblyai" | "whisper";
|
|
2970
|
+
model: "gemini" | "assemblyai" | "whisper" | "whisper-3";
|
|
2971
2971
|
audioUrl: string;
|
|
2972
2972
|
language: string;
|
|
2973
2973
|
prompt?: string;
|
package/dist/index.native.js
CHANGED
|
@@ -2328,80 +2328,18 @@ var createSetRepo = () => {
|
|
|
2328
2328
|
};
|
|
2329
2329
|
|
|
2330
2330
|
// src/utils/ai/detect-transcript-hallucionation.ts
|
|
2331
|
-
var HALLUCINATION_THRESHOLDS = {
|
2332
|
-
// Short repeats
|
|
2333
|
-
MIN_CONSECUTIVE_REPEATS: 5,
|
|
2334
|
-
// Increased from 3 to allow phrases like "pio pio pio" or "no no no no"
|
|
2335
|
-
MIN_WORDS_FOR_RATIO_CHECK: 15,
|
|
2336
|
-
// Increased from 10 to require longer text for ratio check
|
|
2337
|
-
MAX_UNIQUE_WORDS_FOR_RATIO: 3,
|
|
2338
|
-
MIN_REPETITION_RATIO: 4,
|
|
2339
|
-
// Increased from 3 to be more permissive
|
|
2340
|
-
// Phrase repeats
|
|
2341
|
-
MIN_SENTENCE_LENGTH: 15,
|
|
2342
|
-
// Increased from 10 to avoid flagging short natural sentences
|
|
2343
|
-
MIN_CONSECUTIVE_SIMILAR_SENTENCES: 3,
|
|
2344
|
-
// Increased from 2 to allow some natural repetition
|
|
2345
|
-
MIN_SENTENCES_FOR_DUPLICATE_CHECK: 4,
|
|
2346
|
-
// Increased from 3
|
|
2347
|
-
// Cyclic patterns
|
|
2348
|
-
MIN_CYCLE_LENGTH: 30,
|
|
2349
|
-
// Increased from 20 to focus on longer patterns
|
|
2350
|
-
MIN_CYCLE_REPEATS: 3,
|
|
2351
|
-
// Entropy detection
|
|
2352
|
-
MIN_LENGTH_FOR_ENTROPY_CHECK: 60,
|
|
2353
|
-
// Increased from 50
|
|
2354
|
-
MAX_ENTROPY_THRESHOLD: 2.2,
|
|
2355
|
-
// Decreased from 2.5 to be more strict on entropy (lower = more repetitive needed)
|
|
2356
|
-
// Similarity
|
|
2357
|
-
SENTENCE_SIMILARITY_THRESHOLD: 0.85,
|
|
2358
|
-
// Increased from 0.8 to require more similarity
|
|
2359
|
-
SEGMENT_SIMILARITY_THRESHOLD: 0.9
|
|
2360
|
-
// Increased from 0.85
|
|
2361
|
-
};
|
|
2362
|
-
function detectTranscriptHallucinationWithDetails(transcript) {
|
|
2331
|
+
function detectTranscriptHallucination(transcript) {
|
|
2363
2332
|
if (!transcript || transcript.trim().length === 0) {
|
|
2364
|
-
return { isHallucination: false };
|
|
2333
|
+
return false;
|
|
2365
2334
|
}
|
|
2366
2335
|
const text = transcript.trim();
|
|
2367
|
-
if (text.length < 10) {
|
|
2368
|
-
return { isHallucination: false };
|
|
2369
|
-
}
|
|
2370
2336
|
const shortRepeats = detectShortRepeats(text);
|
|
2371
|
-
if (shortRepeats) {
|
|
2372
|
-
return {
|
|
2373
|
-
isHallucination: true,
|
|
2374
|
-
reason: "Detected repeated short words or phrases",
|
|
2375
|
-
confidence: 0.9
|
|
2376
|
-
};
|
|
2377
|
-
}
|
|
2337
|
+
if (shortRepeats) return true;
|
|
2378
2338
|
const phraseRepeats = detectPhraseRepeats(text);
|
|
2379
|
-
if (phraseRepeats) {
|
|
2380
|
-
return {
|
|
2381
|
-
isHallucination: true,
|
|
2382
|
-
reason: "Detected repeated sentences or phrases",
|
|
2383
|
-
confidence: 0.85
|
|
2384
|
-
};
|
|
2385
|
-
}
|
|
2339
|
+
if (phraseRepeats) return true;
|
|
2386
2340
|
const cyclicRepeats = detectCyclicPattern(text);
|
|
2387
|
-
if (cyclicRepeats) {
|
|
2388
|
-
return {
|
2389
|
-
isHallucination: true,
|
|
2390
|
-
reason: "Detected cyclic repetition pattern",
|
|
2391
|
-
confidence: 0.8
|
|
2392
|
-
};
|
|
2393
|
-
}
|
|
2394
|
-
if (text.length >= HALLUCINATION_THRESHOLDS.MIN_LENGTH_FOR_ENTROPY_CHECK) {
|
|
2395
|
-
const entropy = calculateEntropy(text);
|
|
2396
|
-
if (entropy < HALLUCINATION_THRESHOLDS.MAX_ENTROPY_THRESHOLD) {
|
|
2397
|
-
return {
|
|
2398
|
-
isHallucination: true,
|
|
2399
|
-
reason: "Detected low entropy (likely gibberish or excessive repetition)",
|
|
2400
|
-
confidence: 0.75
|
|
2401
|
-
};
|
|
2402
|
-
}
|
|
2403
|
-
}
|
|
2404
|
-
return { isHallucination: false };
|
|
2341
|
+
if (cyclicRepeats) return true;
|
|
2342
|
+
return false;
|
|
2405
2343
|
}
|
|
2406
2344
|
function detectShortRepeats(text) {
|
|
2407
2345
|
const words = text.toLowerCase().split(/[\s,;.!?]+/).filter((w) => w.length > 0);
|
|
@@ -2410,22 +2348,20 @@ function detectShortRepeats(text) {
|
|
|
2410
2348
|
for (let i = 1; i < words.length; i++) {
|
|
2411
2349
|
if (words[i] === words[i - 1]) {
|
|
2412
2350
|
repeatCount++;
|
|
2413
|
-
if (repeatCount >= HALLUCINATION_THRESHOLDS.MIN_CONSECUTIVE_REPEATS) {
|
|
2414
|
-
return true;
|
|
2415
|
-
}
|
|
2351
|
+
if (repeatCount >= 3) return true;
|
|
2416
2352
|
} else {
|
|
2417
2353
|
repeatCount = 1;
|
|
2418
2354
|
}
|
|
2419
2355
|
}
|
|
2420
2356
|
const uniqueWords = new Set(words);
|
|
2421
2357
|
const repetitionRatio = words.length / uniqueWords.size;
|
|
2422
|
-
if (words.length >=
|
|
2358
|
+
if (words.length >= 10 && uniqueWords.size <= 3 && repetitionRatio >= 3) {
|
|
2423
2359
|
return true;
|
|
2424
2360
|
}
|
|
2425
2361
|
return false;
|
|
2426
2362
|
}
|
|
2427
2363
|
function detectPhraseRepeats(text) {
|
|
2428
|
-
const sentences = text.split(/[.!?]+/).map((s) => s.trim().toLowerCase()).filter((s) => s.length >
|
|
2364
|
+
const sentences = text.split(/[.!?]+/).map((s) => s.trim().toLowerCase()).filter((s) => s.length > 10);
|
|
2429
2365
|
if (sentences.length < 2) return false;
|
|
2430
2366
|
for (let i = 0; i < sentences.length - 1; i++) {
|
|
2431
2367
|
let consecutiveRepeats = 1;
|
|
@@ -2436,17 +2372,17 @@ function detectPhraseRepeats(text) {
|
|
|
2436
2372
|
break;
|
|
2437
2373
|
}
|
|
2438
2374
|
}
|
|
2439
|
-
if (consecutiveRepeats >=
|
|
2375
|
+
if (consecutiveRepeats >= 2) {
|
|
2440
2376
|
return true;
|
|
2441
2377
|
}
|
|
2442
2378
|
}
|
|
2443
2379
|
const uniqueSentences = new Set(sentences);
|
|
2444
|
-
if (sentences.length >=
|
|
2380
|
+
if (sentences.length >= 3 && uniqueSentences.size === 1) {
|
|
2445
2381
|
return true;
|
|
2446
2382
|
}
|
|
2447
2383
|
return false;
|
|
2448
2384
|
}
|
|
2449
|
-
function isSimilarSentence(s1, s2, threshold = HALLUCINATION_THRESHOLDS.SENTENCE_SIMILARITY_THRESHOLD) {
|
|
2385
|
+
function isSimilarSentence(s1, s2) {
|
|
2450
2386
|
if (s1 === s2) return true;
|
|
2451
2387
|
const normalized1 = s1.replace(/\s+/g, " ").trim();
|
|
2452
2388
|
const normalized2 = s2.replace(/\s+/g, " ").trim();
|
|
@@ -2458,81 +2394,35 @@ function isSimilarSentence(s1, s2, threshold = HALLUCINATION_THRESHOLDS.SENTENCE
|
|
|
2458
2394
|
const set2 = new Set(words2);
|
|
2459
2395
|
const intersection = new Set([...set1].filter((w) => set2.has(w)));
|
|
2460
2396
|
const similarity = intersection.size * 2 / (set1.size + set2.size);
|
|
2461
|
-
return similarity >= threshold;
|
|
2397
|
+
return similarity >= 0.8;
|
|
2462
2398
|
}
|
|
2463
2399
|
function detectCyclicPattern(text) {
|
|
2464
2400
|
const normalized = text.toLowerCase().replace(/\s+/g, " ").trim();
|
|
2465
2401
|
const length = normalized.length;
|
|
2466
|
-
const minCycleLength = HALLUCINATION_THRESHOLDS.MIN_CYCLE_LENGTH;
|
|
2402
|
+
const minCycleLength = 20;
|
|
2467
2403
|
const maxCycleLength = Math.floor(length / 2);
|
|
2468
|
-
|
|
2469
|
-
const step = 5;
|
|
2470
|
-
for (let cycleLen = minCycleLength; cycleLen <= maxCycleLength; cycleLen += step) {
|
|
2404
|
+
for (let cycleLen = minCycleLength; cycleLen <= maxCycleLength; cycleLen++) {
|
|
2471
2405
|
const pattern = normalized.substring(0, cycleLen);
|
|
2472
2406
|
let matchCount = 0;
|
|
2473
2407
|
let pos = 0;
|
|
2474
2408
|
while (pos < length) {
|
|
2475
2409
|
const segment = normalized.substring(pos, pos + cycleLen);
|
|
2476
|
-
if (segment
|
|
2477
|
-
const partialMatch = pattern.startsWith(segment);
|
|
2478
|
-
if (partialMatch && matchCount > 0) {
|
|
2479
|
-
matchCount++;
|
|
2480
|
-
}
|
|
2481
|
-
break;
|
|
2482
|
-
}
|
|
2483
|
-
if (segment === pattern || isSegmentSimilar(segment, pattern)) {
|
|
2410
|
+
if (segment === pattern || isSimilarSentence(segment, pattern)) {
|
|
2484
2411
|
matchCount++;
|
|
2485
2412
|
pos += cycleLen;
|
|
2486
2413
|
} else {
|
|
2487
2414
|
break;
|
|
2488
2415
|
}
|
|
2489
2416
|
}
|
|
2490
|
-
if (matchCount >= HALLUCINATION_THRESHOLDS.MIN_CYCLE_REPEATS) {
|
|
2417
|
+
if (matchCount >= 3) {
|
|
2491
2418
|
return true;
|
|
2492
2419
|
}
|
|
2493
2420
|
}
|
|
2494
2421
|
return false;
|
|
2495
2422
|
}
|
|
2496
|
-
function isSegmentSimilar(s1, s2) {
|
|
2497
|
-
if (s1 === s2) return true;
|
|
2498
|
-
if (s1.length !== s2.length) return false;
|
|
2499
|
-
let matches = 0;
|
|
2500
|
-
const minLength = Math.min(s1.length, s2.length);
|
|
2501
|
-
for (let i = 0; i < minLength; i++) {
|
|
2502
|
-
if (s1[i] === s2[i]) {
|
|
2503
|
-
matches++;
|
|
2504
|
-
}
|
|
2505
|
-
}
|
|
2506
|
-
const similarity = matches / minLength;
|
|
2507
|
-
return similarity >= HALLUCINATION_THRESHOLDS.SEGMENT_SIMILARITY_THRESHOLD;
|
|
2508
|
-
}
|
|
2509
|
-
function calculateEntropy(text) {
|
|
2510
|
-
if (!text || text.length === 0) {
|
|
2511
|
-
return 0;
|
|
2512
|
-
}
|
|
2513
|
-
const frequencies = /* @__PURE__ */ new Map();
|
|
2514
|
-
for (const char of text.toLowerCase()) {
|
|
2515
|
-
frequencies.set(char, (frequencies.get(char) || 0) + 1);
|
|
2516
|
-
}
|
|
2517
|
-
let entropy = 0;
|
|
2518
|
-
const length = text.length;
|
|
2519
|
-
for (const count of frequencies.values()) {
|
|
2520
|
-
const probability = count / length;
|
|
2521
|
-
entropy -= probability * Math.log2(probability);
|
|
2522
|
-
}
|
|
2523
|
-
return entropy;
|
|
2524
|
-
}
|
|
2525
2423
|
function cleanHallucinatedTranscript(transcript) {
|
|
2526
|
-
|
|
2527
|
-
|
|
2528
|
-
if (result.isHallucination) {
|
|
2529
|
-
console.warn(
|
|
2530
|
-
"Hallucinated transcript detected and removed:",
|
|
2531
|
-
transcript.substring(0, 100),
|
|
2532
|
-
`
|
|
2533
|
-
Reason: ${(_a = result.reason) != null ? _a : "Unknown"}`,
|
|
2534
|
-
`Confidence: ${String((_b = result.confidence) != null ? _b : "Unknown")}`
|
|
2535
|
-
);
|
|
2424
|
+
if (detectTranscriptHallucination(transcript)) {
|
|
2425
|
+
console.warn("Hallucinated transcript detected and removed:", transcript.substring(0, 100));
|
|
2536
2426
|
return "";
|
|
2537
2427
|
}
|
|
2538
2428
|
return transcript;
|
|
@@ -2540,19 +2430,32 @@ Reason: ${(_a = result.reason) != null ? _a : "Unknown"}`,
|
|
|
2540
2430
|
|
|
2541
2431
|
// src/utils/ai/get-transcript.ts
|
|
2542
2432
|
async function getTranscript(model, args, cleanHallucinations = true) {
|
|
2543
|
-
var _a, _b, _c, _d, _e, _f;
|
|
2433
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _i;
|
|
2544
2434
|
const getGeminiTranscript = (_b = (_a = api).httpsCallable) == null ? void 0 : _b.call(_a, "getGeminiTranscript");
|
|
2545
2435
|
const getAssemblyAITranscript = (_d = (_c = api).httpsCallable) == null ? void 0 : _d.call(_c, "transcribeAssemblyAIAudio");
|
|
2546
|
-
const
|
|
2436
|
+
const getWhisper3Transcript = (_f = (_e = api).httpsCallable) == null ? void 0 : _f.call(_e, "generateGroqTranscript");
|
|
2437
|
+
const getWhisper1Transcript = (_h = (_g = api).httpsCallable) == null ? void 0 : _h.call(_g, "transcribeAudio");
|
|
2438
|
+
if (model === "whisper-3") {
|
|
2439
|
+
try {
|
|
2440
|
+
const { data } = await (getWhisper3Transcript == null ? void 0 : getWhisper3Transcript({
|
|
2441
|
+
audioUrl: args.audioUrl,
|
|
2442
|
+
language: args.language
|
|
2443
|
+
}));
|
|
2444
|
+
return cleanHallucinations ? cleanHallucinatedTranscript(data) : data;
|
|
2445
|
+
} catch (error) {
|
|
2446
|
+
console.error("Error getting transcript from Whisper-3:", error);
|
|
2447
|
+
throw error;
|
|
2448
|
+
}
|
|
2449
|
+
}
|
|
2547
2450
|
if (model === "whisper") {
|
|
2548
2451
|
try {
|
|
2549
|
-
const { data } = await (
|
|
2452
|
+
const { data } = await (getWhisper1Transcript == null ? void 0 : getWhisper1Transcript({
|
|
2550
2453
|
audioUrl: args.audioUrl,
|
|
2551
2454
|
language: args.language
|
|
2552
2455
|
}));
|
|
2553
2456
|
return cleanHallucinations ? cleanHallucinatedTranscript(data) : data;
|
|
2554
2457
|
} catch (error) {
|
|
2555
|
-
console.error("Error getting transcript from Whisper:", error);
|
|
2458
|
+
console.error("Error getting transcript from Whisper-1:", error);
|
|
2556
2459
|
throw error;
|
|
2557
2460
|
}
|
|
2558
2461
|
}
|
|
@@ -2561,7 +2464,7 @@ async function getTranscript(model, args, cleanHallucinations = true) {
|
|
|
2561
2464
|
const { data } = await (getGeminiTranscript == null ? void 0 : getGeminiTranscript({
|
|
2562
2465
|
audioUrl: args.audioUrl,
|
|
2563
2466
|
targetLanguage: args.language,
|
|
2564
|
-
prompt: args.prompt
|
|
2467
|
+
prompt: (_i = args.prompt) != null ? _i : ""
|
|
2565
2468
|
}));
|
|
2566
2469
|
return cleanHallucinations ? cleanHallucinatedTranscript(data.transcript) : data.transcript;
|
|
2567
2470
|
} catch (error) {
|
|
@@ -2584,7 +2487,7 @@ async function getTranscript(model, args, cleanHallucinations = true) {
|
|
|
2584
2487
|
return null;
|
|
2585
2488
|
}
|
|
2586
2489
|
async function getTranscriptCycle(args) {
|
|
2587
|
-
const models = ["whisper", "gemini", "assemblyai"];
|
|
2490
|
+
const models = ["whisper-3", "whisper", "gemini", "assemblyai"];
|
|
2588
2491
|
let transcript = "";
|
|
2589
2492
|
let lastError = null;
|
|
2590
2493
|
for (const model of models) {
|