@speakableio/core 1.0.26 → 1.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.native.d.mts +20 -2
- package/dist/index.native.d.ts +20 -2
- package/dist/index.native.js +249 -4
- package/dist/index.native.js.map +1 -1
- package/dist/index.native.mjs +249 -4
- package/dist/index.native.mjs.map +1 -1
- package/dist/index.web.d.mts +20 -2
- package/dist/index.web.js +249 -4
- package/dist/index.web.js.map +1 -1
- package/package.json +1 -1
package/dist/index.native.d.mts
CHANGED
|
@@ -1525,7 +1525,15 @@ declare function getTranscript(model: 'gemini' | 'assemblyai' | 'whisper', args:
|
|
|
1525
1525
|
language: string;
|
|
1526
1526
|
audioUrl: string;
|
|
1527
1527
|
prompt?: string;
|
|
1528
|
-
}): Promise<string | null>;
|
|
1528
|
+
}, cleanHallucinations?: boolean): Promise<string | null>;
|
|
1529
|
+
/**
 * Transcribes the audio at `args.audioUrl`, falling back across several
 * transcription models.
 *
 * As implemented in the `index.native.js` bundle of this release, the models
 * are tried in the order whisper, gemini, assemblyai; each result is passed
 * through the hallucination filter and the first non-empty transcript wins.
 * Per-model errors are caught and logged there, so the promise resolves with
 * `{ transcript: "", success: false }` when no model yields a usable result.
 *
 * @param args - Audio location, language and prompt forwarded to `getTranscript`.
 * @returns The accepted transcript and whether any model produced one.
 */
declare function getTranscriptCycle(args: {
|
|
1530
|
+
audioUrl: string;
|
|
1531
|
+
language: string;
|
|
1532
|
+
prompt: string;
|
|
1533
|
+
}): Promise<{
|
|
1534
|
+
transcript: string;
|
|
1535
|
+
success: boolean;
|
|
1536
|
+
}>;
|
|
1529
1537
|
|
|
1530
1538
|
declare const getRespondCardTool: ({ language, standard, }: {
|
|
1531
1539
|
language: string;
|
|
@@ -2782,6 +2790,16 @@ declare function useSpeakableTranscript(): {
|
|
|
2782
2790
|
prompt?: string;
|
|
2783
2791
|
}, unknown>;
|
|
2784
2792
|
};
|
|
2793
|
+
/**
 * React hook exposing `getTranscriptCycle` as a TanStack Query mutation.
 *
 * In the `index.native.js` bundle of this release the mutation is created
 * with `retry: false` and simply forwards its variables to
 * `getTranscriptCycle`.
 *
 * @returns An object whose `mutationTranscriptCycle` is the mutation result.
 */
declare function useSpeakableTranscriptCycle(): {
|
|
2794
|
+
mutationTranscriptCycle: _tanstack_react_query.UseMutationResult<{
|
|
2795
|
+
transcript: string;
|
|
2796
|
+
success: boolean;
|
|
2797
|
+
}, Error, {
|
|
2798
|
+
audioUrl: string;
|
|
2799
|
+
language: string;
|
|
2800
|
+
prompt: string;
|
|
2801
|
+
}, unknown>;
|
|
2802
|
+
};
|
|
2785
2803
|
|
|
2786
2804
|
declare const useUpdateStudentVocab: (page: PageActivityWithId | null) => {
|
|
2787
2805
|
studentVocabMarkVoiceSuccess: undefined;
|
|
@@ -3296,4 +3314,4 @@ declare const createFsClientNative: ({ db, httpsCallable, logEvent }: FsClientPa
|
|
|
3296
3314
|
};
|
|
3297
3315
|
};
|
|
3298
3316
|
|
|
3299
|
-
export { ActivityPageType, type Assignment, type AssignmentAnalyticsType$1 as AssignmentAnalyticsType, type AssignmentWithId, BASE_MULTIPLE_CHOICE_FIELD_VALUES, BASE_REPEAT_FIELD_VALUES, BASE_RESPOND_FIELD_VALUES, type CreditContract, FeedbackTypesCard, FsCtx, type InstitutionSubscription, LENIENCY_OPTIONS, LeniencyCard, MULTIPLE_CHOICE_PAGE_ACTIVITY_TYPES, type Organization, type OrganizationAccess, type PageActivity, type PageActivityWithId, type PageScore, REPEAT_PAGE_ACTIVITY_TYPES, RESPOND_AUDIO_PAGE_ACTIVITY_TYPES, RESPOND_PAGE_ACTIVITY_TYPES, RESPOND_WRITE_PAGE_ACTIVITY_TYPES, type RefsCardsFiresotre, type RefsSetsFirestore, SPEAKABLE_NOTIFICATIONS, STUDENT_LEVELS_OPTIONS, type Score, type ScoreWithId, type Set, type SetWithId, type SpeakableNotificationType, SpeakableNotificationTypes, SpeakableProvider, VerificationCardStatus, assignmentQueryKeys, cardsQueryKeys, checkIsMCPage, checkIsMediaPage, checkIsRepeatPage, checkIsRespondAudioPage, checkIsRespondPage, checkIsRespondWrittenPage, checkIsShortAnswerPage, checkTypePageActivity, cleanString, createAssignmentRepo, createCardRepo, createFsClientNative as createFsClient, createSetRepo, creditQueryKeys, debounce, getCardFromCache, getLabelPage, getPagePrompt, getPhraseLength, getRespondCardTool, getSetFromCache, getTotalCompletedCards, getTranscript, getWordHash, purify, refsCardsFiresotre, refsSetsFirestore, scoreQueryKeys, setsQueryKeys, updateCardInCache, updateSetInCache, useActivity, useActivityFeedbackAccess, useAssignment, useBaseOpenAI, useCards, useClearScore, useClearScoreV2, useCreateCard, useCreateCards, useCreateNotification, useGetCard, useOrganizationAccess, useScore, useSet, useSpeakableApi, useSpeakableTranscript, useSubmitAssignmentScore, useSubmitPracticeScore, useUpdateCardScore, useUpdateScore, useUpdateStudentVocab, useUserCredits };
|
|
3317
|
+
export { ActivityPageType, type Assignment, type AssignmentAnalyticsType$1 as AssignmentAnalyticsType, type AssignmentWithId, BASE_MULTIPLE_CHOICE_FIELD_VALUES, BASE_REPEAT_FIELD_VALUES, BASE_RESPOND_FIELD_VALUES, type CreditContract, FeedbackTypesCard, FsCtx, type InstitutionSubscription, LENIENCY_OPTIONS, LeniencyCard, MULTIPLE_CHOICE_PAGE_ACTIVITY_TYPES, type Organization, type OrganizationAccess, type PageActivity, type PageActivityWithId, type PageScore, REPEAT_PAGE_ACTIVITY_TYPES, RESPOND_AUDIO_PAGE_ACTIVITY_TYPES, RESPOND_PAGE_ACTIVITY_TYPES, RESPOND_WRITE_PAGE_ACTIVITY_TYPES, type RefsCardsFiresotre, type RefsSetsFirestore, SPEAKABLE_NOTIFICATIONS, STUDENT_LEVELS_OPTIONS, type Score, type ScoreWithId, type Set, type SetWithId, type SpeakableNotificationType, SpeakableNotificationTypes, SpeakableProvider, VerificationCardStatus, assignmentQueryKeys, cardsQueryKeys, checkIsMCPage, checkIsMediaPage, checkIsRepeatPage, checkIsRespondAudioPage, checkIsRespondPage, checkIsRespondWrittenPage, checkIsShortAnswerPage, checkTypePageActivity, cleanString, createAssignmentRepo, createCardRepo, createFsClientNative as createFsClient, createSetRepo, creditQueryKeys, debounce, getCardFromCache, getLabelPage, getPagePrompt, getPhraseLength, getRespondCardTool, getSetFromCache, getTotalCompletedCards, getTranscript, getTranscriptCycle, getWordHash, purify, refsCardsFiresotre, refsSetsFirestore, scoreQueryKeys, setsQueryKeys, updateCardInCache, updateSetInCache, useActivity, useActivityFeedbackAccess, useAssignment, useBaseOpenAI, useCards, useClearScore, useClearScoreV2, useCreateCard, useCreateCards, useCreateNotification, useGetCard, useOrganizationAccess, useScore, useSet, useSpeakableApi, useSpeakableTranscript, useSpeakableTranscriptCycle, useSubmitAssignmentScore, useSubmitPracticeScore, useUpdateCardScore, useUpdateScore, useUpdateStudentVocab, useUserCredits };
|
package/dist/index.native.d.ts
CHANGED
|
@@ -1525,7 +1525,15 @@ declare function getTranscript(model: 'gemini' | 'assemblyai' | 'whisper', args:
|
|
|
1525
1525
|
language: string;
|
|
1526
1526
|
audioUrl: string;
|
|
1527
1527
|
prompt?: string;
|
|
1528
|
-
}): Promise<string | null>;
|
|
1528
|
+
}, cleanHallucinations?: boolean): Promise<string | null>;
|
|
1529
|
+
/**
 * Transcribes the audio at `args.audioUrl`, falling back across several
 * transcription models.
 *
 * As implemented in the `index.native.js` bundle of this release, the models
 * are tried in the order whisper, gemini, assemblyai; each result is passed
 * through the hallucination filter and the first non-empty transcript wins.
 * Per-model errors are caught and logged there, so the promise resolves with
 * `{ transcript: "", success: false }` when no model yields a usable result.
 *
 * @param args - Audio location, language and prompt forwarded to `getTranscript`.
 * @returns The accepted transcript and whether any model produced one.
 */
declare function getTranscriptCycle(args: {
|
|
1530
|
+
audioUrl: string;
|
|
1531
|
+
language: string;
|
|
1532
|
+
prompt: string;
|
|
1533
|
+
}): Promise<{
|
|
1534
|
+
transcript: string;
|
|
1535
|
+
success: boolean;
|
|
1536
|
+
}>;
|
|
1529
1537
|
|
|
1530
1538
|
declare const getRespondCardTool: ({ language, standard, }: {
|
|
1531
1539
|
language: string;
|
|
@@ -2782,6 +2790,16 @@ declare function useSpeakableTranscript(): {
|
|
|
2782
2790
|
prompt?: string;
|
|
2783
2791
|
}, unknown>;
|
|
2784
2792
|
};
|
|
2793
|
+
/**
 * React hook exposing `getTranscriptCycle` as a TanStack Query mutation.
 *
 * In the `index.native.js` bundle of this release the mutation is created
 * with `retry: false` and simply forwards its variables to
 * `getTranscriptCycle`.
 *
 * @returns An object whose `mutationTranscriptCycle` is the mutation result.
 */
declare function useSpeakableTranscriptCycle(): {
|
|
2794
|
+
mutationTranscriptCycle: _tanstack_react_query.UseMutationResult<{
|
|
2795
|
+
transcript: string;
|
|
2796
|
+
success: boolean;
|
|
2797
|
+
}, Error, {
|
|
2798
|
+
audioUrl: string;
|
|
2799
|
+
language: string;
|
|
2800
|
+
prompt: string;
|
|
2801
|
+
}, unknown>;
|
|
2802
|
+
};
|
|
2785
2803
|
|
|
2786
2804
|
declare const useUpdateStudentVocab: (page: PageActivityWithId | null) => {
|
|
2787
2805
|
studentVocabMarkVoiceSuccess: undefined;
|
|
@@ -3296,4 +3314,4 @@ declare const createFsClientNative: ({ db, httpsCallable, logEvent }: FsClientPa
|
|
|
3296
3314
|
};
|
|
3297
3315
|
};
|
|
3298
3316
|
|
|
3299
|
-
export { ActivityPageType, type Assignment, type AssignmentAnalyticsType$1 as AssignmentAnalyticsType, type AssignmentWithId, BASE_MULTIPLE_CHOICE_FIELD_VALUES, BASE_REPEAT_FIELD_VALUES, BASE_RESPOND_FIELD_VALUES, type CreditContract, FeedbackTypesCard, FsCtx, type InstitutionSubscription, LENIENCY_OPTIONS, LeniencyCard, MULTIPLE_CHOICE_PAGE_ACTIVITY_TYPES, type Organization, type OrganizationAccess, type PageActivity, type PageActivityWithId, type PageScore, REPEAT_PAGE_ACTIVITY_TYPES, RESPOND_AUDIO_PAGE_ACTIVITY_TYPES, RESPOND_PAGE_ACTIVITY_TYPES, RESPOND_WRITE_PAGE_ACTIVITY_TYPES, type RefsCardsFiresotre, type RefsSetsFirestore, SPEAKABLE_NOTIFICATIONS, STUDENT_LEVELS_OPTIONS, type Score, type ScoreWithId, type Set, type SetWithId, type SpeakableNotificationType, SpeakableNotificationTypes, SpeakableProvider, VerificationCardStatus, assignmentQueryKeys, cardsQueryKeys, checkIsMCPage, checkIsMediaPage, checkIsRepeatPage, checkIsRespondAudioPage, checkIsRespondPage, checkIsRespondWrittenPage, checkIsShortAnswerPage, checkTypePageActivity, cleanString, createAssignmentRepo, createCardRepo, createFsClientNative as createFsClient, createSetRepo, creditQueryKeys, debounce, getCardFromCache, getLabelPage, getPagePrompt, getPhraseLength, getRespondCardTool, getSetFromCache, getTotalCompletedCards, getTranscript, getWordHash, purify, refsCardsFiresotre, refsSetsFirestore, scoreQueryKeys, setsQueryKeys, updateCardInCache, updateSetInCache, useActivity, useActivityFeedbackAccess, useAssignment, useBaseOpenAI, useCards, useClearScore, useClearScoreV2, useCreateCard, useCreateCards, useCreateNotification, useGetCard, useOrganizationAccess, useScore, useSet, useSpeakableApi, useSpeakableTranscript, useSubmitAssignmentScore, useSubmitPracticeScore, useUpdateCardScore, useUpdateScore, useUpdateStudentVocab, useUserCredits };
|
|
3317
|
+
export { ActivityPageType, type Assignment, type AssignmentAnalyticsType$1 as AssignmentAnalyticsType, type AssignmentWithId, BASE_MULTIPLE_CHOICE_FIELD_VALUES, BASE_REPEAT_FIELD_VALUES, BASE_RESPOND_FIELD_VALUES, type CreditContract, FeedbackTypesCard, FsCtx, type InstitutionSubscription, LENIENCY_OPTIONS, LeniencyCard, MULTIPLE_CHOICE_PAGE_ACTIVITY_TYPES, type Organization, type OrganizationAccess, type PageActivity, type PageActivityWithId, type PageScore, REPEAT_PAGE_ACTIVITY_TYPES, RESPOND_AUDIO_PAGE_ACTIVITY_TYPES, RESPOND_PAGE_ACTIVITY_TYPES, RESPOND_WRITE_PAGE_ACTIVITY_TYPES, type RefsCardsFiresotre, type RefsSetsFirestore, SPEAKABLE_NOTIFICATIONS, STUDENT_LEVELS_OPTIONS, type Score, type ScoreWithId, type Set, type SetWithId, type SpeakableNotificationType, SpeakableNotificationTypes, SpeakableProvider, VerificationCardStatus, assignmentQueryKeys, cardsQueryKeys, checkIsMCPage, checkIsMediaPage, checkIsRepeatPage, checkIsRespondAudioPage, checkIsRespondPage, checkIsRespondWrittenPage, checkIsShortAnswerPage, checkTypePageActivity, cleanString, createAssignmentRepo, createCardRepo, createFsClientNative as createFsClient, createSetRepo, creditQueryKeys, debounce, getCardFromCache, getLabelPage, getPagePrompt, getPhraseLength, getRespondCardTool, getSetFromCache, getTotalCompletedCards, getTranscript, getTranscriptCycle, getWordHash, purify, refsCardsFiresotre, refsSetsFirestore, scoreQueryKeys, setsQueryKeys, updateCardInCache, updateSetInCache, useActivity, useActivityFeedbackAccess, useAssignment, useBaseOpenAI, useCards, useClearScore, useClearScoreV2, useCreateCard, useCreateCards, useCreateNotification, useGetCard, useOrganizationAccess, useScore, useSet, useSpeakableApi, useSpeakableTranscript, useSpeakableTranscriptCycle, useSubmitAssignmentScore, useSubmitPracticeScore, useUpdateCardScore, useUpdateScore, useUpdateStudentVocab, useUserCredits };
|
package/dist/index.native.js
CHANGED
|
@@ -73,6 +73,7 @@ __export(index_native_exports, {
|
|
|
73
73
|
getSetFromCache: () => getSetFromCache,
|
|
74
74
|
getTotalCompletedCards: () => getTotalCompletedCards,
|
|
75
75
|
getTranscript: () => getTranscript,
|
|
76
|
+
getTranscriptCycle: () => getTranscriptCycle,
|
|
76
77
|
getWordHash: () => getWordHash,
|
|
77
78
|
purify: () => purify,
|
|
78
79
|
refsCardsFiresotre: () => refsCardsFiresotre,
|
|
@@ -97,6 +98,7 @@ __export(index_native_exports, {
|
|
|
97
98
|
useSet: () => useSet,
|
|
98
99
|
useSpeakableApi: () => useSpeakableApi,
|
|
99
100
|
useSpeakableTranscript: () => useSpeakableTranscript,
|
|
101
|
+
useSpeakableTranscriptCycle: () => useSpeakableTranscriptCycle,
|
|
100
102
|
useSubmitAssignmentScore: () => useSubmitAssignmentScore,
|
|
101
103
|
useSubmitPracticeScore: () => useSubmitPracticeScore,
|
|
102
104
|
useUpdateCardScore: () => useUpdateCardScore,
|
|
@@ -2242,8 +2244,209 @@ var createSetRepo = () => {
|
|
|
2242
2244
|
};
|
|
2243
2245
|
};
|
|
2244
2246
|
|
|
2247
|
+
// src/utils/ai/detect-transcript-hallucionation.ts
|
|
2248
|
+
/**
 * Tuning constants for the transcript hallucination heuristics, grouped by
 * the detector that reads them.
 */
var HALLUCINATION_THRESHOLDS = {
  /* ---- detectShortRepeats ---- */
  // Length of a run of one identical word that is treated as a repeat.
  MIN_CONSECUTIVE_REPEATS: 3,
  // The vocabulary-ratio check only applies from this many words upward...
  MIN_WORDS_FOR_RATIO_CHECK: 10,
  // ...when at most this many distinct words are present...
  MAX_UNIQUE_WORDS_FOR_RATIO: 3,
  // ...and total words / distinct words reaches this value.
  MIN_REPETITION_RATIO: 3,

  /* ---- detectPhraseRepeats ---- */
  // Sentences must be strictly longer than this (in characters) to be compared.
  MIN_SENTENCE_LENGTH: 10,
  // Number of similar sentences in a row that is treated as a repeat.
  MIN_CONSECUTIVE_SIMILAR_SENTENCES: 2,
  // Minimum sentence count for the "every sentence is identical" check.
  MIN_SENTENCES_FOR_DUPLICATE_CHECK: 3,

  /* ---- detectCyclicPattern ---- */
  // Shortest candidate cycle, in characters.
  MIN_CYCLE_LENGTH: 20,
  // Number of matching cycles that is treated as a repeat.
  MIN_CYCLE_REPEATS: 3,

  /* ---- entropy check ---- */
  // Entropy is only evaluated for texts at least this long (characters).
  MIN_LENGTH_FOR_ENTROPY_CHECK: 50,
  // Texts whose entropy falls below this value (bits per character) are flagged.
  MAX_ENTROPY_THRESHOLD: 2.5,

  /* ---- similarity cut-offs ---- */
  // Minimum word-set overlap score for two sentences to count as similar.
  SENTENCE_SIMILARITY_THRESHOLD: 0.8,
  // Minimum fraction of equal positions for two segments to count as similar.
  SEGMENT_SIMILARITY_THRESHOLD: 0.85
};
|
|
2269
|
+
/**
 * Runs the hallucination heuristics over a transcript and reports the first
 * one that fires.
 *
 * Order of checks: short word repeats, repeated sentences, cyclic pattern,
 * then character entropy (entropy only for sufficiently long text).
 *
 * @param transcript Raw transcript text; falsy or shorter than 10 trimmed
 *   characters is never flagged.
 * @returns `{ isHallucination: false }`, or `{ isHallucination: true, reason,
 *   confidence }` describing the heuristic that matched.
 */
function detectTranscriptHallucinationWithDetails(transcript) {
  if (!transcript) {
    return { isHallucination: false };
  }
  const text = transcript.trim();
  // Also covers the whitespace-only case (trimmed length 0).
  if (text.length < 10) {
    return { isHallucination: false };
  }

  // Pattern detectors, evaluated lazily in priority order.
  const patternChecks = [
    [detectShortRepeats, "Detected repeated short words or phrases", 0.9],
    [detectPhraseRepeats, "Detected repeated sentences or phrases", 0.85],
    [detectCyclicPattern, "Detected cyclic repetition pattern", 0.8]
  ];
  for (const [detector, reason, confidence] of patternChecks) {
    if (detector(text)) {
      return { isHallucination: true, reason, confidence };
    }
  }

  const longEnoughForEntropy = text.length >= HALLUCINATION_THRESHOLDS.MIN_LENGTH_FOR_ENTROPY_CHECK;
  if (longEnoughForEntropy && calculateEntropy(text) < HALLUCINATION_THRESHOLDS.MAX_ENTROPY_THRESHOLD) {
    return {
      isHallucination: true,
      reason: "Detected low entropy (likely gibberish or excessive repetition)",
      confidence: 0.75
    };
  }
  return { isHallucination: false };
}
|
|
2313
|
+
/**
 * Flags text dominated by repeated single words.
 *
 * Fires when one word occurs MIN_CONSECUTIVE_REPEATS times in a row, or when
 * a text of at least MIN_WORDS_FOR_RATIO_CHECK words uses at most
 * MAX_UNIQUE_WORDS_FOR_RATIO distinct words with a words/distinct ratio of at
 * least MIN_REPETITION_RATIO. Texts with fewer than four words never match.
 *
 * @param text Transcript text.
 * @returns true when either repetition rule matches.
 */
function detectShortRepeats(text) {
  const {
    MIN_CONSECUTIVE_REPEATS,
    MIN_WORDS_FOR_RATIO_CHECK,
    MAX_UNIQUE_WORDS_FOR_RATIO,
    MIN_REPETITION_RATIO
  } = HALLUCINATION_THRESHOLDS;

  // Case-insensitive tokens, split on whitespace and basic punctuation.
  const tokens = text.toLowerCase().split(/[\s,;.!?]+/).filter(Boolean);
  if (tokens.length < 4) return false;

  // Rule 1: a run of the same token.
  let runLength = 1;
  for (let idx = 1; idx < tokens.length; idx++) {
    if (tokens[idx] !== tokens[idx - 1]) {
      runLength = 1;
      continue;
    }
    runLength += 1;
    if (runLength >= MIN_CONSECUTIVE_REPEATS) return true;
  }

  // Rule 2: many words drawn from a tiny vocabulary.
  const distinctCount = new Set(tokens).size;
  return (
    tokens.length >= MIN_WORDS_FOR_RATIO_CHECK &&
    distinctCount <= MAX_UNIQUE_WORDS_FOR_RATIO &&
    tokens.length / distinctCount >= MIN_REPETITION_RATIO
  );
}
|
|
2334
|
+
/**
 * Flags text in which a sentence is immediately followed by similar ones.
 *
 * Sentences are split on `.`, `!` and `?`, trimmed, lower-cased, and kept only
 * when longer than MIN_SENTENCE_LENGTH characters. Each sentence is compared
 * with the ones that follow it until the first dissimilar sentence.
 *
 * @param text Transcript text.
 * @returns true when a run of similar sentences reaches
 *   MIN_CONSECUTIVE_SIMILAR_SENTENCES, or when there are at least
 *   MIN_SENTENCES_FOR_DUPLICATE_CHECK sentences and all are identical.
 */
function detectPhraseRepeats(text) {
  const minLength = HALLUCINATION_THRESHOLDS.MIN_SENTENCE_LENGTH;
  const phrases = [];
  for (const piece of text.split(/[.!?]+/)) {
    const phrase = piece.trim().toLowerCase();
    if (phrase.length > minLength) phrases.push(phrase);
  }
  if (phrases.length < 2) return false;

  for (let start = 0; start < phrases.length - 1; start++) {
    // Count how many following sentences resemble the one at `start`.
    let run = 1;
    let next = start + 1;
    while (next < phrases.length && isSimilarSentence(phrases[start], phrases[next])) {
      run += 1;
      next += 1;
    }
    if (run >= HALLUCINATION_THRESHOLDS.MIN_CONSECUTIVE_SIMILAR_SENTENCES) return true;
  }

  return (
    phrases.length >= HALLUCINATION_THRESHOLDS.MIN_SENTENCES_FOR_DUPLICATE_CHECK &&
    new Set(phrases).size === 1
  );
}
|
|
2356
|
+
/**
 * Decides whether two sentences are near-duplicates.
 *
 * Identical strings (before or after collapsing whitespace) are similar.
 * Otherwise sentences whose word counts differ by more than two are not, and
 * the rest are scored as 2 * |shared words| / (|words1| + |words2|) over their
 * distinct-word sets.
 *
 * @param s1 First sentence.
 * @param s2 Second sentence.
 * @param threshold Minimum score to count as similar.
 * @returns true when the sentences are considered similar.
 */
function isSimilarSentence(s1, s2, threshold = HALLUCINATION_THRESHOLDS.SENTENCE_SIMILARITY_THRESHOLD) {
  if (s1 === s2) return true;

  const collapse = (value) => value.replace(/\s+/g, " ").trim();
  const left = collapse(s1);
  const right = collapse(s2);
  if (left === right) return true;

  const leftWords = left.split(/\s+/);
  const rightWords = right.split(/\s+/);
  if (Math.abs(leftWords.length - rightWords.length) > 2) return false;

  const leftVocab = new Set(leftWords);
  const rightVocab = new Set(rightWords);
  let shared = 0;
  for (const word of leftVocab) {
    if (rightVocab.has(word)) shared += 1;
  }
  const score = shared * 2 / (leftVocab.size + rightVocab.size);
  return score >= threshold;
}
|
|
2370
|
+
/**
 * Flags text that consists of one leading chunk repeated back-to-back.
 *
 * The text is lower-cased and whitespace-collapsed. For each candidate period
 * the prefix of that length is compared against consecutive, equally sized
 * chunks (exact or via isSegmentSimilar). A shorter trailing chunk counts as
 * one more repeat when it is a prefix of the pattern and at least one full
 * repeat was already seen.
 *
 * NOTE(review): candidate periods advance in steps of 5 starting at
 * MIN_CYCLE_LENGTH, so cycles whose period is not on that grid are only
 * caught through the fuzzy segment comparison — confirm this is intended.
 *
 * @param text Transcript text.
 * @returns true when some period yields at least MIN_CYCLE_REPEATS repeats.
 */
function detectCyclicPattern(text) {
  const haystack = text.toLowerCase().replace(/\s+/g, " ").trim();
  const total = haystack.length;
  const shortestPeriod = HALLUCINATION_THRESHOLDS.MIN_CYCLE_LENGTH;
  const longestPeriod = Math.floor(total / 2);
  if (longestPeriod < shortestPeriod) return false;

  // Counts how many consecutive chunks of `period` characters match the prefix.
  const countRepeats = (period) => {
    const template = haystack.substring(0, period);
    let repeats = 0;
    for (let offset = 0; offset < total; offset += period) {
      const chunk = haystack.substring(offset, offset + period);
      if (chunk.length < period) {
        // Trailing remainder: only counts after at least one full match.
        if (repeats > 0 && template.startsWith(chunk)) repeats += 1;
        return repeats;
      }
      if (chunk !== template && !isSegmentSimilar(chunk, template)) {
        return repeats;
      }
      repeats += 1;
    }
    return repeats;
  };

  const periodStep = 5;
  for (let period = shortestPeriod; period <= longestPeriod; period += periodStep) {
    if (countRepeats(period) >= HALLUCINATION_THRESHOLDS.MIN_CYCLE_REPEATS) {
      return true;
    }
  }
  return false;
}
|
|
2403
|
+
/**
 * Position-wise similarity test for two equally long segments.
 *
 * @param s1 First segment.
 * @param s2 Second segment.
 * @returns true when the segments are identical, or have the same length and
 *   the fraction of equal positions is at least SEGMENT_SIMILARITY_THRESHOLD;
 *   false when the lengths differ.
 */
function isSegmentSimilar(s1, s2) {
  if (s1 === s2) return true;
  const size = s1.length;
  if (size !== s2.length) return false;

  // Both segments have `size` positions here, and size > 0 because two empty
  // strings would already have matched above.
  let equalPositions = 0;
  for (let k = 0; k < size; k++) {
    if (s1[k] === s2[k]) equalPositions += 1;
  }
  return equalPositions / size >= HALLUCINATION_THRESHOLDS.SEGMENT_SIMILARITY_THRESHOLD;
}
|
|
2416
|
+
/**
 * Computes the Shannon entropy of a text, in bits per character,
 * case-insensitively.
 *
 * Characters are counted as Unicode code points of the lower-cased text, and
 * the same count is used as the denominator. Dividing by `text.length`
 * (UTF-16 code units of the original string) would make the probabilities
 * sum to less than 1 for astral characters such as emoji, or whenever
 * lower-casing changes the string length, and so under-report entropy.
 *
 * @param text Text to measure.
 * @returns Entropy in bits per character; 0 for empty or falsy input.
 */
function calculateEntropy(text) {
  if (!text || text.length === 0) {
    return 0;
  }
  const frequencies = /* @__PURE__ */ new Map();
  // Total number of counted symbols; kept in step with the frequency map.
  let symbolCount = 0;
  for (const char of text.toLowerCase()) {
    frequencies.set(char, (frequencies.get(char) || 0) + 1);
    symbolCount++;
  }
  let entropy = 0;
  for (const count of frequencies.values()) {
    const probability = count / symbolCount;
    entropy -= probability * Math.log2(probability);
  }
  return entropy;
}
|
|
2432
|
+
/**
 * Replaces a transcript judged to be a hallucination with an empty string.
 *
 * When the detector fires, a warning is logged with the first 100 characters
 * of the transcript plus the reported reason and confidence.
 *
 * @param transcript Transcript to vet.
 * @returns "" when flagged as a hallucination, otherwise the input unchanged.
 */
function cleanHallucinatedTranscript(transcript) {
  const verdict = detectTranscriptHallucinationWithDetails(transcript);
  if (!verdict.isHallucination) {
    return transcript;
  }

  const reason = verdict.reason != null ? verdict.reason : "Unknown";
  const confidence = verdict.confidence != null ? verdict.confidence : "Unknown";
  console.warn(
    "Hallucinated transcript detected and removed:",
    transcript.substring(0, 100),
    `\nReason: ${reason}`,
    `Confidence: ${String(confidence)}`
  );
  return "";
}
|
|
2447
|
+
|
|
2245
2448
|
// src/utils/ai/get-transcript.ts
|
|
2246
|
-
async function getTranscript(model, args) {
|
|
2449
|
+
async function getTranscript(model, args, cleanHallucinations = true) {
|
|
2247
2450
|
var _a, _b, _c, _d, _e, _f;
|
|
2248
2451
|
const getGeminiTranscript = (_b = (_a = api).httpsCallable) == null ? void 0 : _b.call(_a, "getGeminiTranscript");
|
|
2249
2452
|
const getAssemblyAITranscript = (_d = (_c = api).httpsCallable) == null ? void 0 : _d.call(_c, "transcribeAssemblyAIAudio");
|
|
@@ -2254,7 +2457,7 @@ async function getTranscript(model, args) {
|
|
|
2254
2457
|
audioUrl: args.audioUrl,
|
|
2255
2458
|
language: args.language
|
|
2256
2459
|
}));
|
|
2257
|
-
return data;
|
|
2460
|
+
return cleanHallucinations ? cleanHallucinatedTranscript(data) : data;
|
|
2258
2461
|
} catch (error) {
|
|
2259
2462
|
console.error("Error getting transcript from Whisper:", error);
|
|
2260
2463
|
throw error;
|
|
@@ -2267,7 +2470,7 @@ async function getTranscript(model, args) {
|
|
|
2267
2470
|
targetLanguage: args.language,
|
|
2268
2471
|
prompt: args.prompt
|
|
2269
2472
|
}));
|
|
2270
|
-
return data.transcript;
|
|
2473
|
+
return cleanHallucinations ? cleanHallucinatedTranscript(data.transcript) : data.transcript;
|
|
2271
2474
|
} catch (error) {
|
|
2272
2475
|
console.error("Error getting transcript from Gemini:", error);
|
|
2273
2476
|
throw error;
|
|
@@ -2279,7 +2482,7 @@ async function getTranscript(model, args) {
|
|
|
2279
2482
|
audioUrl: args.audioUrl,
|
|
2280
2483
|
language: args.language
|
|
2281
2484
|
}));
|
|
2282
|
-
return response.data;
|
|
2485
|
+
return cleanHallucinations ? cleanHallucinatedTranscript(response.data) : response.data;
|
|
2283
2486
|
} catch (error) {
|
|
2284
2487
|
console.error("Error getting transcript from AssemblyAI:", error);
|
|
2285
2488
|
throw error;
|
|
@@ -2287,6 +2490,37 @@ async function getTranscript(model, args) {
|
|
|
2287
2490
|
}
|
|
2288
2491
|
return null;
|
|
2289
2492
|
}
|
|
2493
|
+
/**
 * Obtains a transcript by trying each transcription model in turn.
 *
 * Models are tried in the order whisper, gemini, assemblyai. Each raw result
 * is fetched with hallucination cleaning disabled in getTranscript and then
 * cleaned here, so a flagged result becomes "" and triggers the fallback.
 * Errors from a model are logged and do not abort the cycle.
 *
 * A transcript containing only whitespace is treated as empty. Previously it
 * passed the `!== ""` check and was returned as a success without trying the
 * remaining models.
 *
 * @param args Arguments forwarded unchanged to getTranscript.
 * @returns `{ transcript, success: true }` for the first usable transcript, or
 *   `{ transcript: "", success: false }` when every model failed or was empty.
 */
async function getTranscriptCycle(args) {
  const models = ["whisper", "gemini", "assemblyai"];
  let lastError = null;
  for (const model of models) {
    try {
      const transcriptResult = await getTranscript(model, args, false);
      const transcript = cleanHallucinatedTranscript(transcriptResult || "");
      // Blank (whitespace-only) output is as unusable as an empty string.
      if (transcript.trim() !== "") {
        console.log(`Successfully got transcript from ${model}`);
        return {
          transcript,
          success: true
        };
      }
      console.warn(`${model} returned empty transcript, trying next model`);
    } catch (e) {
      console.error(`Error with ${model} transcript:`, e);
      // Remembered only for the final diagnostic if no model succeeds.
      lastError = e;
    }
  }
  console.error("All transcript models failed or returned empty", lastError);
  return {
    transcript: "",
    success: false
  };
}
|
|
2290
2524
|
|
|
2291
2525
|
// src/constants/all-langs.json
|
|
2292
2526
|
var all_langs_default = {
|
|
@@ -3033,6 +3267,17 @@ function useSpeakableTranscript() {
|
|
|
3033
3267
|
mutation
|
|
3034
3268
|
};
|
|
3035
3269
|
}
|
|
3270
|
+
/**
 * React hook that wraps getTranscriptCycle in a mutation with retries turned
 * off.
 *
 * @returns An object exposing the mutation as `mutationTranscriptCycle`.
 */
function useSpeakableTranscriptCycle() {
  const mutationTranscriptCycle = (0, import_react_query7.useMutation)({
    mutationFn: async (args) => getTranscriptCycle(args),
    retry: false
  });
  return { mutationTranscriptCycle };
}
|
|
3036
3281
|
|
|
3037
3282
|
// src/hooks/useUpdateStudentVoc.ts
|
|
3038
3283
|
var useUpdateStudentVocab = (page) => {
|