@speakableio/core 1.0.26 → 1.0.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.native.d.mts +76 -3
- package/dist/index.native.d.ts +76 -3
- package/dist/index.native.js +249 -4
- package/dist/index.native.js.map +1 -1
- package/dist/index.native.mjs +249 -4
- package/dist/index.native.mjs.map +1 -1
- package/dist/index.web.d.mts +76 -3
- package/dist/index.web.js +249 -4
- package/dist/index.web.js.map +1 -1
- package/package.json +1 -1
package/dist/index.web.d.mts
CHANGED
|
@@ -93,6 +93,11 @@ interface PageActivity {
|
|
|
93
93
|
completed?: boolean;
|
|
94
94
|
media_area_id?: string | null;
|
|
95
95
|
media_area_layout?: 'left' | 'right' | null;
|
|
96
|
+
media_mode?: 'single' | 'media_area' | 'none' | null;
|
|
97
|
+
media?: {
|
|
98
|
+
type: 'image' | 'video';
|
|
99
|
+
url: string;
|
|
100
|
+
} | null;
|
|
96
101
|
score?: number;
|
|
97
102
|
verificationStatus?: VerificationCardStatus;
|
|
98
103
|
native_text?: string;
|
|
@@ -191,6 +196,11 @@ declare function useCards({ cardIds, enabled, asObject, }: {
|
|
|
191
196
|
completed?: boolean;
|
|
192
197
|
media_area_id?: string | null;
|
|
193
198
|
media_area_layout?: "left" | "right" | null;
|
|
199
|
+
media_mode?: "single" | "media_area" | "none" | null;
|
|
200
|
+
media?: {
|
|
201
|
+
type: "image" | "video";
|
|
202
|
+
url: string;
|
|
203
|
+
} | null;
|
|
194
204
|
score?: number;
|
|
195
205
|
verificationStatus?: VerificationCardStatus;
|
|
196
206
|
native_text?: string;
|
|
@@ -266,6 +276,11 @@ declare function useCreateCards(): {
|
|
|
266
276
|
completed?: boolean;
|
|
267
277
|
media_area_id?: string | null;
|
|
268
278
|
media_area_layout?: "left" | "right" | null;
|
|
279
|
+
media_mode?: "single" | "media_area" | "none" | null;
|
|
280
|
+
media?: {
|
|
281
|
+
type: "image" | "video";
|
|
282
|
+
url: string;
|
|
283
|
+
} | null;
|
|
269
284
|
score?: number;
|
|
270
285
|
verificationStatus?: VerificationCardStatus;
|
|
271
286
|
native_text?: string;
|
|
@@ -349,6 +364,11 @@ declare function useGetCard({ cardId, enabled }: {
|
|
|
349
364
|
completed?: boolean;
|
|
350
365
|
media_area_id?: string | null;
|
|
351
366
|
media_area_layout?: "left" | "right" | null;
|
|
367
|
+
media_mode?: "single" | "media_area" | "none" | null;
|
|
368
|
+
media?: {
|
|
369
|
+
type: "image" | "video";
|
|
370
|
+
url: string;
|
|
371
|
+
} | null;
|
|
352
372
|
score?: number;
|
|
353
373
|
verificationStatus?: VerificationCardStatus;
|
|
354
374
|
native_text?: string;
|
|
@@ -424,6 +444,11 @@ declare const createCardRepo: () => {
|
|
|
424
444
|
completed?: boolean;
|
|
425
445
|
media_area_id?: string | null;
|
|
426
446
|
media_area_layout?: "left" | "right" | null;
|
|
447
|
+
media_mode?: "single" | "media_area" | "none" | null;
|
|
448
|
+
media?: {
|
|
449
|
+
type: "image" | "video";
|
|
450
|
+
url: string;
|
|
451
|
+
} | null;
|
|
427
452
|
score?: number;
|
|
428
453
|
verificationStatus?: VerificationCardStatus;
|
|
429
454
|
native_text?: string;
|
|
@@ -494,6 +519,11 @@ declare const createCardRepo: () => {
|
|
|
494
519
|
completed?: boolean;
|
|
495
520
|
media_area_id?: string | null;
|
|
496
521
|
media_area_layout?: "left" | "right" | null;
|
|
522
|
+
media_mode?: "single" | "media_area" | "none" | null;
|
|
523
|
+
media?: {
|
|
524
|
+
type: "image" | "video";
|
|
525
|
+
url: string;
|
|
526
|
+
} | null;
|
|
497
527
|
score?: number;
|
|
498
528
|
verificationStatus?: VerificationCardStatus;
|
|
499
529
|
native_text?: string;
|
|
@@ -1318,6 +1348,11 @@ declare function createFsClientBase({ db, helpers, httpsCallable, logEvent, }: {
|
|
|
1318
1348
|
completed?: boolean;
|
|
1319
1349
|
media_area_id?: string | null;
|
|
1320
1350
|
media_area_layout?: "left" | "right" | null;
|
|
1351
|
+
media_mode?: "single" | "media_area" | "none" | null;
|
|
1352
|
+
media?: {
|
|
1353
|
+
type: "image" | "video";
|
|
1354
|
+
url: string;
|
|
1355
|
+
} | null;
|
|
1321
1356
|
score?: number;
|
|
1322
1357
|
verificationStatus?: VerificationCardStatus;
|
|
1323
1358
|
native_text?: string;
|
|
@@ -1388,6 +1423,11 @@ declare function createFsClientBase({ db, helpers, httpsCallable, logEvent, }: {
|
|
|
1388
1423
|
completed?: boolean;
|
|
1389
1424
|
media_area_id?: string | null;
|
|
1390
1425
|
media_area_layout?: "left" | "right" | null;
|
|
1426
|
+
media_mode?: "single" | "media_area" | "none" | null;
|
|
1427
|
+
media?: {
|
|
1428
|
+
type: "image" | "video";
|
|
1429
|
+
url: string;
|
|
1430
|
+
} | null;
|
|
1391
1431
|
score?: number;
|
|
1392
1432
|
verificationStatus?: VerificationCardStatus;
|
|
1393
1433
|
native_text?: string;
|
|
@@ -1525,7 +1565,15 @@ declare function getTranscript(model: 'gemini' | 'assemblyai' | 'whisper', args:
|
|
|
1525
1565
|
language: string;
|
|
1526
1566
|
audioUrl: string;
|
|
1527
1567
|
prompt?: string;
|
|
1528
|
-
}): Promise<string | null>;
|
|
1568
|
+
}, cleanHallucinations?: boolean): Promise<string | null>;
|
|
1569
|
+
declare function getTranscriptCycle(args: {
|
|
1570
|
+
audioUrl: string;
|
|
1571
|
+
language: string;
|
|
1572
|
+
prompt: string;
|
|
1573
|
+
}): Promise<{
|
|
1574
|
+
transcript: string;
|
|
1575
|
+
success: boolean;
|
|
1576
|
+
}>;
|
|
1529
1577
|
|
|
1530
1578
|
declare const getRespondCardTool: ({ language, standard, }: {
|
|
1531
1579
|
language: string;
|
|
@@ -1633,6 +1681,11 @@ declare function useActivity({ id, isAssignment, onAssignmentSubmitted, ltiData,
|
|
|
1633
1681
|
completed?: boolean;
|
|
1634
1682
|
media_area_id?: string | null;
|
|
1635
1683
|
media_area_layout?: "left" | "right" | null;
|
|
1684
|
+
media_mode?: "single" | "media_area" | "none" | null;
|
|
1685
|
+
media?: {
|
|
1686
|
+
type: "image" | "video";
|
|
1687
|
+
url: string;
|
|
1688
|
+
} | null;
|
|
1636
1689
|
score?: number;
|
|
1637
1690
|
verificationStatus?: VerificationCardStatus;
|
|
1638
1691
|
native_text?: string;
|
|
@@ -2782,6 +2835,16 @@ declare function useSpeakableTranscript(): {
|
|
|
2782
2835
|
prompt?: string;
|
|
2783
2836
|
}, unknown>;
|
|
2784
2837
|
};
|
|
2838
|
+
declare function useSpeakableTranscriptCycle(): {
|
|
2839
|
+
mutationTranscriptCycle: _tanstack_react_query.UseMutationResult<{
|
|
2840
|
+
transcript: string;
|
|
2841
|
+
success: boolean;
|
|
2842
|
+
}, Error, {
|
|
2843
|
+
audioUrl: string;
|
|
2844
|
+
language: string;
|
|
2845
|
+
prompt: string;
|
|
2846
|
+
}, unknown>;
|
|
2847
|
+
};
|
|
2785
2848
|
|
|
2786
2849
|
declare const useUpdateStudentVocab: (page: PageActivityWithId | null) => {
|
|
2787
2850
|
studentVocabMarkVoiceSuccess: undefined;
|
|
@@ -2838,7 +2901,7 @@ declare const useBaseOpenAI: ({ onTranscriptSuccess, onTranscriptError, onComple
|
|
|
2838
2901
|
noFeedbackAvailable: boolean;
|
|
2839
2902
|
success: boolean;
|
|
2840
2903
|
reason: string;
|
|
2841
|
-
accessType: "
|
|
2904
|
+
accessType: "none" | "ai_enabled" | "teacher_preview" | "student_with_teacher_plan";
|
|
2842
2905
|
} | {
|
|
2843
2906
|
noFeedbackAvailable: boolean;
|
|
2844
2907
|
success: boolean;
|
|
@@ -3164,6 +3227,11 @@ declare const createFsClientWeb: ({ db, httpsCallable, logEvent }: FsClientParam
|
|
|
3164
3227
|
completed?: boolean;
|
|
3165
3228
|
media_area_id?: string | null;
|
|
3166
3229
|
media_area_layout?: "left" | "right" | null;
|
|
3230
|
+
media_mode?: "single" | "media_area" | "none" | null;
|
|
3231
|
+
media?: {
|
|
3232
|
+
type: "image" | "video";
|
|
3233
|
+
url: string;
|
|
3234
|
+
} | null;
|
|
3167
3235
|
score?: number;
|
|
3168
3236
|
verificationStatus?: VerificationCardStatus;
|
|
3169
3237
|
native_text?: string;
|
|
@@ -3234,6 +3302,11 @@ declare const createFsClientWeb: ({ db, httpsCallable, logEvent }: FsClientParam
|
|
|
3234
3302
|
completed?: boolean;
|
|
3235
3303
|
media_area_id?: string | null;
|
|
3236
3304
|
media_area_layout?: "left" | "right" | null;
|
|
3305
|
+
media_mode?: "single" | "media_area" | "none" | null;
|
|
3306
|
+
media?: {
|
|
3307
|
+
type: "image" | "video";
|
|
3308
|
+
url: string;
|
|
3309
|
+
} | null;
|
|
3237
3310
|
score?: number;
|
|
3238
3311
|
verificationStatus?: VerificationCardStatus;
|
|
3239
3312
|
native_text?: string;
|
|
@@ -3296,4 +3369,4 @@ declare const createFsClientWeb: ({ db, httpsCallable, logEvent }: FsClientParam
|
|
|
3296
3369
|
};
|
|
3297
3370
|
};
|
|
3298
3371
|
|
|
3299
|
-
export { ActivityPageType, type Assignment, type AssignmentAnalyticsType$1 as AssignmentAnalyticsType, type AssignmentWithId, BASE_MULTIPLE_CHOICE_FIELD_VALUES, BASE_REPEAT_FIELD_VALUES, BASE_RESPOND_FIELD_VALUES, type CreditContract, FeedbackTypesCard, FsCtx, type InstitutionSubscription, LENIENCY_OPTIONS, LeniencyCard, MULTIPLE_CHOICE_PAGE_ACTIVITY_TYPES, type Organization, type OrganizationAccess, type PageActivity, type PageActivityWithId, type PageScore, REPEAT_PAGE_ACTIVITY_TYPES, RESPOND_AUDIO_PAGE_ACTIVITY_TYPES, RESPOND_PAGE_ACTIVITY_TYPES, RESPOND_WRITE_PAGE_ACTIVITY_TYPES, type RefsCardsFiresotre, type RefsSetsFirestore, SPEAKABLE_NOTIFICATIONS, STUDENT_LEVELS_OPTIONS, type Score, type ScoreWithId, type Set, type SetWithId, type SpeakableNotificationType, SpeakableNotificationTypes, SpeakableProvider, VerificationCardStatus, assignmentQueryKeys, cardsQueryKeys, checkIsMCPage, checkIsMediaPage, checkIsRepeatPage, checkIsRespondAudioPage, checkIsRespondPage, checkIsRespondWrittenPage, checkIsShortAnswerPage, checkTypePageActivity, cleanString, createAssignmentRepo, createCardRepo, createFsClientWeb as createFsClient, createSetRepo, creditQueryKeys, debounce, getCardFromCache, getLabelPage, getPagePrompt, getPhraseLength, getRespondCardTool, getSetFromCache, getTotalCompletedCards, getTranscript, getWordHash, purify, refsCardsFiresotre, refsSetsFirestore, scoreQueryKeys, setsQueryKeys, updateCardInCache, updateSetInCache, useActivity, useActivityFeedbackAccess, useAssignment, useBaseOpenAI, useCards, useClearScore, useClearScoreV2, useCreateCard, useCreateCards, useCreateNotification, useGetCard, useOrganizationAccess, useScore, useSet, useSpeakableApi, useSpeakableTranscript, useSubmitAssignmentScore, useSubmitPracticeScore, useUpdateCardScore, useUpdateScore, useUpdateStudentVocab, useUserCredits };
|
|
3372
|
+
export { ActivityPageType, type Assignment, type AssignmentAnalyticsType$1 as AssignmentAnalyticsType, type AssignmentWithId, BASE_MULTIPLE_CHOICE_FIELD_VALUES, BASE_REPEAT_FIELD_VALUES, BASE_RESPOND_FIELD_VALUES, type CreditContract, FeedbackTypesCard, FsCtx, type InstitutionSubscription, LENIENCY_OPTIONS, LeniencyCard, MULTIPLE_CHOICE_PAGE_ACTIVITY_TYPES, type Organization, type OrganizationAccess, type PageActivity, type PageActivityWithId, type PageScore, REPEAT_PAGE_ACTIVITY_TYPES, RESPOND_AUDIO_PAGE_ACTIVITY_TYPES, RESPOND_PAGE_ACTIVITY_TYPES, RESPOND_WRITE_PAGE_ACTIVITY_TYPES, type RefsCardsFiresotre, type RefsSetsFirestore, SPEAKABLE_NOTIFICATIONS, STUDENT_LEVELS_OPTIONS, type Score, type ScoreWithId, type Set, type SetWithId, type SpeakableNotificationType, SpeakableNotificationTypes, SpeakableProvider, VerificationCardStatus, assignmentQueryKeys, cardsQueryKeys, checkIsMCPage, checkIsMediaPage, checkIsRepeatPage, checkIsRespondAudioPage, checkIsRespondPage, checkIsRespondWrittenPage, checkIsShortAnswerPage, checkTypePageActivity, cleanString, createAssignmentRepo, createCardRepo, createFsClientWeb as createFsClient, createSetRepo, creditQueryKeys, debounce, getCardFromCache, getLabelPage, getPagePrompt, getPhraseLength, getRespondCardTool, getSetFromCache, getTotalCompletedCards, getTranscript, getTranscriptCycle, getWordHash, purify, refsCardsFiresotre, refsSetsFirestore, scoreQueryKeys, setsQueryKeys, updateCardInCache, updateSetInCache, useActivity, useActivityFeedbackAccess, useAssignment, useBaseOpenAI, useCards, useClearScore, useClearScoreV2, useCreateCard, useCreateCards, useCreateNotification, useGetCard, useOrganizationAccess, useScore, useSet, useSpeakableApi, useSpeakableTranscript, useSpeakableTranscriptCycle, useSubmitAssignmentScore, useSubmitPracticeScore, useUpdateCardScore, useUpdateScore, useUpdateStudentVocab, useUserCredits };
|
package/dist/index.web.js
CHANGED
|
@@ -2134,8 +2134,209 @@ var createSetRepo = () => {
|
|
|
2134
2134
|
};
|
|
2135
2135
|
};
|
|
2136
2136
|
|
|
2137
|
+
// src/utils/ai/detect-transcript-hallucionation.ts
|
|
2138
|
+
var HALLUCINATION_THRESHOLDS = {
|
|
2139
|
+
// Short repeats
|
|
2140
|
+
MIN_CONSECUTIVE_REPEATS: 3,
|
|
2141
|
+
MIN_WORDS_FOR_RATIO_CHECK: 10,
|
|
2142
|
+
MAX_UNIQUE_WORDS_FOR_RATIO: 3,
|
|
2143
|
+
MIN_REPETITION_RATIO: 3,
|
|
2144
|
+
// Phrase repeats
|
|
2145
|
+
MIN_SENTENCE_LENGTH: 10,
|
|
2146
|
+
MIN_CONSECUTIVE_SIMILAR_SENTENCES: 2,
|
|
2147
|
+
MIN_SENTENCES_FOR_DUPLICATE_CHECK: 3,
|
|
2148
|
+
// Cyclic patterns
|
|
2149
|
+
MIN_CYCLE_LENGTH: 20,
|
|
2150
|
+
MIN_CYCLE_REPEATS: 3,
|
|
2151
|
+
// Entropy detection
|
|
2152
|
+
MIN_LENGTH_FOR_ENTROPY_CHECK: 50,
|
|
2153
|
+
MAX_ENTROPY_THRESHOLD: 2.5,
|
|
2154
|
+
// bits per character
|
|
2155
|
+
// Similarity
|
|
2156
|
+
SENTENCE_SIMILARITY_THRESHOLD: 0.8,
|
|
2157
|
+
SEGMENT_SIMILARITY_THRESHOLD: 0.85
|
|
2158
|
+
};
|
|
2159
|
+
function detectTranscriptHallucinationWithDetails(transcript) {
|
|
2160
|
+
if (!transcript || transcript.trim().length === 0) {
|
|
2161
|
+
return { isHallucination: false };
|
|
2162
|
+
}
|
|
2163
|
+
const text = transcript.trim();
|
|
2164
|
+
if (text.length < 10) {
|
|
2165
|
+
return { isHallucination: false };
|
|
2166
|
+
}
|
|
2167
|
+
const shortRepeats = detectShortRepeats(text);
|
|
2168
|
+
if (shortRepeats) {
|
|
2169
|
+
return {
|
|
2170
|
+
isHallucination: true,
|
|
2171
|
+
reason: "Detected repeated short words or phrases",
|
|
2172
|
+
confidence: 0.9
|
|
2173
|
+
};
|
|
2174
|
+
}
|
|
2175
|
+
const phraseRepeats = detectPhraseRepeats(text);
|
|
2176
|
+
if (phraseRepeats) {
|
|
2177
|
+
return {
|
|
2178
|
+
isHallucination: true,
|
|
2179
|
+
reason: "Detected repeated sentences or phrases",
|
|
2180
|
+
confidence: 0.85
|
|
2181
|
+
};
|
|
2182
|
+
}
|
|
2183
|
+
const cyclicRepeats = detectCyclicPattern(text);
|
|
2184
|
+
if (cyclicRepeats) {
|
|
2185
|
+
return {
|
|
2186
|
+
isHallucination: true,
|
|
2187
|
+
reason: "Detected cyclic repetition pattern",
|
|
2188
|
+
confidence: 0.8
|
|
2189
|
+
};
|
|
2190
|
+
}
|
|
2191
|
+
if (text.length >= HALLUCINATION_THRESHOLDS.MIN_LENGTH_FOR_ENTROPY_CHECK) {
|
|
2192
|
+
const entropy = calculateEntropy(text);
|
|
2193
|
+
if (entropy < HALLUCINATION_THRESHOLDS.MAX_ENTROPY_THRESHOLD) {
|
|
2194
|
+
return {
|
|
2195
|
+
isHallucination: true,
|
|
2196
|
+
reason: "Detected low entropy (likely gibberish or excessive repetition)",
|
|
2197
|
+
confidence: 0.75
|
|
2198
|
+
};
|
|
2199
|
+
}
|
|
2200
|
+
}
|
|
2201
|
+
return { isHallucination: false };
|
|
2202
|
+
}
|
|
2203
|
+
function detectShortRepeats(text) {
|
|
2204
|
+
const words = text.toLowerCase().split(/[\s,;.!?]+/).filter((w) => w.length > 0);
|
|
2205
|
+
if (words.length < 4) return false;
|
|
2206
|
+
let repeatCount = 1;
|
|
2207
|
+
for (let i = 1; i < words.length; i++) {
|
|
2208
|
+
if (words[i] === words[i - 1]) {
|
|
2209
|
+
repeatCount++;
|
|
2210
|
+
if (repeatCount >= HALLUCINATION_THRESHOLDS.MIN_CONSECUTIVE_REPEATS) {
|
|
2211
|
+
return true;
|
|
2212
|
+
}
|
|
2213
|
+
} else {
|
|
2214
|
+
repeatCount = 1;
|
|
2215
|
+
}
|
|
2216
|
+
}
|
|
2217
|
+
const uniqueWords = new Set(words);
|
|
2218
|
+
const repetitionRatio = words.length / uniqueWords.size;
|
|
2219
|
+
if (words.length >= HALLUCINATION_THRESHOLDS.MIN_WORDS_FOR_RATIO_CHECK && uniqueWords.size <= HALLUCINATION_THRESHOLDS.MAX_UNIQUE_WORDS_FOR_RATIO && repetitionRatio >= HALLUCINATION_THRESHOLDS.MIN_REPETITION_RATIO) {
|
|
2220
|
+
return true;
|
|
2221
|
+
}
|
|
2222
|
+
return false;
|
|
2223
|
+
}
|
|
2224
|
+
function detectPhraseRepeats(text) {
|
|
2225
|
+
const sentences = text.split(/[.!?]+/).map((s) => s.trim().toLowerCase()).filter((s) => s.length > HALLUCINATION_THRESHOLDS.MIN_SENTENCE_LENGTH);
|
|
2226
|
+
if (sentences.length < 2) return false;
|
|
2227
|
+
for (let i = 0; i < sentences.length - 1; i++) {
|
|
2228
|
+
let consecutiveRepeats = 1;
|
|
2229
|
+
for (let j = i + 1; j < sentences.length; j++) {
|
|
2230
|
+
if (isSimilarSentence(sentences[i], sentences[j])) {
|
|
2231
|
+
consecutiveRepeats++;
|
|
2232
|
+
} else {
|
|
2233
|
+
break;
|
|
2234
|
+
}
|
|
2235
|
+
}
|
|
2236
|
+
if (consecutiveRepeats >= HALLUCINATION_THRESHOLDS.MIN_CONSECUTIVE_SIMILAR_SENTENCES) {
|
|
2237
|
+
return true;
|
|
2238
|
+
}
|
|
2239
|
+
}
|
|
2240
|
+
const uniqueSentences = new Set(sentences);
|
|
2241
|
+
if (sentences.length >= HALLUCINATION_THRESHOLDS.MIN_SENTENCES_FOR_DUPLICATE_CHECK && uniqueSentences.size === 1) {
|
|
2242
|
+
return true;
|
|
2243
|
+
}
|
|
2244
|
+
return false;
|
|
2245
|
+
}
|
|
2246
|
+
function isSimilarSentence(s1, s2, threshold = HALLUCINATION_THRESHOLDS.SENTENCE_SIMILARITY_THRESHOLD) {
|
|
2247
|
+
if (s1 === s2) return true;
|
|
2248
|
+
const normalized1 = s1.replace(/\s+/g, " ").trim();
|
|
2249
|
+
const normalized2 = s2.replace(/\s+/g, " ").trim();
|
|
2250
|
+
if (normalized1 === normalized2) return true;
|
|
2251
|
+
const words1 = normalized1.split(/\s+/);
|
|
2252
|
+
const words2 = normalized2.split(/\s+/);
|
|
2253
|
+
if (Math.abs(words1.length - words2.length) > 2) return false;
|
|
2254
|
+
const set1 = new Set(words1);
|
|
2255
|
+
const set2 = new Set(words2);
|
|
2256
|
+
const intersection = new Set([...set1].filter((w) => set2.has(w)));
|
|
2257
|
+
const similarity = intersection.size * 2 / (set1.size + set2.size);
|
|
2258
|
+
return similarity >= threshold;
|
|
2259
|
+
}
|
|
2260
|
+
function detectCyclicPattern(text) {
|
|
2261
|
+
const normalized = text.toLowerCase().replace(/\s+/g, " ").trim();
|
|
2262
|
+
const length = normalized.length;
|
|
2263
|
+
const minCycleLength = HALLUCINATION_THRESHOLDS.MIN_CYCLE_LENGTH;
|
|
2264
|
+
const maxCycleLength = Math.floor(length / 2);
|
|
2265
|
+
if (maxCycleLength < minCycleLength) return false;
|
|
2266
|
+
const step = 5;
|
|
2267
|
+
for (let cycleLen = minCycleLength; cycleLen <= maxCycleLength; cycleLen += step) {
|
|
2268
|
+
const pattern = normalized.substring(0, cycleLen);
|
|
2269
|
+
let matchCount = 0;
|
|
2270
|
+
let pos = 0;
|
|
2271
|
+
while (pos < length) {
|
|
2272
|
+
const segment = normalized.substring(pos, pos + cycleLen);
|
|
2273
|
+
if (segment.length < cycleLen) {
|
|
2274
|
+
const partialMatch = pattern.startsWith(segment);
|
|
2275
|
+
if (partialMatch && matchCount > 0) {
|
|
2276
|
+
matchCount++;
|
|
2277
|
+
}
|
|
2278
|
+
break;
|
|
2279
|
+
}
|
|
2280
|
+
if (segment === pattern || isSegmentSimilar(segment, pattern)) {
|
|
2281
|
+
matchCount++;
|
|
2282
|
+
pos += cycleLen;
|
|
2283
|
+
} else {
|
|
2284
|
+
break;
|
|
2285
|
+
}
|
|
2286
|
+
}
|
|
2287
|
+
if (matchCount >= HALLUCINATION_THRESHOLDS.MIN_CYCLE_REPEATS) {
|
|
2288
|
+
return true;
|
|
2289
|
+
}
|
|
2290
|
+
}
|
|
2291
|
+
return false;
|
|
2292
|
+
}
|
|
2293
|
+
function isSegmentSimilar(s1, s2) {
|
|
2294
|
+
if (s1 === s2) return true;
|
|
2295
|
+
if (s1.length !== s2.length) return false;
|
|
2296
|
+
let matches = 0;
|
|
2297
|
+
const minLength = Math.min(s1.length, s2.length);
|
|
2298
|
+
for (let i = 0; i < minLength; i++) {
|
|
2299
|
+
if (s1[i] === s2[i]) {
|
|
2300
|
+
matches++;
|
|
2301
|
+
}
|
|
2302
|
+
}
|
|
2303
|
+
const similarity = matches / minLength;
|
|
2304
|
+
return similarity >= HALLUCINATION_THRESHOLDS.SEGMENT_SIMILARITY_THRESHOLD;
|
|
2305
|
+
}
|
|
2306
|
+
function calculateEntropy(text) {
|
|
2307
|
+
if (!text || text.length === 0) {
|
|
2308
|
+
return 0;
|
|
2309
|
+
}
|
|
2310
|
+
const frequencies = /* @__PURE__ */ new Map();
|
|
2311
|
+
for (const char of text.toLowerCase()) {
|
|
2312
|
+
frequencies.set(char, (frequencies.get(char) || 0) + 1);
|
|
2313
|
+
}
|
|
2314
|
+
let entropy = 0;
|
|
2315
|
+
const length = text.length;
|
|
2316
|
+
for (const count of frequencies.values()) {
|
|
2317
|
+
const probability = count / length;
|
|
2318
|
+
entropy -= probability * Math.log2(probability);
|
|
2319
|
+
}
|
|
2320
|
+
return entropy;
|
|
2321
|
+
}
|
|
2322
|
+
function cleanHallucinatedTranscript(transcript) {
|
|
2323
|
+
var _a, _b;
|
|
2324
|
+
const result = detectTranscriptHallucinationWithDetails(transcript);
|
|
2325
|
+
if (result.isHallucination) {
|
|
2326
|
+
console.warn(
|
|
2327
|
+
"Hallucinated transcript detected and removed:",
|
|
2328
|
+
transcript.substring(0, 100),
|
|
2329
|
+
`
|
|
2330
|
+
Reason: ${(_a = result.reason) != null ? _a : "Unknown"}`,
|
|
2331
|
+
`Confidence: ${String((_b = result.confidence) != null ? _b : "Unknown")}`
|
|
2332
|
+
);
|
|
2333
|
+
return "";
|
|
2334
|
+
}
|
|
2335
|
+
return transcript;
|
|
2336
|
+
}
|
|
2337
|
+
|
|
2137
2338
|
// src/utils/ai/get-transcript.ts
|
|
2138
|
-
async function getTranscript(model, args) {
|
|
2339
|
+
async function getTranscript(model, args, cleanHallucinations = true) {
|
|
2139
2340
|
var _a, _b, _c, _d, _e, _f;
|
|
2140
2341
|
const getGeminiTranscript = (_b = (_a = api).httpsCallable) == null ? void 0 : _b.call(_a, "getGeminiTranscript");
|
|
2141
2342
|
const getAssemblyAITranscript = (_d = (_c = api).httpsCallable) == null ? void 0 : _d.call(_c, "transcribeAssemblyAIAudio");
|
|
@@ -2146,7 +2347,7 @@ async function getTranscript(model, args) {
|
|
|
2146
2347
|
audioUrl: args.audioUrl,
|
|
2147
2348
|
language: args.language
|
|
2148
2349
|
}));
|
|
2149
|
-
return data;
|
|
2350
|
+
return cleanHallucinations ? cleanHallucinatedTranscript(data) : data;
|
|
2150
2351
|
} catch (error) {
|
|
2151
2352
|
console.error("Error getting transcript from Whisper:", error);
|
|
2152
2353
|
throw error;
|
|
@@ -2159,7 +2360,7 @@ async function getTranscript(model, args) {
|
|
|
2159
2360
|
targetLanguage: args.language,
|
|
2160
2361
|
prompt: args.prompt
|
|
2161
2362
|
}));
|
|
2162
|
-
return data.transcript;
|
|
2363
|
+
return cleanHallucinations ? cleanHallucinatedTranscript(data.transcript) : data.transcript;
|
|
2163
2364
|
} catch (error) {
|
|
2164
2365
|
console.error("Error getting transcript from Gemini:", error);
|
|
2165
2366
|
throw error;
|
|
@@ -2171,7 +2372,7 @@ async function getTranscript(model, args) {
|
|
|
2171
2372
|
audioUrl: args.audioUrl,
|
|
2172
2373
|
language: args.language
|
|
2173
2374
|
}));
|
|
2174
|
-
return response.data;
|
|
2375
|
+
return cleanHallucinations ? cleanHallucinatedTranscript(response.data) : response.data;
|
|
2175
2376
|
} catch (error) {
|
|
2176
2377
|
console.error("Error getting transcript from AssemblyAI:", error);
|
|
2177
2378
|
throw error;
|
|
@@ -2179,6 +2380,37 @@ async function getTranscript(model, args) {
|
|
|
2179
2380
|
}
|
|
2180
2381
|
return null;
|
|
2181
2382
|
}
|
|
2383
|
+
async function getTranscriptCycle(args) {
|
|
2384
|
+
const models = ["whisper", "gemini", "assemblyai"];
|
|
2385
|
+
let transcript = "";
|
|
2386
|
+
let lastError = null;
|
|
2387
|
+
for (const model of models) {
|
|
2388
|
+
try {
|
|
2389
|
+
const transcriptResult = await getTranscript(model, args, false);
|
|
2390
|
+
const rawTranscript = transcriptResult || "";
|
|
2391
|
+
transcript = cleanHallucinatedTranscript(rawTranscript);
|
|
2392
|
+
if (transcript !== "") {
|
|
2393
|
+
console.log(`Successfully got transcript from ${model}`);
|
|
2394
|
+
break;
|
|
2395
|
+
}
|
|
2396
|
+
console.warn(`${model} returned empty transcript, trying next model`);
|
|
2397
|
+
} catch (e) {
|
|
2398
|
+
console.error(`Error with ${model} transcript:`, e);
|
|
2399
|
+
lastError = e;
|
|
2400
|
+
}
|
|
2401
|
+
}
|
|
2402
|
+
if (transcript === "") {
|
|
2403
|
+
console.error("All transcript models failed or returned empty", lastError);
|
|
2404
|
+
return {
|
|
2405
|
+
transcript: "",
|
|
2406
|
+
success: false
|
|
2407
|
+
};
|
|
2408
|
+
}
|
|
2409
|
+
return {
|
|
2410
|
+
transcript,
|
|
2411
|
+
success: true
|
|
2412
|
+
};
|
|
2413
|
+
}
|
|
2182
2414
|
|
|
2183
2415
|
// src/constants/all-langs.json
|
|
2184
2416
|
var all_langs_default = {
|
|
@@ -2925,6 +3157,17 @@ function useSpeakableTranscript() {
|
|
|
2925
3157
|
mutation
|
|
2926
3158
|
};
|
|
2927
3159
|
}
|
|
3160
|
+
function useSpeakableTranscriptCycle() {
|
|
3161
|
+
const mutation = useMutation3({
|
|
3162
|
+
mutationFn: async (args) => {
|
|
3163
|
+
return getTranscriptCycle(args);
|
|
3164
|
+
},
|
|
3165
|
+
retry: false
|
|
3166
|
+
});
|
|
3167
|
+
return {
|
|
3168
|
+
mutationTranscriptCycle: mutation
|
|
3169
|
+
};
|
|
3170
|
+
}
|
|
2928
3171
|
|
|
2929
3172
|
// src/hooks/useUpdateStudentVoc.ts
|
|
2930
3173
|
var useUpdateStudentVocab = (page) => {
|
|
@@ -3478,6 +3721,7 @@ export {
|
|
|
3478
3721
|
getSetFromCache,
|
|
3479
3722
|
getTotalCompletedCards,
|
|
3480
3723
|
getTranscript,
|
|
3724
|
+
getTranscriptCycle,
|
|
3481
3725
|
getWordHash,
|
|
3482
3726
|
purify,
|
|
3483
3727
|
refsCardsFiresotre,
|
|
@@ -3502,6 +3746,7 @@ export {
|
|
|
3502
3746
|
useSet,
|
|
3503
3747
|
useSpeakableApi,
|
|
3504
3748
|
useSpeakableTranscript,
|
|
3749
|
+
useSpeakableTranscriptCycle,
|
|
3505
3750
|
useSubmitAssignmentScore,
|
|
3506
3751
|
useSubmitPracticeScore,
|
|
3507
3752
|
useUpdateCardScore,
|