@speakableio/core 1.0.64 → 1.0.66

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1690,7 +1690,8 @@ declare function SpeakableProvider({ user, children, queryClient, permissions, f
1690
1690
  }): react_jsx_runtime.JSX.Element | null;
1691
1691
  declare function useSpeakableApi(): FsContext;
1692
1692
 
1693
- declare function getTranscript(model: 'gemini' | 'assemblyai' | 'whisper' | 'whisper-3', args: {
1693
+ type TranscriptModel = 'gemini' | 'assemblyai' | 'whisper' | 'whisper-3' | 'gpt-4o';
1694
+ declare function getTranscript(model: TranscriptModel, args: {
1694
1695
  language: string;
1695
1696
  audioUrl: string;
1696
1697
  prompt?: string;
@@ -1699,6 +1700,10 @@ declare function getTranscriptCycle(args: {
1699
1700
  audioUrl: string;
1700
1701
  language: string;
1701
1702
  prompt?: string;
1703
+ cleanHallucinations?: boolean;
1704
+ options?: {
1705
+ modelOrder?: TranscriptModel[];
1706
+ };
1702
1707
  }): Promise<{
1703
1708
  transcript: string;
1704
1709
  success: boolean;
@@ -2967,10 +2972,11 @@ declare const useOrganizationAccess: () => {
2967
2972
 
2968
2973
  declare function useSpeakableTranscript(): {
2969
2974
  mutation: _tanstack_react_query.UseMutationResult<string | null, Error, {
2970
- model: "gemini" | "assemblyai" | "whisper" | "whisper-3";
2975
+ model: TranscriptModel;
2971
2976
  audioUrl: string;
2972
2977
  language: string;
2973
2978
  prompt?: string;
2979
+ cleanHallucinations?: boolean;
2974
2980
  }, unknown>;
2975
2981
  };
2976
2982
  declare function useSpeakableTranscriptCycle(): {
@@ -2981,6 +2987,10 @@ declare function useSpeakableTranscriptCycle(): {
2981
2987
  audioUrl: string;
2982
2988
  language: string;
2983
2989
  prompt: string;
2990
+ cleanHallucinations?: boolean;
2991
+ options?: {
2992
+ modelOrder?: TranscriptModel[];
2993
+ };
2984
2994
  }, unknown>;
2985
2995
  };
2986
2996
 
@@ -3563,4 +3573,4 @@ declare const createFsClientNative: ({ db, httpsCallable, logEvent }: FsClientPa
3563
3573
  };
3564
3574
  };
3565
3575
 
3566
- export { ActivityPageType, type Assignment, type AssignmentAnalyticsType$1 as AssignmentAnalyticsType, type AssignmentWithId, BASE_MULTIPLE_CHOICE_FIELD_VALUES, BASE_REPEAT_FIELD_VALUES, BASE_RESPOND_FIELD_VALUES, CONVERSATION_PAGE_ACTIVITY_TYPES, ConversationPageMode, type CreditContract, FeedbackTypesCard, FsCtx, type InstitutionSubscription, LENIENCY_OPTIONS, LeniencyCard, MULTIPLE_CHOICE_PAGE_ACTIVITY_TYPES, type MediaPageActivity, type Organization, type OrganizationAccess, type PageActivity, type PageActivityWithId, type PageScore, REPEAT_PAGE_ACTIVITY_TYPES, RESPOND_AUDIO_PAGE_ACTIVITY_TYPES, RESPOND_PAGE_ACTIVITY_TYPES, RESPOND_WRITE_PAGE_ACTIVITY_TYPES, type RefsCardsFiresotre, type RefsSetsFirestore, SPEAKABLE_ANALYTICS, SPEAKABLE_NOTIFICATIONS, STUDENT_LEVELS_OPTIONS, type Score, type ScoreWithId, type Set, type SetWithId, type SpeakableNotificationType, SpeakableNotificationTypes, SpeakableProvider, VerificationCardStatus, assignmentQueryKeys, cardsQueryKeys, checkIsConversationPage, checkIsMCPage, checkIsMediaPage, checkIsRepeatPage, checkIsRespondAudioPage, checkIsRespondPage, checkIsRespondWrittenPage, checkIsShortAnswerPage, checkTypePageActivity, cleanString, createAssignmentRepo, createCardRepo, createFsClientNative as createFsClient, createSetRepo, creditQueryKeys, debounce, getCardFromCache, getLabelPage, getPageMediaData, getPagePrompt, getPhraseLength, getRespondCardTool, getSetFromCache, getSingleMediaPageData, getTotalCompletedCards, getTranscript, getTranscriptCycle, getWordHash, purify, refsCardsFiresotre, refsSetsFirestore, scoreQueryKeys, setsQueryKeys, updateCardInCache, updateSetInCache, useActivity, useActivityFeedbackAccess, useAssignment, useBaseOpenAI, useCards, useClearScore, useClearScoreV2, useCreateCard, useCreateCards, useCreateNotification, useGetCard, useOrganizationAccess, useScore, useSet, useSpeakableApi, useSpeakableTranscript, useSpeakableTranscriptCycle, useSubmitAssignmentScore, useSubmitPracticeScore, 
useUpdateCardScore, useUpdateScore, useUpdateStudentVocab, useUserCredits };
3576
+ export { ActivityPageType, type Assignment, type AssignmentAnalyticsType$1 as AssignmentAnalyticsType, type AssignmentWithId, BASE_MULTIPLE_CHOICE_FIELD_VALUES, BASE_REPEAT_FIELD_VALUES, BASE_RESPOND_FIELD_VALUES, CONVERSATION_PAGE_ACTIVITY_TYPES, ConversationPageMode, type CreditContract, FeedbackTypesCard, FsCtx, type InstitutionSubscription, LENIENCY_OPTIONS, LeniencyCard, MULTIPLE_CHOICE_PAGE_ACTIVITY_TYPES, type MediaPageActivity, type Organization, type OrganizationAccess, type PageActivity, type PageActivityWithId, type PageScore, REPEAT_PAGE_ACTIVITY_TYPES, RESPOND_AUDIO_PAGE_ACTIVITY_TYPES, RESPOND_PAGE_ACTIVITY_TYPES, RESPOND_WRITE_PAGE_ACTIVITY_TYPES, type RefsCardsFiresotre, type RefsSetsFirestore, SPEAKABLE_ANALYTICS, SPEAKABLE_NOTIFICATIONS, STUDENT_LEVELS_OPTIONS, type Score, type ScoreWithId, type Set, type SetWithId, type SpeakableNotificationType, SpeakableNotificationTypes, SpeakableProvider, type TranscriptModel, VerificationCardStatus, assignmentQueryKeys, cardsQueryKeys, checkIsConversationPage, checkIsMCPage, checkIsMediaPage, checkIsRepeatPage, checkIsRespondAudioPage, checkIsRespondPage, checkIsRespondWrittenPage, checkIsShortAnswerPage, checkTypePageActivity, cleanString, createAssignmentRepo, createCardRepo, createFsClientNative as createFsClient, createSetRepo, creditQueryKeys, debounce, getCardFromCache, getLabelPage, getPageMediaData, getPagePrompt, getPhraseLength, getRespondCardTool, getSetFromCache, getSingleMediaPageData, getTotalCompletedCards, getTranscript, getTranscriptCycle, getWordHash, purify, refsCardsFiresotre, refsSetsFirestore, scoreQueryKeys, setsQueryKeys, updateCardInCache, updateSetInCache, useActivity, useActivityFeedbackAccess, useAssignment, useBaseOpenAI, useCards, useClearScore, useClearScoreV2, useCreateCard, useCreateCards, useCreateNotification, useGetCard, useOrganizationAccess, useScore, useSet, useSpeakableApi, useSpeakableTranscript, useSpeakableTranscriptCycle, useSubmitAssignmentScore, 
useSubmitPracticeScore, useUpdateCardScore, useUpdateScore, useUpdateStudentVocab, useUserCredits };
@@ -1690,7 +1690,8 @@ declare function SpeakableProvider({ user, children, queryClient, permissions, f
1690
1690
  }): react_jsx_runtime.JSX.Element | null;
1691
1691
  declare function useSpeakableApi(): FsContext;
1692
1692
 
1693
- declare function getTranscript(model: 'gemini' | 'assemblyai' | 'whisper' | 'whisper-3', args: {
1693
+ type TranscriptModel = 'gemini' | 'assemblyai' | 'whisper' | 'whisper-3' | 'gpt-4o';
1694
+ declare function getTranscript(model: TranscriptModel, args: {
1694
1695
  language: string;
1695
1696
  audioUrl: string;
1696
1697
  prompt?: string;
@@ -1699,6 +1700,10 @@ declare function getTranscriptCycle(args: {
1699
1700
  audioUrl: string;
1700
1701
  language: string;
1701
1702
  prompt?: string;
1703
+ cleanHallucinations?: boolean;
1704
+ options?: {
1705
+ modelOrder?: TranscriptModel[];
1706
+ };
1702
1707
  }): Promise<{
1703
1708
  transcript: string;
1704
1709
  success: boolean;
@@ -2967,10 +2972,11 @@ declare const useOrganizationAccess: () => {
2967
2972
 
2968
2973
  declare function useSpeakableTranscript(): {
2969
2974
  mutation: _tanstack_react_query.UseMutationResult<string | null, Error, {
2970
- model: "gemini" | "assemblyai" | "whisper" | "whisper-3";
2975
+ model: TranscriptModel;
2971
2976
  audioUrl: string;
2972
2977
  language: string;
2973
2978
  prompt?: string;
2979
+ cleanHallucinations?: boolean;
2974
2980
  }, unknown>;
2975
2981
  };
2976
2982
  declare function useSpeakableTranscriptCycle(): {
@@ -2981,6 +2987,10 @@ declare function useSpeakableTranscriptCycle(): {
2981
2987
  audioUrl: string;
2982
2988
  language: string;
2983
2989
  prompt: string;
2990
+ cleanHallucinations?: boolean;
2991
+ options?: {
2992
+ modelOrder?: TranscriptModel[];
2993
+ };
2984
2994
  }, unknown>;
2985
2995
  };
2986
2996
 
@@ -3563,4 +3573,4 @@ declare const createFsClientNative: ({ db, httpsCallable, logEvent }: FsClientPa
3563
3573
  };
3564
3574
  };
3565
3575
 
3566
- export { ActivityPageType, type Assignment, type AssignmentAnalyticsType$1 as AssignmentAnalyticsType, type AssignmentWithId, BASE_MULTIPLE_CHOICE_FIELD_VALUES, BASE_REPEAT_FIELD_VALUES, BASE_RESPOND_FIELD_VALUES, CONVERSATION_PAGE_ACTIVITY_TYPES, ConversationPageMode, type CreditContract, FeedbackTypesCard, FsCtx, type InstitutionSubscription, LENIENCY_OPTIONS, LeniencyCard, MULTIPLE_CHOICE_PAGE_ACTIVITY_TYPES, type MediaPageActivity, type Organization, type OrganizationAccess, type PageActivity, type PageActivityWithId, type PageScore, REPEAT_PAGE_ACTIVITY_TYPES, RESPOND_AUDIO_PAGE_ACTIVITY_TYPES, RESPOND_PAGE_ACTIVITY_TYPES, RESPOND_WRITE_PAGE_ACTIVITY_TYPES, type RefsCardsFiresotre, type RefsSetsFirestore, SPEAKABLE_ANALYTICS, SPEAKABLE_NOTIFICATIONS, STUDENT_LEVELS_OPTIONS, type Score, type ScoreWithId, type Set, type SetWithId, type SpeakableNotificationType, SpeakableNotificationTypes, SpeakableProvider, VerificationCardStatus, assignmentQueryKeys, cardsQueryKeys, checkIsConversationPage, checkIsMCPage, checkIsMediaPage, checkIsRepeatPage, checkIsRespondAudioPage, checkIsRespondPage, checkIsRespondWrittenPage, checkIsShortAnswerPage, checkTypePageActivity, cleanString, createAssignmentRepo, createCardRepo, createFsClientNative as createFsClient, createSetRepo, creditQueryKeys, debounce, getCardFromCache, getLabelPage, getPageMediaData, getPagePrompt, getPhraseLength, getRespondCardTool, getSetFromCache, getSingleMediaPageData, getTotalCompletedCards, getTranscript, getTranscriptCycle, getWordHash, purify, refsCardsFiresotre, refsSetsFirestore, scoreQueryKeys, setsQueryKeys, updateCardInCache, updateSetInCache, useActivity, useActivityFeedbackAccess, useAssignment, useBaseOpenAI, useCards, useClearScore, useClearScoreV2, useCreateCard, useCreateCards, useCreateNotification, useGetCard, useOrganizationAccess, useScore, useSet, useSpeakableApi, useSpeakableTranscript, useSpeakableTranscriptCycle, useSubmitAssignmentScore, useSubmitPracticeScore, 
useUpdateCardScore, useUpdateScore, useUpdateStudentVocab, useUserCredits };
3576
+ export { ActivityPageType, type Assignment, type AssignmentAnalyticsType$1 as AssignmentAnalyticsType, type AssignmentWithId, BASE_MULTIPLE_CHOICE_FIELD_VALUES, BASE_REPEAT_FIELD_VALUES, BASE_RESPOND_FIELD_VALUES, CONVERSATION_PAGE_ACTIVITY_TYPES, ConversationPageMode, type CreditContract, FeedbackTypesCard, FsCtx, type InstitutionSubscription, LENIENCY_OPTIONS, LeniencyCard, MULTIPLE_CHOICE_PAGE_ACTIVITY_TYPES, type MediaPageActivity, type Organization, type OrganizationAccess, type PageActivity, type PageActivityWithId, type PageScore, REPEAT_PAGE_ACTIVITY_TYPES, RESPOND_AUDIO_PAGE_ACTIVITY_TYPES, RESPOND_PAGE_ACTIVITY_TYPES, RESPOND_WRITE_PAGE_ACTIVITY_TYPES, type RefsCardsFiresotre, type RefsSetsFirestore, SPEAKABLE_ANALYTICS, SPEAKABLE_NOTIFICATIONS, STUDENT_LEVELS_OPTIONS, type Score, type ScoreWithId, type Set, type SetWithId, type SpeakableNotificationType, SpeakableNotificationTypes, SpeakableProvider, type TranscriptModel, VerificationCardStatus, assignmentQueryKeys, cardsQueryKeys, checkIsConversationPage, checkIsMCPage, checkIsMediaPage, checkIsRepeatPage, checkIsRespondAudioPage, checkIsRespondPage, checkIsRespondWrittenPage, checkIsShortAnswerPage, checkTypePageActivity, cleanString, createAssignmentRepo, createCardRepo, createFsClientNative as createFsClient, createSetRepo, creditQueryKeys, debounce, getCardFromCache, getLabelPage, getPageMediaData, getPagePrompt, getPhraseLength, getRespondCardTool, getSetFromCache, getSingleMediaPageData, getTotalCompletedCards, getTranscript, getTranscriptCycle, getWordHash, purify, refsCardsFiresotre, refsSetsFirestore, scoreQueryKeys, setsQueryKeys, updateCardInCache, updateSetInCache, useActivity, useActivityFeedbackAccess, useAssignment, useBaseOpenAI, useCards, useClearScore, useClearScoreV2, useCreateCard, useCreateCards, useCreateNotification, useGetCard, useOrganizationAccess, useScore, useSet, useSpeakableApi, useSpeakableTranscript, useSpeakableTranscriptCycle, useSubmitAssignmentScore, 
useSubmitPracticeScore, useUpdateCardScore, useUpdateScore, useUpdateStudentVocab, useUserCredits };
@@ -2333,6 +2333,10 @@ function detectTranscriptHallucination(transcript) {
2333
2333
  return false;
2334
2334
  }
2335
2335
  const text = transcript.trim();
2336
+ const wordCount = text.split(/\s+/).filter(Boolean).length;
2337
+ if (text.length < 120 || wordCount < 20) {
2338
+ return false;
2339
+ }
2336
2340
  const shortRepeats = detectShortRepeats(text);
2337
2341
  if (shortRepeats) return true;
2338
2342
  const phraseRepeats = detectPhraseRepeats(text);
@@ -2348,14 +2352,14 @@ function detectShortRepeats(text) {
2348
2352
  for (let i = 1; i < words.length; i++) {
2349
2353
  if (words[i] === words[i - 1]) {
2350
2354
  repeatCount++;
2351
- if (repeatCount >= 3) return true;
2355
+ if (repeatCount >= 4) return true;
2352
2356
  } else {
2353
2357
  repeatCount = 1;
2354
2358
  }
2355
2359
  }
2356
2360
  const uniqueWords = new Set(words);
2357
2361
  const repetitionRatio = words.length / uniqueWords.size;
2358
- if (words.length >= 10 && uniqueWords.size <= 3 && repetitionRatio >= 3) {
2362
+ if (words.length >= 12 && uniqueWords.size <= 2 && repetitionRatio >= 5) {
2359
2363
  return true;
2360
2364
  }
2361
2365
  return false;
@@ -2372,12 +2376,12 @@ function detectPhraseRepeats(text) {
2372
2376
  break;
2373
2377
  }
2374
2378
  }
2375
- if (consecutiveRepeats >= 2) {
2379
+ if (consecutiveRepeats >= 3) {
2376
2380
  return true;
2377
2381
  }
2378
2382
  }
2379
2383
  const uniqueSentences = new Set(sentences);
2380
- if (sentences.length >= 3 && uniqueSentences.size === 1) {
2384
+ if (sentences.length >= 4 && uniqueSentences.size === 1) {
2381
2385
  return true;
2382
2386
  }
2383
2387
  return false;
@@ -2394,7 +2398,7 @@ function isSimilarSentence(s1, s2) {
2394
2398
  const set2 = new Set(words2);
2395
2399
  const intersection = new Set([...set1].filter((w) => set2.has(w)));
2396
2400
  const similarity = intersection.size * 2 / (set1.size + set2.size);
2397
- return similarity >= 0.8;
2401
+ return similarity >= 0.9;
2398
2402
  }
2399
2403
  function detectCyclicPattern(text) {
2400
2404
  const normalized = text.toLowerCase().replace(/\s+/g, " ").trim();
@@ -2414,7 +2418,7 @@ function detectCyclicPattern(text) {
2414
2418
  break;
2415
2419
  }
2416
2420
  }
2417
- if (matchCount >= 3) {
2421
+ if (matchCount >= 4) {
2418
2422
  return true;
2419
2423
  }
2420
2424
  }
@@ -2430,12 +2434,26 @@ function cleanHallucinatedTranscript(transcript) {
2430
2434
 
2431
2435
  // src/utils/ai/get-transcript.ts
2432
2436
  async function getTranscript(model, args, cleanHallucinations = true) {
2433
- var _a, _b, _c, _d, _e, _f, _g, _h, _i;
2437
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l;
2434
2438
  const getGeminiTranscript = (_b = (_a = api).httpsCallable) == null ? void 0 : _b.call(_a, "getGeminiTranscript");
2435
2439
  const getAssemblyAITranscript = (_d = (_c = api).httpsCallable) == null ? void 0 : _d.call(_c, "transcribeAssemblyAIAudio");
2436
2440
  const getWhisper3Transcript = (_f = (_e = api).httpsCallable) == null ? void 0 : _f.call(_e, "generateGroqTranscript");
2437
2441
  const getWhisper1Transcript = (_h = (_g = api).httpsCallable) == null ? void 0 : _h.call(_g, "transcribeAudio");
2438
- console.log("Getting transcript from", model);
2442
+ const getGPT4oTranscript = (_j = (_i = api).httpsCallable) == null ? void 0 : _j.call(_i, "generateGpt4oTranscript");
2443
+ console.log("Getting transcript from", model, " cleanHallucinations", cleanHallucinations);
2444
+ if (model === "gpt-4o") {
2445
+ try {
2446
+ const { data } = await (getGPT4oTranscript == null ? void 0 : getGPT4oTranscript({
2447
+ audioUrl: args.audioUrl,
2448
+ language: args.language,
2449
+ teacherPrompt: (_k = args.prompt) != null ? _k : ""
2450
+ }));
2451
+ return data;
2452
+ } catch (error) {
2453
+ console.error("Error getting transcript from GPT-4o:", error);
2454
+ throw error;
2455
+ }
2456
+ }
2439
2457
  if (model === "whisper-3") {
2440
2458
  try {
2441
2459
  const { data } = await (getWhisper3Transcript == null ? void 0 : getWhisper3Transcript({
@@ -2465,7 +2483,7 @@ async function getTranscript(model, args, cleanHallucinations = true) {
2465
2483
  const { data } = await (getGeminiTranscript == null ? void 0 : getGeminiTranscript({
2466
2484
  audioUrl: args.audioUrl,
2467
2485
  targetLanguage: args.language,
2468
- prompt: (_i = args.prompt) != null ? _i : ""
2486
+ prompt: (_l = args.prompt) != null ? _l : ""
2469
2487
  }));
2470
2488
  return cleanHallucinations ? cleanHallucinatedTranscript(data.transcript) : data.transcript;
2471
2489
  } catch (error) {
@@ -2488,15 +2506,27 @@ async function getTranscript(model, args, cleanHallucinations = true) {
2488
2506
  return null;
2489
2507
  }
2490
2508
  async function getTranscriptCycle(args) {
2491
- const models = ["whisper-3", "whisper", "gemini", "assemblyai"];
2509
+ var _a, _b, _c;
2510
+ const models = (_b = (_a = args.options) == null ? void 0 : _a.modelOrder) != null ? _b : [
2511
+ "gpt-4o",
2512
+ "whisper",
2513
+ "whisper-3",
2514
+ "gemini",
2515
+ "assemblyai"
2516
+ ];
2492
2517
  let transcript = "";
2493
2518
  let lastError = null;
2494
2519
  for (const model of models) {
2495
2520
  try {
2496
- console.log("Getting transcript from", model);
2497
- const transcriptResult = await getTranscript(model, args, false);
2521
+ console.log(
2522
+ "Getting transcript from",
2523
+ model,
2524
+ " cleanHallucinations",
2525
+ args.cleanHallucinations
2526
+ );
2527
+ const transcriptResult = await getTranscript(model, args, (_c = args.cleanHallucinations) != null ? _c : true);
2498
2528
  const rawTranscript = transcriptResult || "";
2499
- transcript = cleanHallucinatedTranscript(rawTranscript);
2529
+ transcript = rawTranscript;
2500
2530
  if (transcript !== "") {
2501
2531
  console.log(`Successfully got transcript from ${model}`);
2502
2532
  break;
@@ -3253,9 +3283,10 @@ function useSpeakableTranscript() {
3253
3283
  model,
3254
3284
  audioUrl,
3255
3285
  language,
3256
- prompt
3286
+ prompt,
3287
+ cleanHallucinations = true
3257
3288
  }) => {
3258
- return getTranscript(model, { audioUrl, language, prompt });
3289
+ return getTranscript(model, { audioUrl, language, prompt }, cleanHallucinations);
3259
3290
  },
3260
3291
  retry: false
3261
3292
  });
@@ -3266,7 +3297,11 @@ function useSpeakableTranscript() {
3266
3297
  function useSpeakableTranscriptCycle() {
3267
3298
  const mutation = (0, import_react_query7.useMutation)({
3268
3299
  mutationFn: async (args) => {
3269
- return getTranscriptCycle(args);
3300
+ return getTranscriptCycle({
3301
+ ...args,
3302
+ cleanHallucinations: args.cleanHallucinations,
3303
+ options: args.options
3304
+ });
3270
3305
  },
3271
3306
  retry: false
3272
3307
  });