@speakableio/core 1.0.64 → 1.0.66

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1690,7 +1690,8 @@ declare function SpeakableProvider({ user, children, queryClient, permissions, f
1690
1690
  }): react_jsx_runtime.JSX.Element | null;
1691
1691
  declare function useSpeakableApi(): FsContext;
1692
1692
 
1693
- declare function getTranscript(model: 'gemini' | 'assemblyai' | 'whisper' | 'whisper-3', args: {
1693
+ type TranscriptModel = 'gemini' | 'assemblyai' | 'whisper' | 'whisper-3' | 'gpt-4o';
1694
+ declare function getTranscript(model: TranscriptModel, args: {
1694
1695
  language: string;
1695
1696
  audioUrl: string;
1696
1697
  prompt?: string;
@@ -1699,6 +1700,10 @@ declare function getTranscriptCycle(args: {
1699
1700
  audioUrl: string;
1700
1701
  language: string;
1701
1702
  prompt?: string;
1703
+ cleanHallucinations?: boolean;
1704
+ options?: {
1705
+ modelOrder?: TranscriptModel[];
1706
+ };
1702
1707
  }): Promise<{
1703
1708
  transcript: string;
1704
1709
  success: boolean;
@@ -2967,10 +2972,11 @@ declare const useOrganizationAccess: () => {
2967
2972
 
2968
2973
  declare function useSpeakableTranscript(): {
2969
2974
  mutation: _tanstack_react_query.UseMutationResult<string | null, Error, {
2970
- model: "gemini" | "assemblyai" | "whisper" | "whisper-3";
2975
+ model: TranscriptModel;
2971
2976
  audioUrl: string;
2972
2977
  language: string;
2973
2978
  prompt?: string;
2979
+ cleanHallucinations?: boolean;
2974
2980
  }, unknown>;
2975
2981
  };
2976
2982
  declare function useSpeakableTranscriptCycle(): {
@@ -2981,6 +2987,10 @@ declare function useSpeakableTranscriptCycle(): {
2981
2987
  audioUrl: string;
2982
2988
  language: string;
2983
2989
  prompt: string;
2990
+ cleanHallucinations?: boolean;
2991
+ options?: {
2992
+ modelOrder?: TranscriptModel[];
2993
+ };
2984
2994
  }, unknown>;
2985
2995
  };
2986
2996
 
@@ -3563,4 +3573,4 @@ declare const createFsClientWeb: ({ db, httpsCallable, logEvent }: FsClientParam
3563
3573
  };
3564
3574
  };
3565
3575
 
3566
- export { ActivityPageType, type Assignment, type AssignmentAnalyticsType$1 as AssignmentAnalyticsType, type AssignmentWithId, BASE_MULTIPLE_CHOICE_FIELD_VALUES, BASE_REPEAT_FIELD_VALUES, BASE_RESPOND_FIELD_VALUES, CONVERSATION_PAGE_ACTIVITY_TYPES, ConversationPageMode, type CreditContract, FeedbackTypesCard, FsCtx, type InstitutionSubscription, LENIENCY_OPTIONS, LeniencyCard, MULTIPLE_CHOICE_PAGE_ACTIVITY_TYPES, type MediaPageActivity, type Organization, type OrganizationAccess, type PageActivity, type PageActivityWithId, type PageScore, REPEAT_PAGE_ACTIVITY_TYPES, RESPOND_AUDIO_PAGE_ACTIVITY_TYPES, RESPOND_PAGE_ACTIVITY_TYPES, RESPOND_WRITE_PAGE_ACTIVITY_TYPES, type RefsCardsFiresotre, type RefsSetsFirestore, SPEAKABLE_ANALYTICS, SPEAKABLE_NOTIFICATIONS, STUDENT_LEVELS_OPTIONS, type Score, type ScoreWithId, type Set, type SetWithId, type SpeakableNotificationType, SpeakableNotificationTypes, SpeakableProvider, VerificationCardStatus, assignmentQueryKeys, cardsQueryKeys, checkIsConversationPage, checkIsMCPage, checkIsMediaPage, checkIsRepeatPage, checkIsRespondAudioPage, checkIsRespondPage, checkIsRespondWrittenPage, checkIsShortAnswerPage, checkTypePageActivity, cleanString, createAssignmentRepo, createCardRepo, createFsClientWeb as createFsClient, createSetRepo, creditQueryKeys, debounce, getCardFromCache, getLabelPage, getPageMediaData, getPagePrompt, getPhraseLength, getRespondCardTool, getSetFromCache, getSingleMediaPageData, getTotalCompletedCards, getTranscript, getTranscriptCycle, getWordHash, purify, refsCardsFiresotre, refsSetsFirestore, scoreQueryKeys, setsQueryKeys, updateCardInCache, updateSetInCache, useActivity, useActivityFeedbackAccess, useAssignment, useBaseOpenAI, useCards, useClearScore, useClearScoreV2, useCreateCard, useCreateCards, useCreateNotification, useGetCard, useOrganizationAccess, useScore, useSet, useSpeakableApi, useSpeakableTranscript, useSpeakableTranscriptCycle, useSubmitAssignmentScore, useSubmitPracticeScore, useUpdateCardScore, useUpdateScore, useUpdateStudentVocab, useUserCredits };
3576
+ export { ActivityPageType, type Assignment, type AssignmentAnalyticsType$1 as AssignmentAnalyticsType, type AssignmentWithId, BASE_MULTIPLE_CHOICE_FIELD_VALUES, BASE_REPEAT_FIELD_VALUES, BASE_RESPOND_FIELD_VALUES, CONVERSATION_PAGE_ACTIVITY_TYPES, ConversationPageMode, type CreditContract, FeedbackTypesCard, FsCtx, type InstitutionSubscription, LENIENCY_OPTIONS, LeniencyCard, MULTIPLE_CHOICE_PAGE_ACTIVITY_TYPES, type MediaPageActivity, type Organization, type OrganizationAccess, type PageActivity, type PageActivityWithId, type PageScore, REPEAT_PAGE_ACTIVITY_TYPES, RESPOND_AUDIO_PAGE_ACTIVITY_TYPES, RESPOND_PAGE_ACTIVITY_TYPES, RESPOND_WRITE_PAGE_ACTIVITY_TYPES, type RefsCardsFiresotre, type RefsSetsFirestore, SPEAKABLE_ANALYTICS, SPEAKABLE_NOTIFICATIONS, STUDENT_LEVELS_OPTIONS, type Score, type ScoreWithId, type Set, type SetWithId, type SpeakableNotificationType, SpeakableNotificationTypes, SpeakableProvider, type TranscriptModel, VerificationCardStatus, assignmentQueryKeys, cardsQueryKeys, checkIsConversationPage, checkIsMCPage, checkIsMediaPage, checkIsRepeatPage, checkIsRespondAudioPage, checkIsRespondPage, checkIsRespondWrittenPage, checkIsShortAnswerPage, checkTypePageActivity, cleanString, createAssignmentRepo, createCardRepo, createFsClientWeb as createFsClient, createSetRepo, creditQueryKeys, debounce, getCardFromCache, getLabelPage, getPageMediaData, getPagePrompt, getPhraseLength, getRespondCardTool, getSetFromCache, getSingleMediaPageData, getTotalCompletedCards, getTranscript, getTranscriptCycle, getWordHash, purify, refsCardsFiresotre, refsSetsFirestore, scoreQueryKeys, setsQueryKeys, updateCardInCache, updateSetInCache, useActivity, useActivityFeedbackAccess, useAssignment, useBaseOpenAI, useCards, useClearScore, useClearScoreV2, useCreateCard, useCreateCards, useCreateNotification, useGetCard, useOrganizationAccess, useScore, useSet, useSpeakableApi, useSpeakableTranscript, useSpeakableTranscriptCycle, useSubmitAssignmentScore, useSubmitPracticeScore, useUpdateCardScore, useUpdateScore, useUpdateStudentVocab, useUserCredits };
package/dist/index.web.js CHANGED
@@ -2217,6 +2217,10 @@ function detectTranscriptHallucination(transcript) {
2217
2217
  return false;
2218
2218
  }
2219
2219
  const text = transcript.trim();
2220
+ const wordCount = text.split(/\s+/).filter(Boolean).length;
2221
+ if (text.length < 120 || wordCount < 20) {
2222
+ return false;
2223
+ }
2220
2224
  const shortRepeats = detectShortRepeats(text);
2221
2225
  if (shortRepeats) return true;
2222
2226
  const phraseRepeats = detectPhraseRepeats(text);
@@ -2232,14 +2236,14 @@ function detectShortRepeats(text) {
2232
2236
  for (let i = 1; i < words.length; i++) {
2233
2237
  if (words[i] === words[i - 1]) {
2234
2238
  repeatCount++;
2235
- if (repeatCount >= 3) return true;
2239
+ if (repeatCount >= 4) return true;
2236
2240
  } else {
2237
2241
  repeatCount = 1;
2238
2242
  }
2239
2243
  }
2240
2244
  const uniqueWords = new Set(words);
2241
2245
  const repetitionRatio = words.length / uniqueWords.size;
2242
- if (words.length >= 10 && uniqueWords.size <= 3 && repetitionRatio >= 3) {
2246
+ if (words.length >= 12 && uniqueWords.size <= 2 && repetitionRatio >= 5) {
2243
2247
  return true;
2244
2248
  }
2245
2249
  return false;
@@ -2256,12 +2260,12 @@ function detectPhraseRepeats(text) {
2256
2260
  break;
2257
2261
  }
2258
2262
  }
2259
- if (consecutiveRepeats >= 2) {
2263
+ if (consecutiveRepeats >= 3) {
2260
2264
  return true;
2261
2265
  }
2262
2266
  }
2263
2267
  const uniqueSentences = new Set(sentences);
2264
- if (sentences.length >= 3 && uniqueSentences.size === 1) {
2268
+ if (sentences.length >= 4 && uniqueSentences.size === 1) {
2265
2269
  return true;
2266
2270
  }
2267
2271
  return false;
@@ -2278,7 +2282,7 @@ function isSimilarSentence(s1, s2) {
2278
2282
  const set2 = new Set(words2);
2279
2283
  const intersection = new Set([...set1].filter((w) => set2.has(w)));
2280
2284
  const similarity = intersection.size * 2 / (set1.size + set2.size);
2281
- return similarity >= 0.8;
2285
+ return similarity >= 0.9;
2282
2286
  }
2283
2287
  function detectCyclicPattern(text) {
2284
2288
  const normalized = text.toLowerCase().replace(/\s+/g, " ").trim();
@@ -2298,7 +2302,7 @@ function detectCyclicPattern(text) {
2298
2302
  break;
2299
2303
  }
2300
2304
  }
2301
- if (matchCount >= 3) {
2305
+ if (matchCount >= 4) {
2302
2306
  return true;
2303
2307
  }
2304
2308
  }
@@ -2314,12 +2318,26 @@ function cleanHallucinatedTranscript(transcript) {
2314
2318
 
2315
2319
  // src/utils/ai/get-transcript.ts
2316
2320
  async function getTranscript(model, args, cleanHallucinations = true) {
2317
- var _a, _b, _c, _d, _e, _f, _g, _h, _i;
2321
+ var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l;
2318
2322
  const getGeminiTranscript = (_b = (_a = api).httpsCallable) == null ? void 0 : _b.call(_a, "getGeminiTranscript");
2319
2323
  const getAssemblyAITranscript = (_d = (_c = api).httpsCallable) == null ? void 0 : _d.call(_c, "transcribeAssemblyAIAudio");
2320
2324
  const getWhisper3Transcript = (_f = (_e = api).httpsCallable) == null ? void 0 : _f.call(_e, "generateGroqTranscript");
2321
2325
  const getWhisper1Transcript = (_h = (_g = api).httpsCallable) == null ? void 0 : _h.call(_g, "transcribeAudio");
2322
- console.log("Getting transcript from", model);
2326
+ const getGPT4oTranscript = (_j = (_i = api).httpsCallable) == null ? void 0 : _j.call(_i, "generateGpt4oTranscript");
2327
+ console.log("Getting transcript from", model, " cleanHallucinations", cleanHallucinations);
2328
+ if (model === "gpt-4o") {
2329
+ try {
2330
+ const { data } = await (getGPT4oTranscript == null ? void 0 : getGPT4oTranscript({
2331
+ audioUrl: args.audioUrl,
2332
+ language: args.language,
2333
+ teacherPrompt: (_k = args.prompt) != null ? _k : ""
2334
+ }));
2335
+ return data;
2336
+ } catch (error) {
2337
+ console.error("Error getting transcript from GPT-4o:", error);
2338
+ throw error;
2339
+ }
2340
+ }
2323
2341
  if (model === "whisper-3") {
2324
2342
  try {
2325
2343
  const { data } = await (getWhisper3Transcript == null ? void 0 : getWhisper3Transcript({
@@ -2349,7 +2367,7 @@ async function getTranscript(model, args, cleanHallucinations = true) {
2349
2367
  const { data } = await (getGeminiTranscript == null ? void 0 : getGeminiTranscript({
2350
2368
  audioUrl: args.audioUrl,
2351
2369
  targetLanguage: args.language,
2352
- prompt: (_i = args.prompt) != null ? _i : ""
2370
+ prompt: (_l = args.prompt) != null ? _l : ""
2353
2371
  }));
2354
2372
  return cleanHallucinations ? cleanHallucinatedTranscript(data.transcript) : data.transcript;
2355
2373
  } catch (error) {
@@ -2372,15 +2390,27 @@ async function getTranscript(model, args, cleanHallucinations = true) {
2372
2390
  return null;
2373
2391
  }
2374
2392
  async function getTranscriptCycle(args) {
2375
- const models = ["whisper-3", "whisper", "gemini", "assemblyai"];
2393
+ var _a, _b, _c;
2394
+ const models = (_b = (_a = args.options) == null ? void 0 : _a.modelOrder) != null ? _b : [
2395
+ "gpt-4o",
2396
+ "whisper",
2397
+ "whisper-3",
2398
+ "gemini",
2399
+ "assemblyai"
2400
+ ];
2376
2401
  let transcript = "";
2377
2402
  let lastError = null;
2378
2403
  for (const model of models) {
2379
2404
  try {
2380
- console.log("Getting transcript from", model);
2381
- const transcriptResult = await getTranscript(model, args, false);
2405
+ console.log(
2406
+ "Getting transcript from",
2407
+ model,
2408
+ " cleanHallucinations",
2409
+ args.cleanHallucinations
2410
+ );
2411
+ const transcriptResult = await getTranscript(model, args, (_c = args.cleanHallucinations) != null ? _c : true);
2382
2412
  const rawTranscript = transcriptResult || "";
2383
- transcript = cleanHallucinatedTranscript(rawTranscript);
2413
+ transcript = rawTranscript;
2384
2414
  if (transcript !== "") {
2385
2415
  console.log(`Successfully got transcript from ${model}`);
2386
2416
  break;
@@ -3137,9 +3167,10 @@ function useSpeakableTranscript() {
3137
3167
  model,
3138
3168
  audioUrl,
3139
3169
  language,
3140
- prompt
3170
+ prompt,
3171
+ cleanHallucinations = true
3141
3172
  }) => {
3142
- return getTranscript(model, { audioUrl, language, prompt });
3173
+ return getTranscript(model, { audioUrl, language, prompt }, cleanHallucinations);
3143
3174
  },
3144
3175
  retry: false
3145
3176
  });
@@ -3150,7 +3181,11 @@ function useSpeakableTranscript() {
3150
3181
  function useSpeakableTranscriptCycle() {
3151
3182
  const mutation = useMutation3({
3152
3183
  mutationFn: async (args) => {
3153
- return getTranscriptCycle(args);
3184
+ return getTranscriptCycle({
3185
+ ...args,
3186
+ cleanHallucinations: args.cleanHallucinations,
3187
+ options: args.options
3188
+ });
3154
3189
  },
3155
3190
  retry: false
3156
3191
  });