@speakableio/core 1.0.60 → 1.0.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2212,80 +2212,18 @@ var createSetRepo = () => {
2212
2212
  };
2213
2213
 
2214
2214
  // src/utils/ai/detect-transcript-hallucionation.ts
2215
- var HALLUCINATION_THRESHOLDS = {
2216
- // Short repeats
2217
- MIN_CONSECUTIVE_REPEATS: 5,
2218
- // Increased from 3 to allow phrases like "pio pio pio" or "no no no no"
2219
- MIN_WORDS_FOR_RATIO_CHECK: 15,
2220
- // Increased from 10 to require longer text for ratio check
2221
- MAX_UNIQUE_WORDS_FOR_RATIO: 3,
2222
- MIN_REPETITION_RATIO: 4,
2223
- // Increased from 3 to be more permissive
2224
- // Phrase repeats
2225
- MIN_SENTENCE_LENGTH: 15,
2226
- // Increased from 10 to avoid flagging short natural sentences
2227
- MIN_CONSECUTIVE_SIMILAR_SENTENCES: 3,
2228
- // Increased from 2 to allow some natural repetition
2229
- MIN_SENTENCES_FOR_DUPLICATE_CHECK: 4,
2230
- // Increased from 3
2231
- // Cyclic patterns
2232
- MIN_CYCLE_LENGTH: 30,
2233
- // Increased from 20 to focus on longer patterns
2234
- MIN_CYCLE_REPEATS: 3,
2235
- // Entropy detection
2236
- MIN_LENGTH_FOR_ENTROPY_CHECK: 60,
2237
- // Increased from 50
2238
- MAX_ENTROPY_THRESHOLD: 2.2,
2239
- // Decreased from 2.5 to be more strict on entropy (lower = more repetitive needed)
2240
- // Similarity
2241
- SENTENCE_SIMILARITY_THRESHOLD: 0.85,
2242
- // Increased from 0.8 to require more similarity
2243
- SEGMENT_SIMILARITY_THRESHOLD: 0.9
2244
- // Increased from 0.85
2245
- };
2246
- function detectTranscriptHallucinationWithDetails(transcript) {
2215
+ function detectTranscriptHallucination(transcript) {
2247
2216
  if (!transcript || transcript.trim().length === 0) {
2248
- return { isHallucination: false };
2217
+ return false;
2249
2218
  }
2250
2219
  const text = transcript.trim();
2251
- if (text.length < 10) {
2252
- return { isHallucination: false };
2253
- }
2254
2220
  const shortRepeats = detectShortRepeats(text);
2255
- if (shortRepeats) {
2256
- return {
2257
- isHallucination: true,
2258
- reason: "Detected repeated short words or phrases",
2259
- confidence: 0.9
2260
- };
2261
- }
2221
+ if (shortRepeats) return true;
2262
2222
  const phraseRepeats = detectPhraseRepeats(text);
2263
- if (phraseRepeats) {
2264
- return {
2265
- isHallucination: true,
2266
- reason: "Detected repeated sentences or phrases",
2267
- confidence: 0.85
2268
- };
2269
- }
2223
+ if (phraseRepeats) return true;
2270
2224
  const cyclicRepeats = detectCyclicPattern(text);
2271
- if (cyclicRepeats) {
2272
- return {
2273
- isHallucination: true,
2274
- reason: "Detected cyclic repetition pattern",
2275
- confidence: 0.8
2276
- };
2277
- }
2278
- if (text.length >= HALLUCINATION_THRESHOLDS.MIN_LENGTH_FOR_ENTROPY_CHECK) {
2279
- const entropy = calculateEntropy(text);
2280
- if (entropy < HALLUCINATION_THRESHOLDS.MAX_ENTROPY_THRESHOLD) {
2281
- return {
2282
- isHallucination: true,
2283
- reason: "Detected low entropy (likely gibberish or excessive repetition)",
2284
- confidence: 0.75
2285
- };
2286
- }
2287
- }
2288
- return { isHallucination: false };
2225
+ if (cyclicRepeats) return true;
2226
+ return false;
2289
2227
  }
2290
2228
  function detectShortRepeats(text) {
2291
2229
  const words = text.toLowerCase().split(/[\s,;.!?]+/).filter((w) => w.length > 0);
@@ -2294,22 +2232,20 @@ function detectShortRepeats(text) {
2294
2232
  for (let i = 1; i < words.length; i++) {
2295
2233
  if (words[i] === words[i - 1]) {
2296
2234
  repeatCount++;
2297
- if (repeatCount >= HALLUCINATION_THRESHOLDS.MIN_CONSECUTIVE_REPEATS) {
2298
- return true;
2299
- }
2235
+ if (repeatCount >= 3) return true;
2300
2236
  } else {
2301
2237
  repeatCount = 1;
2302
2238
  }
2303
2239
  }
2304
2240
  const uniqueWords = new Set(words);
2305
2241
  const repetitionRatio = words.length / uniqueWords.size;
2306
- if (words.length >= HALLUCINATION_THRESHOLDS.MIN_WORDS_FOR_RATIO_CHECK && uniqueWords.size <= HALLUCINATION_THRESHOLDS.MAX_UNIQUE_WORDS_FOR_RATIO && repetitionRatio >= HALLUCINATION_THRESHOLDS.MIN_REPETITION_RATIO) {
2242
+ if (words.length >= 10 && uniqueWords.size <= 3 && repetitionRatio >= 3) {
2307
2243
  return true;
2308
2244
  }
2309
2245
  return false;
2310
2246
  }
2311
2247
  function detectPhraseRepeats(text) {
2312
- const sentences = text.split(/[.!?]+/).map((s) => s.trim().toLowerCase()).filter((s) => s.length > HALLUCINATION_THRESHOLDS.MIN_SENTENCE_LENGTH);
2248
+ const sentences = text.split(/[.!?]+/).map((s) => s.trim().toLowerCase()).filter((s) => s.length > 10);
2313
2249
  if (sentences.length < 2) return false;
2314
2250
  for (let i = 0; i < sentences.length - 1; i++) {
2315
2251
  let consecutiveRepeats = 1;
@@ -2320,17 +2256,17 @@ function detectPhraseRepeats(text) {
2320
2256
  break;
2321
2257
  }
2322
2258
  }
2323
- if (consecutiveRepeats >= HALLUCINATION_THRESHOLDS.MIN_CONSECUTIVE_SIMILAR_SENTENCES) {
2259
+ if (consecutiveRepeats >= 2) {
2324
2260
  return true;
2325
2261
  }
2326
2262
  }
2327
2263
  const uniqueSentences = new Set(sentences);
2328
- if (sentences.length >= HALLUCINATION_THRESHOLDS.MIN_SENTENCES_FOR_DUPLICATE_CHECK && uniqueSentences.size === 1) {
2264
+ if (sentences.length >= 3 && uniqueSentences.size === 1) {
2329
2265
  return true;
2330
2266
  }
2331
2267
  return false;
2332
2268
  }
2333
- function isSimilarSentence(s1, s2, threshold = HALLUCINATION_THRESHOLDS.SENTENCE_SIMILARITY_THRESHOLD) {
2269
+ function isSimilarSentence(s1, s2) {
2334
2270
  if (s1 === s2) return true;
2335
2271
  const normalized1 = s1.replace(/\s+/g, " ").trim();
2336
2272
  const normalized2 = s2.replace(/\s+/g, " ").trim();
@@ -2342,81 +2278,35 @@ function isSimilarSentence(s1, s2, threshold = HALLUCINATION_THRESHOLDS.SENTENCE
2342
2278
  const set2 = new Set(words2);
2343
2279
  const intersection = new Set([...set1].filter((w) => set2.has(w)));
2344
2280
  const similarity = intersection.size * 2 / (set1.size + set2.size);
2345
- return similarity >= threshold;
2281
+ return similarity >= 0.8;
2346
2282
  }
2347
2283
  function detectCyclicPattern(text) {
2348
2284
  const normalized = text.toLowerCase().replace(/\s+/g, " ").trim();
2349
2285
  const length = normalized.length;
2350
- const minCycleLength = HALLUCINATION_THRESHOLDS.MIN_CYCLE_LENGTH;
2286
+ const minCycleLength = 20;
2351
2287
  const maxCycleLength = Math.floor(length / 2);
2352
- if (maxCycleLength < minCycleLength) return false;
2353
- const step = 5;
2354
- for (let cycleLen = minCycleLength; cycleLen <= maxCycleLength; cycleLen += step) {
2288
+ for (let cycleLen = minCycleLength; cycleLen <= maxCycleLength; cycleLen++) {
2355
2289
  const pattern = normalized.substring(0, cycleLen);
2356
2290
  let matchCount = 0;
2357
2291
  let pos = 0;
2358
2292
  while (pos < length) {
2359
2293
  const segment = normalized.substring(pos, pos + cycleLen);
2360
- if (segment.length < cycleLen) {
2361
- const partialMatch = pattern.startsWith(segment);
2362
- if (partialMatch && matchCount > 0) {
2363
- matchCount++;
2364
- }
2365
- break;
2366
- }
2367
- if (segment === pattern || isSegmentSimilar(segment, pattern)) {
2294
+ if (segment === pattern || isSimilarSentence(segment, pattern)) {
2368
2295
  matchCount++;
2369
2296
  pos += cycleLen;
2370
2297
  } else {
2371
2298
  break;
2372
2299
  }
2373
2300
  }
2374
- if (matchCount >= HALLUCINATION_THRESHOLDS.MIN_CYCLE_REPEATS) {
2301
+ if (matchCount >= 3) {
2375
2302
  return true;
2376
2303
  }
2377
2304
  }
2378
2305
  return false;
2379
2306
  }
2380
- function isSegmentSimilar(s1, s2) {
2381
- if (s1 === s2) return true;
2382
- if (s1.length !== s2.length) return false;
2383
- let matches = 0;
2384
- const minLength = Math.min(s1.length, s2.length);
2385
- for (let i = 0; i < minLength; i++) {
2386
- if (s1[i] === s2[i]) {
2387
- matches++;
2388
- }
2389
- }
2390
- const similarity = matches / minLength;
2391
- return similarity >= HALLUCINATION_THRESHOLDS.SEGMENT_SIMILARITY_THRESHOLD;
2392
- }
2393
- function calculateEntropy(text) {
2394
- if (!text || text.length === 0) {
2395
- return 0;
2396
- }
2397
- const frequencies = /* @__PURE__ */ new Map();
2398
- for (const char of text.toLowerCase()) {
2399
- frequencies.set(char, (frequencies.get(char) || 0) + 1);
2400
- }
2401
- let entropy = 0;
2402
- const length = text.length;
2403
- for (const count of frequencies.values()) {
2404
- const probability = count / length;
2405
- entropy -= probability * Math.log2(probability);
2406
- }
2407
- return entropy;
2408
- }
2409
2307
  function cleanHallucinatedTranscript(transcript) {
2410
- var _a, _b;
2411
- const result = detectTranscriptHallucinationWithDetails(transcript);
2412
- if (result.isHallucination) {
2413
- console.warn(
2414
- "Hallucinated transcript detected and removed:",
2415
- transcript.substring(0, 100),
2416
- `
2417
- Reason: ${(_a = result.reason) != null ? _a : "Unknown"}`,
2418
- `Confidence: ${String((_b = result.confidence) != null ? _b : "Unknown")}`
2419
- );
2308
+ if (detectTranscriptHallucination(transcript)) {
2309
+ console.warn("Hallucinated transcript detected and removed:", transcript.substring(0, 100));
2420
2310
  return "";
2421
2311
  }
2422
2312
  return transcript;
@@ -2424,7 +2314,7 @@ Reason: ${(_a = result.reason) != null ? _a : "Unknown"}`,
2424
2314
 
2425
2315
  // src/utils/ai/get-transcript.ts
2426
2316
  async function getTranscript(model, args, cleanHallucinations = true) {
2427
- var _a, _b, _c, _d, _e, _f;
2317
+ var _a, _b, _c, _d, _e, _f, _g;
2428
2318
  const getGeminiTranscript = (_b = (_a = api).httpsCallable) == null ? void 0 : _b.call(_a, "getGeminiTranscript");
2429
2319
  const getAssemblyAITranscript = (_d = (_c = api).httpsCallable) == null ? void 0 : _d.call(_c, "transcribeAssemblyAIAudio");
2430
2320
  const getWhisperTranscript = (_f = (_e = api).httpsCallable) == null ? void 0 : _f.call(_e, "generateGroqTranscript");
@@ -2445,7 +2335,7 @@ async function getTranscript(model, args, cleanHallucinations = true) {
2445
2335
  const { data } = await (getGeminiTranscript == null ? void 0 : getGeminiTranscript({
2446
2336
  audioUrl: args.audioUrl,
2447
2337
  targetLanguage: args.language,
2448
- prompt: args.prompt
2338
+ prompt: (_g = args.prompt) != null ? _g : ""
2449
2339
  }));
2450
2340
  return cleanHallucinations ? cleanHallucinatedTranscript(data.transcript) : data.transcript;
2451
2341
  } catch (error) {