ei-tui 0.5.0 → 0.5.1

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ei-tui",
3
- "version": "0.5.0",
3
+ "version": "0.5.1",
4
4
  "author": "Flare576",
5
5
  "repository": {
6
6
  "type": "git",
@@ -282,8 +282,7 @@ function normalizeText(text: string): string {
282
282
  .replace(/[\u2018\u2019\u0060\u00B4]/g, "'") // curly single, backtick, acute accent
283
283
  .replace(/[\u2014\u2013\u2012]/g, '-') // em-dash, en-dash, figure dash
284
284
  .replace(/\u00A0/g, ' ') // non-breaking space
285
- .replace(/[\u2000-\u200F]/g, ' ') // unicode space variants
286
- .replace(/\u2026|\.\.\./g, '\u2026'); // normalize both ellipsis forms → unicode ellipsis (1:1)
285
+ .replace(/[\u2000-\u200F]/g, ' '); // unicode space variants
287
286
  }
288
287
 
289
288
  function stripPunctuation(text: string): string {
@@ -297,31 +296,46 @@ function stripPunctuation(text: string): string {
297
296
  .toLowerCase();
298
297
  }
299
298
 
300
- interface WordBoundaryMatch {
299
+ export interface WordBoundaryMatch {
301
300
  start: number;
302
301
  end: number;
303
302
  text: string;
304
303
  }
305
304
 
306
- function findQuoteByWords(quoteText: string, msgText: string): WordBoundaryMatch | null {
305
+ export function expandToWordBoundaries(text: string, start: number, end: number): WordBoundaryMatch {
306
+ // Only walk backward if start is mid-word (not already at a word boundary)
307
+ if (start > 0 && !/\s/.test(text[start]))
308
+ while (start > 0 && !/\s/.test(text[start - 1])) start--;
309
+ // Only walk forward if end is mid-word
310
+ if (end > 0 && !/\s/.test(text[end - 1]))
311
+ while (end < text.length && !/\s/.test(text[end])) end++;
312
+ return { start, end, text: text.slice(start, end) };
313
+ }
314
+
315
+ export function findQuoteByWords(quoteText: string, msgText: string): WordBoundaryMatch | null {
307
316
  const strippedQuote = stripPunctuation(quoteText);
308
317
  const quoteWords = strippedQuote.split(' ').filter(w => w.length > 0);
309
318
 
310
- if (quoteWords.length < 3) return null; // Too short to trust — require at least 3 words
319
+ if (quoteWords.length < 2) return null; // Too short to trust — require at least 2 words
311
320
 
312
- // Build word token list from original message with original positions
321
+ // Build word token list from original message with original positions.
322
+ // Each \S+ token is re-split into sub-tokens (sharing the parent's start/end)
323
+ // so that contractions stripped by stripPunctuation (e.g. don't → "don t")
324
+ // align correctly with quoteWords which is also split on spaces.
313
325
  const wordTokens: Array<{ word: string; start: number; end: number }> = [];
314
326
  const wordRegex = /\S+/g;
315
327
  let match: RegExpExecArray | null;
316
328
  while ((match = wordRegex.exec(msgText)) !== null) {
317
- wordTokens.push({
318
- word: stripPunctuation(match[0]),
319
- start: match.index,
320
- end: match.index + match[0].length,
321
- });
329
+ const tokenStart = match.index;
330
+ const tokenEnd = match.index + match[0].length;
331
+ const stripped = stripPunctuation(match[0]);
332
+ const subWords = stripped.split(' ').filter(w => w.length > 0);
333
+ for (const sub of subWords) {
334
+ wordTokens.push({ word: sub, start: tokenStart, end: tokenEnd });
335
+ }
322
336
  }
323
337
 
324
- // Find contiguous sequence of words matching the quote words
338
+ // Find contiguous sequence of word tokens matching the quote words
325
339
  for (let i = 0; i <= wordTokens.length - quoteWords.length; i++) {
326
340
  let allMatch = true;
327
341
  for (let j = 0; j < quoteWords.length; j++) {
@@ -333,11 +347,7 @@ function findQuoteByWords(quoteText: string, msgText: string): WordBoundaryMatch
333
347
  if (allMatch) {
334
348
  const startToken = wordTokens[i];
335
349
  const endToken = wordTokens[i + quoteWords.length - 1];
336
- return {
337
- start: startToken.start,
338
- end: endToken.end,
339
- text: msgText.slice(startToken.start, endToken.end),
340
- };
350
+ return expandToWordBoundaries(msgText, startToken.start, endToken.end);
341
351
  }
342
352
  }
343
353
 
@@ -370,9 +380,10 @@ async function validateAndStoreQuotes(
370
380
  let matchLevel: string;
371
381
 
372
382
  if (start !== -1) {
373
- matchStart = start;
374
- matchEnd = start + candidate.text.length;
375
- matchText = candidate.text;
383
+ const expanded = expandToWordBoundaries(msgText, start, start + candidate.text.length);
384
+ matchStart = expanded.start;
385
+ matchEnd = expanded.end;
386
+ matchText = expanded.text;
376
387
  matchLevel = "exact";
377
388
  } else {
378
389
  // Level 2: word-boundary fallback
@@ -440,7 +451,8 @@ async function validateAndStoreQuotes(
440
451
  data_item_ids: [dataItemId],
441
452
  persona_groups: [personaGroup || "General"],
442
453
  text: matchText,
443
- speaker: message.role === "human" ? "human" : personaName,
454
+ speaker: message.role === "human" ? "human" : (message.speaker_name ?? personaName),
455
+ channel: personaName,
444
456
  timestamp: message.timestamp,
445
457
  start: matchStart,
446
458
  end: matchEnd,
@@ -67,7 +67,9 @@ export interface Quote {
67
67
  data_item_ids: string[]; // FK[] to DataItemBase.id
68
68
  persona_groups: string[]; // Visibility groups
69
69
  text: string; // The quote content
70
- speaker: "human" | string; // Who said it (persona ID or "human")
70
+ speaker: "human" | string; // Actual speaker: "human" or the persona's display_name
71
+ channel?: string; // Display name of the Channel (persona or room) where captured.
72
+ // Undefined on pre-migration quotes.
71
73
  timestamp: string; // ISO timestamp (from original message)
72
74
  start: number | null; // Character offset in message (null = can't highlight)
73
75
  end: number | null; // Character offset in message (null = can't highlight)