ei-tui 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -282,8 +282,7 @@ function normalizeText(text: string): string {
|
|
|
282
282
|
.replace(/[\u2018\u2019\u0060\u00B4]/g, "'") // curly single, backtick, acute accent
|
|
283
283
|
.replace(/[\u2014\u2013\u2012]/g, '-') // em-dash, en-dash, figure dash
|
|
284
284
|
.replace(/\u00A0/g, ' ') // non-breaking space
|
|
285
|
-
.replace(/[\u2000-\u200F]/g, ' ') // unicode space variants
|
|
286
|
-
.replace(/\u2026|\.\.\./g, '\u2026'); // normalize both ellipsis forms → unicode ellipsis (1:1)
|
|
285
|
+
.replace(/[\u2000-\u200F]/g, ' '); // unicode space variants
|
|
287
286
|
}
|
|
288
287
|
|
|
289
288
|
function stripPunctuation(text: string): string {
|
|
@@ -297,31 +296,46 @@ function stripPunctuation(text: string): string {
|
|
|
297
296
|
.toLowerCase();
|
|
298
297
|
}
|
|
299
298
|
|
|
300
|
-
interface WordBoundaryMatch {
|
|
299
|
+
export interface WordBoundaryMatch { // Span returned by expandToWordBoundaries and findQuoteByWords
|
|
301
300
|
start: number; // Offset into the searched text where the span begins (used as the slice start)
|
|
302
301
|
end: number; // Offset where the span stops (used as the slice end, so exclusive)
|
|
303
302
|
text: string; // Substring of the searched text covered by start..end
|
|
304
303
|
}
|
|
305
304
|
|
|
306
|
-
function findQuoteByWords(quoteText: string, msgText: string): WordBoundaryMatch | null {
|
|
305
|
+
/**
 * Widens the half-open span `[start, end)` so that it never cuts a word in half.
 *
 * A "word" is a maximal run of non-whitespace characters. The start is moved
 * left only when it sits in the middle of a word, and the end is moved right
 * only when the last covered character is inside a word that continues past
 * `end`. Spans that already sit on boundaries are returned unchanged.
 *
 * Offsets are clamped to `[0, text.length]` first. Without that, a negative
 * `start` would make `slice` count from the end of the string, and an offset
 * at or past the end of the text would be misread as "mid-word".
 *
 * @param text  The full text the offsets refer to.
 * @param start Offset of the first character of the span.
 * @param end   Offset one past the last character of the span; callers are
 *              expected to pass `start <= end`.
 * @returns The adjusted offsets together with the substring they cover.
 */
export function expandToWordBoundaries(text: string, start: number, end: number): WordBoundaryMatch {
  // Non-global regex: `test` keeps no state between calls, so one instance is safe to reuse.
  const whitespace = /\s/;
  // Positions outside the string count as boundaries, never as word characters.
  const isWordChar = (index: number): boolean =>
    index >= 0 && index < text.length && !whitespace.test(text.charAt(index));

  let spanStart = Math.min(Math.max(start, 0), text.length);
  let spanEnd = Math.min(Math.max(end, 0), text.length);

  // Only walk backward if the start is mid-word (not already at a word boundary).
  if (isWordChar(spanStart)) {
    while (isWordChar(spanStart - 1)) spanStart--;
  }

  // Only walk forward if the span currently ends inside a word.
  if (isWordChar(spanEnd - 1)) {
    while (isWordChar(spanEnd)) spanEnd++;
  }

  return { start: spanStart, end: spanEnd, text: text.slice(spanStart, spanEnd) };
}
|
|
314
|
+
|
|
315
|
+
export function findQuoteByWords(quoteText: string, msgText: string): WordBoundaryMatch | null {
|
|
307
316
|
const strippedQuote = stripPunctuation(quoteText);
|
|
308
317
|
const quoteWords = strippedQuote.split(' ').filter(w => w.length > 0);
|
|
309
318
|
|
|
310
|
-
if (quoteWords.length <
|
|
319
|
+
if (quoteWords.length < 2) return null; // Too short to trust — require at least 2 words
|
|
311
320
|
|
|
312
|
-
// Build word token list from original message with original positions
|
|
321
|
+
// Build word token list from original message with original positions.
|
|
322
|
+
// Each \S+ token is re-split into sub-tokens (sharing the parent's start/end)
|
|
323
|
+
// so that contractions stripped by stripPunctuation (e.g. don't → "don t")
|
|
324
|
+
// align correctly with quoteWords which is also split on spaces.
|
|
313
325
|
const wordTokens: Array<{ word: string; start: number; end: number }> = [];
|
|
314
326
|
const wordRegex = /\S+/g;
|
|
315
327
|
let match: RegExpExecArray | null;
|
|
316
328
|
while ((match = wordRegex.exec(msgText)) !== null) {
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
329
|
+
const tokenStart = match.index;
|
|
330
|
+
const tokenEnd = match.index + match[0].length;
|
|
331
|
+
const stripped = stripPunctuation(match[0]);
|
|
332
|
+
const subWords = stripped.split(' ').filter(w => w.length > 0);
|
|
333
|
+
for (const sub of subWords) {
|
|
334
|
+
wordTokens.push({ word: sub, start: tokenStart, end: tokenEnd });
|
|
335
|
+
}
|
|
322
336
|
}
|
|
323
337
|
|
|
324
|
-
// Find contiguous sequence of
|
|
338
|
+
// Find contiguous sequence of word tokens matching the quote words
|
|
325
339
|
for (let i = 0; i <= wordTokens.length - quoteWords.length; i++) {
|
|
326
340
|
let allMatch = true;
|
|
327
341
|
for (let j = 0; j < quoteWords.length; j++) {
|
|
@@ -333,11 +347,7 @@ function findQuoteByWords(quoteText: string, msgText: string): WordBoundaryMatch
|
|
|
333
347
|
if (allMatch) {
|
|
334
348
|
const startToken = wordTokens[i];
|
|
335
349
|
const endToken = wordTokens[i + quoteWords.length - 1];
|
|
336
|
-
return {
|
|
337
|
-
start: startToken.start,
|
|
338
|
-
end: endToken.end,
|
|
339
|
-
text: msgText.slice(startToken.start, endToken.end),
|
|
340
|
-
};
|
|
350
|
+
return expandToWordBoundaries(msgText, startToken.start, endToken.end);
|
|
341
351
|
}
|
|
342
352
|
}
|
|
343
353
|
|
|
@@ -370,9 +380,10 @@ async function validateAndStoreQuotes(
|
|
|
370
380
|
let matchLevel: string;
|
|
371
381
|
|
|
372
382
|
if (start !== -1) {
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
383
|
+
const expanded = expandToWordBoundaries(msgText, start, start + candidate.text.length);
|
|
384
|
+
matchStart = expanded.start;
|
|
385
|
+
matchEnd = expanded.end;
|
|
386
|
+
matchText = expanded.text;
|
|
376
387
|
matchLevel = "exact";
|
|
377
388
|
} else {
|
|
378
389
|
// Level 2: word-boundary fallback
|
|
@@ -440,7 +451,8 @@ async function validateAndStoreQuotes(
|
|
|
440
451
|
data_item_ids: [dataItemId],
|
|
441
452
|
persona_groups: [personaGroup || "General"],
|
|
442
453
|
text: matchText,
|
|
443
|
-
speaker: message.role === "human" ? "human" : personaName,
|
|
454
|
+
speaker: message.role === "human" ? "human" : (message.speaker_name ?? personaName),
|
|
455
|
+
channel: personaName,
|
|
444
456
|
timestamp: message.timestamp,
|
|
445
457
|
start: matchStart,
|
|
446
458
|
end: matchEnd,
|
|
@@ -67,7 +67,9 @@ export interface Quote {
|
|
|
67
67
|
data_item_ids: string[]; // FK[] to DataItemBase.id
|
|
68
68
|
persona_groups: string[]; // Visibility groups
|
|
69
69
|
text: string; // The quote content
|
|
70
|
-
speaker: "human" | string; //
|
|
70
|
+
speaker: "human" | string; // Actual speaker: "human" or the persona's display_name
|
|
71
|
+
channel?: string; // Display name of the Channel (persona or room) where captured.
|
|
72
|
+
// Undefined on pre-migration quotes.
|
|
71
73
|
timestamp: string; // ISO timestamp (from original message)
|
|
72
74
|
start: number | null; // Character offset in message (null = can't highlight)
|
|
73
75
|
end: number | null; // Character offset in message (null = can't highlight)
|