claude-memory-layer 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/commands/memory-forget.md +42 -0
- package/.claude-plugin/commands/memory-history.md +34 -0
- package/.claude-plugin/commands/memory-import.md +56 -0
- package/.claude-plugin/commands/memory-list.md +37 -0
- package/.claude-plugin/commands/memory-search.md +36 -0
- package/.claude-plugin/commands/memory-stats.md +34 -0
- package/.claude-plugin/hooks.json +59 -0
- package/.claude-plugin/plugin.json +24 -0
- package/.history/package_20260201112328.json +45 -0
- package/.history/package_20260201113602.json +45 -0
- package/.history/package_20260201113713.json +45 -0
- package/.history/package_20260201114110.json +45 -0
- package/Memo.txt +558 -0
- package/README.md +520 -0
- package/context.md +636 -0
- package/dist/.claude-plugin/commands/memory-forget.md +42 -0
- package/dist/.claude-plugin/commands/memory-history.md +34 -0
- package/dist/.claude-plugin/commands/memory-import.md +56 -0
- package/dist/.claude-plugin/commands/memory-list.md +37 -0
- package/dist/.claude-plugin/commands/memory-search.md +36 -0
- package/dist/.claude-plugin/commands/memory-stats.md +34 -0
- package/dist/.claude-plugin/hooks.json +59 -0
- package/dist/.claude-plugin/plugin.json +24 -0
- package/dist/cli/index.js +3539 -0
- package/dist/cli/index.js.map +7 -0
- package/dist/core/index.js +4408 -0
- package/dist/core/index.js.map +7 -0
- package/dist/hooks/session-end.js +2971 -0
- package/dist/hooks/session-end.js.map +7 -0
- package/dist/hooks/session-start.js +2969 -0
- package/dist/hooks/session-start.js.map +7 -0
- package/dist/hooks/stop.js +3123 -0
- package/dist/hooks/stop.js.map +7 -0
- package/dist/hooks/user-prompt-submit.js +2960 -0
- package/dist/hooks/user-prompt-submit.js.map +7 -0
- package/dist/services/memory-service.js +2931 -0
- package/dist/services/memory-service.js.map +7 -0
- package/package.json +45 -0
- package/plan.md +1642 -0
- package/scripts/build.ts +102 -0
- package/spec.md +624 -0
- package/specs/citations-system/context.md +243 -0
- package/specs/citations-system/plan.md +495 -0
- package/specs/citations-system/spec.md +371 -0
- package/specs/endless-mode/context.md +305 -0
- package/specs/endless-mode/plan.md +620 -0
- package/specs/endless-mode/spec.md +455 -0
- package/specs/entity-edge-model/context.md +401 -0
- package/specs/entity-edge-model/plan.md +459 -0
- package/specs/entity-edge-model/spec.md +391 -0
- package/specs/evidence-aligner-v2/context.md +401 -0
- package/specs/evidence-aligner-v2/plan.md +303 -0
- package/specs/evidence-aligner-v2/spec.md +312 -0
- package/specs/mcp-desktop-integration/context.md +278 -0
- package/specs/mcp-desktop-integration/plan.md +550 -0
- package/specs/mcp-desktop-integration/spec.md +494 -0
- package/specs/post-tool-use-hook/context.md +319 -0
- package/specs/post-tool-use-hook/plan.md +469 -0
- package/specs/post-tool-use-hook/spec.md +364 -0
- package/specs/private-tags/context.md +288 -0
- package/specs/private-tags/plan.md +412 -0
- package/specs/private-tags/spec.md +345 -0
- package/specs/progressive-disclosure/context.md +346 -0
- package/specs/progressive-disclosure/plan.md +663 -0
- package/specs/progressive-disclosure/spec.md +415 -0
- package/specs/task-entity-system/context.md +297 -0
- package/specs/task-entity-system/plan.md +301 -0
- package/specs/task-entity-system/spec.md +314 -0
- package/specs/vector-outbox-v2/context.md +470 -0
- package/specs/vector-outbox-v2/plan.md +562 -0
- package/specs/vector-outbox-v2/spec.md +466 -0
- package/specs/web-viewer-ui/context.md +384 -0
- package/specs/web-viewer-ui/plan.md +797 -0
- package/specs/web-viewer-ui/spec.md +516 -0
- package/src/cli/index.ts +570 -0
- package/src/core/canonical-key.ts +186 -0
- package/src/core/citation-generator.ts +63 -0
- package/src/core/consolidated-store.ts +279 -0
- package/src/core/consolidation-worker.ts +384 -0
- package/src/core/context-formatter.ts +276 -0
- package/src/core/continuity-manager.ts +336 -0
- package/src/core/edge-repo.ts +324 -0
- package/src/core/embedder.ts +124 -0
- package/src/core/entity-repo.ts +342 -0
- package/src/core/event-store.ts +672 -0
- package/src/core/evidence-aligner.ts +635 -0
- package/src/core/graduation.ts +365 -0
- package/src/core/index.ts +32 -0
- package/src/core/matcher.ts +210 -0
- package/src/core/metadata-extractor.ts +203 -0
- package/src/core/privacy/filter.ts +179 -0
- package/src/core/privacy/index.ts +20 -0
- package/src/core/privacy/tag-parser.ts +145 -0
- package/src/core/progressive-retriever.ts +415 -0
- package/src/core/retriever.ts +235 -0
- package/src/core/task/blocker-resolver.ts +325 -0
- package/src/core/task/index.ts +9 -0
- package/src/core/task/task-matcher.ts +238 -0
- package/src/core/task/task-projector.ts +345 -0
- package/src/core/task/task-resolver.ts +414 -0
- package/src/core/types.ts +841 -0
- package/src/core/vector-outbox.ts +295 -0
- package/src/core/vector-store.ts +182 -0
- package/src/core/vector-worker.ts +488 -0
- package/src/core/working-set-store.ts +244 -0
- package/src/hooks/post-tool-use.ts +127 -0
- package/src/hooks/session-end.ts +78 -0
- package/src/hooks/session-start.ts +57 -0
- package/src/hooks/stop.ts +78 -0
- package/src/hooks/user-prompt-submit.ts +54 -0
- package/src/mcp/handlers.ts +212 -0
- package/src/mcp/index.ts +47 -0
- package/src/mcp/tools.ts +78 -0
- package/src/server/api/citations.ts +101 -0
- package/src/server/api/events.ts +101 -0
- package/src/server/api/index.ts +18 -0
- package/src/server/api/search.ts +98 -0
- package/src/server/api/sessions.ts +111 -0
- package/src/server/api/stats.ts +97 -0
- package/src/server/index.ts +91 -0
- package/src/services/memory-service.ts +626 -0
- package/src/services/session-history-importer.ts +367 -0
- package/tests/canonical-key.test.ts +101 -0
- package/tests/evidence-aligner.test.ts +152 -0
- package/tests/matcher.test.ts +112 -0
- package/tsconfig.json +24 -0
- package/vitest.config.ts +15 -0
|
@@ -0,0 +1,635 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Evidence Aligner V2 - AXIOMMIND Principle 4
|
|
3
|
+
* Quote-only approach: LLM provides quote, pipeline calculates span
|
|
4
|
+
* 3-step alignment: exact → normalized → fuzzy
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { createHash } from 'crypto';
|
|
8
|
+
import type {
|
|
9
|
+
EvidenceSpan,
|
|
10
|
+
ExtractedEvidence,
|
|
11
|
+
AlignedEvidence,
|
|
12
|
+
FailedEvidence,
|
|
13
|
+
EvidenceAlignResult
|
|
14
|
+
} from './types.js';
|
|
15
|
+
|
|
16
|
+
/**
 * Outcome of aligning a list of claims against a source text (V1 shape).
 */
export interface AlignmentResult {
  /** True when the number of unsupported claims stays within the configured limit. */
  isAligned: boolean;
  /** Share of the supplied claims that were matched; 1.0 when no claims were supplied. */
  confidence: number;
  /** One span per claim that was located in the source. */
  spans: EvidenceSpan[];
  /** Claims that could not be located in the source. */
  missingClaims: string[];
}
|
|
22
|
+
|
|
23
|
+
/**
 * Tuning knobs for the V1 claim aligner.
 */
export interface AlignmentOptions {
  /** Claims whose normalized text is shorter than this are skipped. */
  minMatchLength: number;
  /** Minimum similarity (0..1) a fuzzy match must reach to count as support. */
  fuzzyThreshold: number;
  /** Largest number of unsupported claims still reported as aligned. */
  maxMissingClaims: number;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Tuning knobs for the V2 (quote-only) aligner.
 */
export interface AlignmentOptionsV2 {
  /** Quotes whose normalized text is shorter than this are not fuzzy-matched. */
  minMatchLength: number;
  /** NOTE(review): not read by the aligner in this file; exact matches report confidence 1.0. */
  exactMatchBonus: number;
  /** NOTE(review): not read by the aligner in this file; normalized matches report confidence 0.95. */
  normalizedThreshold: number;
  /** Minimum Levenshtein similarity (0..1) for a fuzzy match to be accepted. */
  fuzzyThreshold: number;
  /** Fraction of evidence items allowed to fail while the entry still counts as aligned. */
  maxMissingRatio: number;
}
|
|
37
|
+
|
|
38
|
+
/** V1 defaults; any field can be overridden through the EvidenceAligner constructor. */
const DEFAULT_OPTIONS: AlignmentOptions = {
  minMatchLength: 10,
  fuzzyThreshold: 0.8,
  maxMissingClaims: 2,
};
|
|
43
|
+
|
|
44
|
+
/** V2 defaults; `configureV2` merges caller overrides on top of these values. */
const DEFAULT_OPTIONS_V2: AlignmentOptionsV2 = {
  minMatchLength: 5,
  exactMatchBonus: 1.0,
  normalizedThreshold: 0.95,
  fuzzyThreshold: 0.85,
  maxMissingRatio: 0.2,
};
|
|
51
|
+
|
|
52
|
+
/**
 * Aggregate outcome of aligning every evidence item of one entry (V2 shape).
 */
export interface AlignResultV2 {
  /** True when the number of failed items does not exceed floor(total * maxMissingRatio). */
  evidenceAligned: boolean;
  /** Number of evidence items that were located in their message. */
  alignedCount: number;
  /** Number of evidence items that could not be located. */
  failedCount: number;
  /** Per-item results, in the same order as the input evidence. */
  results: EvidenceAlignResult[];
  /** Sum of aligned confidences divided by the total item count; 1.0 when there are no items. */
  overallConfidence: number;
}
|
|
60
|
+
|
|
61
|
+
export class EvidenceAligner {
  /** Same whitespace class that `normalize()` collapses and `String.prototype.trim()` strips. */
  private static readonly WHITESPACE = /\s/u;
  /** Characters that survive `normalize()` besides whitespace. */
  private static readonly WORD_CHARACTER = /[\p{L}\p{N}]/u;

  private readonly options: AlignmentOptions;
  private optionsV2: AlignmentOptionsV2;

  /**
   * @param options - Overrides for the V1 defaults. V2 options start at their
   *   defaults and are changed through `configureV2`.
   */
  constructor(options: Partial<AlignmentOptions> = {}) {
    this.options = { ...DEFAULT_OPTIONS, ...options };
    this.optionsV2 = { ...DEFAULT_OPTIONS_V2 };
  }

  // ============================================================
  // V1 Methods: claim alignment
  // ============================================================

  /**
   * Align claims against source content.
   *
   * Each claim is normalized and looked up first as an exact substring of the
   * normalized source, then with a fuzzy sliding window. Claims whose
   * normalized form is shorter than `minMatchLength` are skipped: they are
   * neither aligned nor reported missing, but still count in the denominator
   * of `confidence`.
   *
   * @param claims - Claims to verify.
   * @param sourceContent - Text the claims should be supported by.
   * @returns Spans (in `sourceContent` coordinates) for supported claims, plus
   *   the claims that could not be found.
   */
  align(claims: string[], sourceContent: string): AlignmentResult {
    const spans: EvidenceSpan[] = [];
    const missingClaims: string[] = [];
    const normalizedSource = this.normalize(sourceContent);

    for (const claim of claims) {
      const normalizedClaim = this.normalize(claim);

      // Skip very short claims
      if (normalizedClaim.length < this.options.minMatchLength) {
        continue;
      }

      // Try exact match first
      const exactSpan = this.findExactMatch(normalizedClaim, normalizedSource, sourceContent);
      if (exactSpan) {
        spans.push(exactSpan);
        continue;
      }

      // Try fuzzy match
      const fuzzySpan = this.findFuzzyMatch(normalizedClaim, normalizedSource, sourceContent);
      if (fuzzySpan && fuzzySpan.confidence >= this.options.fuzzyThreshold) {
        spans.push(fuzzySpan);
        continue;
      }

      // Claim not found in source
      missingClaims.push(claim);
    }

    // Calculate overall alignment confidence
    const totalClaims = claims.length;
    const confidence = totalClaims > 0 ? spans.length / totalClaims : 1.0;

    // Alignment is valid if missing claims are within threshold
    const isAligned = missingClaims.length <= this.options.maxMissingClaims;

    return { isAligned, confidence, spans, missingClaims };
  }

  /**
   * Find the normalized claim as an exact substring of the normalized source.
   *
   * The hit is translated back to offsets in `originalSource`, so that
   * `originalQuote` really is the text the span points at.
   */
  private findExactMatch(
    normalizedClaim: string,
    normalizedSource: string,
    originalSource: string
  ): EvidenceSpan | null {
    const index = normalizedSource.indexOf(normalizedClaim);
    if (index === -1) {
      return null;
    }

    return this.buildSpan(
      originalSource,
      normalizedSource,
      index,
      index + normalizedClaim.length,
      1.0,
      'exact',
      normalizedClaim
    );
  }

  /**
   * Find the best fuzzy match by sliding a claim-sized window over the
   * normalized source and scoring each window with token Jaccard similarity.
   *
   * @returns The best window as a span, or null when it scores below
   *   `fuzzyThreshold` or the source is shorter than the claim.
   */
  private findFuzzyMatch(
    normalizedClaim: string,
    normalizedSource: string,
    originalSource: string
  ): EvidenceSpan | null {
    const windowSize = normalizedClaim.length;
    // The claim's tokens do not change between windows; compute them once.
    const claimTokens = new Set(this.tokenize(normalizedClaim));

    let bestIndex = -1;
    let bestSimilarity = -1;

    for (let i = 0; i + windowSize <= normalizedSource.length; i++) {
      const windowTokens = new Set(this.tokenize(normalizedSource.slice(i, i + windowSize)));
      const similarity = this.jaccard(claimTokens, windowTokens);

      // Strictly greater: the earliest window wins ties.
      if (similarity > bestSimilarity) {
        bestIndex = i;
        bestSimilarity = similarity;
      }
    }

    if (bestIndex === -1 || bestSimilarity < this.options.fuzzyThreshold) {
      return null;
    }

    return this.buildSpan(
      originalSource,
      normalizedSource,
      bestIndex,
      bestIndex + windowSize,
      bestSimilarity,
      'fuzzy',
      normalizedClaim
    );
  }

  /**
   * Build a V1 span from a hit expressed in normalized coordinates.
   * Falls back to the raw normalized indices when no mapping can be derived.
   */
  private buildSpan(
    originalSource: string,
    normalizedSource: string,
    normalizedStart: number,
    normalizedEnd: number,
    confidence: number,
    matchType: 'exact' | 'fuzzy',
    alignedText: string
  ): EvidenceSpan {
    const mapped = this.mapToOriginalPositions(
      originalSource,
      normalizedSource,
      normalizedStart,
      normalizedEnd
    );
    const start = mapped ? mapped.start : normalizedStart;
    const end = mapped ? mapped.end : normalizedEnd;

    return {
      start,
      end,
      confidence,
      matchType,
      originalQuote: originalSource.slice(start, end),
      alignedText
    };
  }

  /**
   * Calculate similarity between two strings using the Jaccard coefficient of
   * their whitespace-separated, lower-cased tokens.
   *
   * @returns A value in [0, 1]; 0 when neither string contains a token.
   */
  private calculateSimilarity(a: string, b: string): number {
    return this.jaccard(new Set(this.tokenize(a)), new Set(this.tokenize(b)));
  }

  /** Jaccard coefficient of two token sets; 0 (not NaN) when both are empty. */
  private jaccard(a: Set<string>, b: Set<string>): number {
    let shared = 0;
    for (const token of a) {
      if (b.has(token)) {
        shared++;
      }
    }
    const unionSize = a.size + b.size - shared;
    return unionSize === 0 ? 0 : shared / unionSize;
  }

  /**
   * Tokenize text into lower-cased words split on whitespace.
   */
  private tokenize(text: string): string[] {
    return text.toLowerCase().split(/\s+/).filter(t => t.length > 0);
  }

  /**
   * Normalize text for comparison: NFKC, lower-case, drop everything that is
   * not a letter, number or whitespace, collapse whitespace runs, trim.
   */
  private normalize(text: string): string {
    return text
      .normalize('NFKC')
      .toLowerCase()
      .replace(/[^\p{L}\p{N}\s]/gu, '')
      .replace(/\s+/g, ' ')
      .trim();
  }

  /**
   * Extract claims from a response text.
   *
   * Sentences are delimited by runs of `.`, `!` or `?`. A sentence is dropped
   * when it is shorter than `minMatchLength` or when its terminator contains
   * a `?` (questions are not claims).
   *
   * @returns The trimmed sentences, without their terminators.
   */
  extractClaims(text: string): string[] {
    const claims: string[] = [];
    // Group 1: sentence body. Group 2: its (possibly empty) terminator run.
    // The terminator is captured because splitting on it would discard the
    // information needed to recognise questions.
    const sentencePattern = /([^.!?]+)([.!?]*)/g;

    let match: RegExpExecArray | null;
    while ((match = sentencePattern.exec(text)) !== null) {
      const sentence = match[1].trim();
      const terminator = match[2];

      if (sentence.length === 0 || terminator.includes('?')) {
        continue;
      }
      if (sentence.length >= this.options.minMatchLength) {
        claims.push(sentence);
      }
    }

    return claims;
  }

  /**
   * Verify that a response is grounded in the provided context.
   *
   * @param response - Text whose sentences are treated as claims.
   * @param context - Context passages; joined with single spaces before alignment.
   */
  verifyGrounding(response: string, context: string[]): AlignmentResult {
    const claims = this.extractClaims(response);
    const combinedContext = context.join(' ');

    return this.align(claims, combinedContext);
  }

  // ============================================================
  // V2 Methods: Quote-only alignment
  // ============================================================

  /**
   * Configure V2 options. Unspecified fields are reset to their defaults
   * (overrides are not merged with a previous `configureV2` call).
   */
  configureV2(options: Partial<AlignmentOptionsV2>): void {
    this.optionsV2 = { ...DEFAULT_OPTIONS_V2, ...options };
  }

  /**
   * Align V2: Process extracted evidence with messageIndex and quote.
   *
   * @param sessionMessages - Array of session messages (original text)
   * @param evidence - Array of extracted evidence (messageIndex + quote)
   * @returns Per-item results plus aggregate counts. `overallConfidence`
   *   averages over all items, so failed items contribute 0.
   */
  alignV2(
    sessionMessages: string[],
    evidence: ExtractedEvidence[]
  ): AlignResultV2 {
    const results: EvidenceAlignResult[] = [];
    let alignedCount = 0;
    let totalConfidence = 0;

    for (const ev of evidence) {
      const result = this.alignSingleEvidence(sessionMessages, ev);
      results.push(result);

      if (result.aligned) {
        alignedCount++;
        totalConfidence += result.evidence.confidence;
      }
    }

    const failedCount = evidence.length - alignedCount;
    const maxMissing = Math.floor(evidence.length * this.optionsV2.maxMissingRatio);
    const evidenceAligned = failedCount <= maxMissing;
    const overallConfidence = evidence.length > 0
      ? totalConfidence / evidence.length
      : 1.0;

    return {
      evidenceAligned,
      alignedCount,
      failedCount,
      results,
      overallConfidence
    };
  }

  /**
   * Align a single evidence item: exact → whitespace-normalized → fuzzy.
   */
  private alignSingleEvidence(
    sessionMessages: string[],
    evidence: ExtractedEvidence
  ): EvidenceAlignResult {
    const { messageIndex, quote } = evidence;

    // Validate messageIndex. The integer check matters: NaN or a fractional
    // index passes a plain range comparison and then indexes `undefined`.
    if (
      !Number.isInteger(messageIndex) ||
      messageIndex < 0 ||
      messageIndex >= sessionMessages.length
    ) {
      return {
        aligned: false,
        evidence: { messageIndex, quote, failureReason: 'invalid_index' }
      };
    }

    // Validate quote
    if (typeof quote !== 'string' || quote.trim().length === 0) {
      return {
        aligned: false,
        evidence: { messageIndex, quote, failureReason: 'empty_quote' }
      };
    }

    const sourceMessage = sessionMessages[messageIndex];

    // Step 1: Try exact match
    const exactResult = this.tryExactMatchV2(quote, sourceMessage, messageIndex);
    if (exactResult) {
      return exactResult;
    }

    // Step 2: Try normalized match
    const normalizedResult = this.tryNormalizedMatchV2(quote, sourceMessage, messageIndex);
    if (normalizedResult) {
      return normalizedResult;
    }

    // Step 3: Try fuzzy match
    const fuzzyResult = this.tryFuzzyMatchV2(quote, sourceMessage, messageIndex);
    if (fuzzyResult) {
      return fuzzyResult;
    }

    // No match found
    return {
      aligned: false,
      evidence: { messageIndex, quote, failureReason: 'not_found' }
    };
  }

  /**
   * Try exact substring match of the raw quote in the raw message.
   */
  private tryExactMatchV2(
    quote: string,
    source: string,
    messageIndex: number
  ): EvidenceAlignResult | null {
    const index = source.indexOf(quote);

    if (index === -1) {
      return null;
    }

    return {
      aligned: true,
      evidence: {
        messageIndex,
        quote,
        spanStart: index,
        spanEnd: index + quote.length,
        quoteHash: this.hashQuote(quote),
        confidence: 1.0,
        matchMethod: 'exact'
      }
    };
  }

  /**
   * Try normalized match (whitespace collapsed on both sides).
   */
  private tryNormalizedMatchV2(
    quote: string,
    source: string,
    messageIndex: number
  ): EvidenceAlignResult | null {
    const normalizedQuote = this.normalizeWhitespace(quote);
    const normalizedSource = this.normalizeWhitespace(source);

    const normalizedIndex = normalizedSource.indexOf(normalizedQuote);
    if (normalizedIndex === -1) {
      return null;
    }

    // Map back to original positions
    const originalSpan = this.mapToOriginalPositions(
      source,
      normalizedSource,
      normalizedIndex,
      normalizedIndex + normalizedQuote.length
    );

    if (!originalSpan) {
      return null;
    }

    return {
      aligned: true,
      evidence: {
        messageIndex,
        quote,
        spanStart: originalSpan.start,
        spanEnd: originalSpan.end,
        quoteHash: this.hashQuote(quote),
        confidence: 0.95,
        matchMethod: 'normalized'
      }
    };
  }

  /**
   * Try fuzzy match using sliding windows scored by Levenshtein similarity.
   *
   * Windows of 1.0x, 1.1x and 1.2x the normalized quote length are tried.
   * Sizes are clamped to the source length, so a source slightly shorter than
   * the quote is still compared as a whole, and duplicate sizes are scanned
   * only once.
   */
  private tryFuzzyMatchV2(
    quote: string,
    source: string,
    messageIndex: number
  ): EvidenceAlignResult | null {
    const normalizedQuote = this.normalize(quote);
    const normalizedSource = this.normalize(source);

    if (
      normalizedQuote.length < this.optionsV2.minMatchLength ||
      normalizedSource.length === 0
    ) {
      return null;
    }

    // Try different window sizes. A Set keeps insertion order, so the
    // tie-breaking order of the original size list is preserved.
    const windowSizes = new Set<number>([
      normalizedQuote.length,
      Math.floor(normalizedQuote.length * 1.1),
      Math.floor(normalizedQuote.length * 1.2)
    ].map(size => Math.min(size, normalizedSource.length)));

    let bestMatch: {
      index: number;
      windowSize: number;
      similarity: number;
    } | null = null;

    for (const windowSize of windowSizes) {
      for (let i = 0; i + windowSize <= normalizedSource.length; i++) {
        const window = normalizedSource.slice(i, i + windowSize);
        const similarity = this.calculateLevenshteinSimilarity(normalizedQuote, window);

        if (similarity >= this.optionsV2.fuzzyThreshold) {
          if (!bestMatch || similarity > bestMatch.similarity) {
            bestMatch = { index: i, windowSize, similarity };
          }
        }
      }
    }

    if (!bestMatch) {
      return null;
    }

    // Map back to original positions
    const originalSpan = this.mapToOriginalPositions(
      source,
      normalizedSource,
      bestMatch.index,
      bestMatch.index + bestMatch.windowSize
    );

    if (!originalSpan) {
      return null;
    }

    return {
      aligned: true,
      evidence: {
        messageIndex,
        quote,
        spanStart: originalSpan.start,
        spanEnd: originalSpan.end,
        quoteHash: this.hashQuote(quote),
        confidence: bestMatch.similarity,
        matchMethod: 'fuzzy'
      }
    };
  }

  /**
   * Normalize whitespace only (preserve other characters): runs of tab, CR,
   * LF and space become one space, then the result is trimmed.
   */
  private normalizeWhitespace(text: string): string {
    return text
      .replace(/[\t\r]/g, ' ')
      .replace(/\n+/g, ' ')
      .replace(/ +/g, ' ')
      .trim();
  }

  /**
   * Map a half-open range in a normalized string back to `original`.
   *
   * `normalized` may come from either `normalizeWhitespace()` or
   * `normalize()`. The method rebuilds each normalization character by
   * character while recording source offsets, and uses whichever
   * reconstruction reproduces `normalized` exactly. If neither does (e.g.
   * normalization composed several code points into one), it falls back to
   * an approximate whitespace-only walk.
   *
   * @returns Offsets into `original` (end exclusive), or null when the start
   *   position cannot be mapped.
   */
  private mapToOriginalPositions(
    original: string,
    normalized: string,
    normalizedStart: number,
    normalizedEnd: number
  ): { start: number; end: number } | null {
    const modes: Array<'whitespace' | 'full'> = ['whitespace', 'full'];

    for (const mode of modes) {
      const table = this.buildOffsetTable(original, mode);
      if (table.text === normalized) {
        return this.spanFromOffsets(original, table.offsets, normalizedStart, normalizedEnd);
      }
    }

    return this.mapByWhitespaceWalk(original, normalized, normalizedStart, normalizedEnd);
  }

  /**
   * Re-create a normalization of `original` one code point at a time.
   *
   * @param mode - `'whitespace'` mirrors `normalizeWhitespace()`, `'full'`
   *   mirrors `normalize()`.
   * @returns The normalized text and, for each of its UTF-16 units, the index
   *   in `original` of the code point that produced it.
   */
  private buildOffsetTable(
    original: string,
    mode: 'whitespace' | 'full'
  ): { text: string; offsets: number[] } {
    const units: string[] = [];
    const offsets: number[] = [];

    // Emits a single space for a whitespace run, anchored at the run's first character.
    const pushSpace = (at: number): void => {
      if (units.length > 0 && units[units.length - 1] === ' ') {
        return;
      }
      units.push(' ');
      offsets.push(at);
    };

    let position = 0;
    for (const symbol of original) {
      if (mode === 'whitespace') {
        if (symbol === ' ' || symbol === '\t' || symbol === '\r' || symbol === '\n') {
          pushSpace(position);
        } else {
          for (let k = 0; k < symbol.length; k++) {
            units.push(symbol[k]);
            offsets.push(position + k);
          }
        }
      } else {
        for (const folded of symbol.normalize('NFKC').toLowerCase()) {
          if (EvidenceAligner.WHITESPACE.test(folded)) {
            pushSpace(position);
          } else if (EvidenceAligner.WORD_CHARACTER.test(folded)) {
            for (let k = 0; k < folded.length; k++) {
              units.push(folded[k]);
              offsets.push(position);
            }
          }
          // Anything else is removed by normalize() and produces no output.
        }
      }
      position += symbol.length;
    }

    // Mirror the trailing .trim() of both normalizers.
    let first = 0;
    let last = units.length;
    while (first < last && EvidenceAligner.WHITESPACE.test(units[first])) {
      first++;
    }
    while (last > first && EvidenceAligner.WHITESPACE.test(units[last - 1])) {
      last--;
    }

    return {
      text: units.slice(first, last).join(''),
      offsets: offsets.slice(first, last)
    };
  }

  /** Convert a normalized half-open range into original offsets using an offset table. */
  private spanFromOffsets(
    original: string,
    offsets: number[],
    normalizedStart: number,
    normalizedEnd: number
  ): { start: number; end: number } | null {
    const start = offsets[normalizedStart];
    if (start === undefined) {
      return null;
    }
    if (normalizedEnd <= normalizedStart) {
      return { start, end: start };
    }

    const lastIndex = Math.min(normalizedEnd, offsets.length) - 1;
    const lastOffset = offsets[lastIndex];
    // Include the whole code point when the last character is a surrogate pair.
    const codePoint = original.codePointAt(lastOffset);
    const width = codePoint !== undefined && codePoint > 0xffff ? 2 : 1;

    return { start, end: lastOffset + width };
  }

  /**
   * Approximate fallback mapping that only accounts for collapsed whitespace.
   * Used when the normalization cannot be reconstructed exactly.
   */
  private mapByWhitespaceWalk(
    original: string,
    normalized: string,
    normalizedStart: number,
    normalizedEnd: number
  ): { start: number; end: number } | null {
    const normalizedToOriginal: Map<number, number> = new Map();
    let normalizedPos = 0;

    for (let origPos = 0; origPos < original.length; origPos++) {
      const char = original[origPos];

      if (/\s/.test(char)) {
        // Only whitespace that also exists in the normalized text consumes a slot.
        if (normalizedPos < normalized.length && /\s/.test(normalized[normalizedPos])) {
          normalizedToOriginal.set(normalizedPos, origPos);
          normalizedPos++;

          // Skip consecutive whitespace in original
          while (origPos + 1 < original.length && /\s/.test(original[origPos + 1])) {
            origPos++;
          }
        }
      } else {
        normalizedToOriginal.set(normalizedPos, origPos);
        normalizedPos++;
      }
    }

    const startOrig = normalizedToOriginal.get(normalizedStart);
    let endOrig = normalizedToOriginal.get(normalizedEnd - 1);

    if (startOrig === undefined) {
      return null;
    }

    if (endOrig === undefined) {
      // Use end of string
      endOrig = original.length - 1;
    }

    return {
      start: startOrig,
      end: endOrig + 1
    };
  }

  /**
   * Calculate Levenshtein similarity: 1 - distance / max(length).
   *
   * Uses two rolling rows instead of a full matrix; the result is identical.
   *
   * @returns 1 for two empty strings, 0 when exactly one is empty.
   */
  private calculateLevenshteinSimilarity(a: string, b: string): number {
    const m = a.length;
    const n = b.length;

    if (m === 0) return n === 0 ? 1 : 0;
    if (n === 0) return 0;

    let previous: number[] = Array.from({ length: n + 1 }, (_, j) => j);
    let current: number[] = new Array<number>(n + 1).fill(0);

    for (let i = 1; i <= m; i++) {
      current[0] = i;
      for (let j = 1; j <= n; j++) {
        const cost = a[i - 1] === b[j - 1] ? 0 : 1;
        current[j] = Math.min(
          previous[j] + 1,        // deletion
          current[j - 1] + 1,     // insertion
          previous[j - 1] + cost  // substitution
        );
      }
      const swap = previous;
      previous = current;
      current = swap;
    }

    const distance = previous[n];
    return 1 - distance / Math.max(m, n);
  }

  /**
   * Hash quote for deduplication: first 16 hex characters of its SHA-256.
   */
  private hashQuote(quote: string): string {
    return createHash('sha256').update(quote).digest('hex').slice(0, 16);
  }

  /**
   * Convert V2 result to V1 format for backwards compatibility.
   *
   * Normalized and fuzzy V2 matches are both reported as `'fuzzy'`, and failed
   * items become `missingClaims`.
   */
  convertToV1Result(v2Result: AlignResultV2): AlignmentResult {
    const spans: EvidenceSpan[] = [];
    const missingClaims: string[] = [];

    for (const result of v2Result.results) {
      if (result.aligned) {
        const ev = result.evidence as AlignedEvidence;
        spans.push({
          start: ev.spanStart,
          end: ev.spanEnd,
          confidence: ev.confidence,
          matchType: ev.matchMethod === 'exact' ? 'exact' : 'fuzzy',
          originalQuote: ev.quote,
          alignedText: ev.quote
        });
      } else {
        const ev = result.evidence as FailedEvidence;
        missingClaims.push(ev.quote);
      }
    }

    return {
      isAligned: v2Result.evidenceAligned,
      confidence: v2Result.overallConfidence,
      spans,
      missingClaims
    };
  }
}
|
|
624
|
+
|
|
625
|
+
/**
 * Lazily created module-wide aligner; stays null until first requested.
 */
let defaultAligner: EvidenceAligner | null = null;

/**
 * Return the shared EvidenceAligner, constructing it with default options on
 * the first call and handing back the same instance afterwards.
 */
export function getDefaultAligner(): EvidenceAligner {
  const existing = defaultAligner;
  if (existing) {
    return existing;
  }

  const created = new EvidenceAligner();
  defaultAligner = created;
  return created;
}
|