@usejunior/docx-core 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -2
- package/dist/.tsbuildinfo +1 -1
- package/dist/atomizer.d.ts.map +1 -1
- package/dist/atomizer.js +16 -3
- package/dist/atomizer.js.map +1 -1
- package/dist/baselines/atomizer/documentReconstructor.d.ts.map +1 -1
- package/dist/baselines/atomizer/documentReconstructor.js +11 -5
- package/dist/baselines/atomizer/documentReconstructor.js.map +1 -1
- package/dist/baselines/atomizer/inPlaceModifier.d.ts +81 -1
- package/dist/baselines/atomizer/inPlaceModifier.d.ts.map +1 -1
- package/dist/baselines/atomizer/inPlaceModifier.js +618 -34
- package/dist/baselines/atomizer/inPlaceModifier.js.map +1 -1
- package/dist/baselines/atomizer/pipeline.d.ts +1 -1
- package/dist/baselines/atomizer/pipeline.d.ts.map +1 -1
- package/dist/baselines/atomizer/pipeline.js +70 -1
- package/dist/baselines/atomizer/pipeline.js.map +1 -1
- package/dist/baselines/atomizer/premergeRuns.d.ts.map +1 -1
- package/dist/baselines/atomizer/premergeRuns.js +3 -0
- package/dist/baselines/atomizer/premergeRuns.js.map +1 -1
- package/dist/baselines/atomizer/trackChangesAcceptorAst.d.ts.map +1 -1
- package/dist/baselines/atomizer/trackChangesAcceptorAst.js +63 -33
- package/dist/baselines/atomizer/trackChangesAcceptorAst.js.map +1 -1
- package/dist/cli/compare-two.js +4 -4
- package/dist/cli/compare-two.js.map +1 -1
- package/dist/index.d.ts +6 -6
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7 -7
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
- package/dist/baselines/diffmatch/documentBuilder.d.ts +0 -44
- package/dist/baselines/diffmatch/documentBuilder.d.ts.map +0 -1
- package/dist/baselines/diffmatch/documentBuilder.js +0 -227
- package/dist/baselines/diffmatch/documentBuilder.js.map +0 -1
- package/dist/baselines/diffmatch/paragraphAlignment.d.ts +0 -75
- package/dist/baselines/diffmatch/paragraphAlignment.d.ts.map +0 -1
- package/dist/baselines/diffmatch/paragraphAlignment.js +0 -206
- package/dist/baselines/diffmatch/paragraphAlignment.js.map +0 -1
- package/dist/baselines/diffmatch/pipeline.d.ts +0 -33
- package/dist/baselines/diffmatch/pipeline.d.ts.map +0 -1
- package/dist/baselines/diffmatch/pipeline.js +0 -84
- package/dist/baselines/diffmatch/pipeline.js.map +0 -1
- package/dist/baselines/diffmatch/runDiff.d.ts +0 -53
- package/dist/baselines/diffmatch/runDiff.d.ts.map +0 -1
- package/dist/baselines/diffmatch/runDiff.js +0 -253
- package/dist/baselines/diffmatch/runDiff.js.map +0 -1
- package/dist/baselines/diffmatch/trackChangesRenderer.d.ts +0 -64
- package/dist/baselines/diffmatch/trackChangesRenderer.d.ts.map +0 -1
- package/dist/baselines/diffmatch/trackChangesRenderer.js +0 -178
- package/dist/baselines/diffmatch/trackChangesRenderer.js.map +0 -1
- package/dist/baselines/diffmatch/xmlParser.d.ts +0 -45
- package/dist/baselines/diffmatch/xmlParser.d.ts.map +0 -1
- package/dist/baselines/diffmatch/xmlParser.js +0 -216
- package/dist/baselines/diffmatch/xmlParser.js.map +0 -1
- package/dist/benchmark/gates.d.ts +0 -17
- package/dist/benchmark/gates.d.ts.map +0 -1
- package/dist/benchmark/gates.js +0 -260
- package/dist/benchmark/gates.js.map +0 -1
- package/dist/benchmark/metrics.d.ts +0 -15
- package/dist/benchmark/metrics.d.ts.map +0 -1
- package/dist/benchmark/metrics.js +0 -24
- package/dist/benchmark/metrics.js.map +0 -1
- package/dist/benchmark/reporter.d.ts +0 -10
- package/dist/benchmark/reporter.d.ts.map +0 -1
- package/dist/benchmark/reporter.js +0 -94
- package/dist/benchmark/reporter.js.map +0 -1
- package/dist/benchmark/runner.d.ts +0 -15
- package/dist/benchmark/runner.d.ts.map +0 -1
- package/dist/benchmark/runner.js +0 -241
- package/dist/benchmark/runner.js.map +0 -1
- package/dist/benchmark/scores.d.ts +0 -24
- package/dist/benchmark/scores.d.ts.map +0 -1
- package/dist/benchmark/scores.js +0 -103
- package/dist/benchmark/scores.js.map +0 -1
- package/dist/benchmark/types.d.ts +0 -81
- package/dist/benchmark/types.d.ts.map +0 -1
- package/dist/benchmark/types.js +0 -7
- package/dist/benchmark/types.js.map +0 -1
|
@@ -1,227 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Document builder for reconstructing DOCX with track changes.
|
|
3
|
-
*
|
|
4
|
-
* Takes alignment results and produces a new document.xml with
|
|
5
|
-
* insertions and deletions marked using OOXML track changes.
|
|
6
|
-
*/
|
|
7
|
-
import { diffRuns } from './runDiff.js';
|
|
8
|
-
import { renderTrackChanges, generateDeletedParagraph, generateInsertedParagraph, wrapInParagraph, } from './trackChangesRenderer.js';
|
|
9
|
-
import { extractSectPr } from './xmlParser.js';
|
|
10
|
-
/**
 * Generate paragraph operations from an alignment result.
 *
 * Walks the revised paragraphs in order and emits one operation per output
 * paragraph:
 *  - original paragraphs without a match are emitted as 'deleted' right before
 *    the next matched paragraph that follows them (or at the very end);
 *  - matched paragraphs become 'unchanged' (similarity >= 0.9999) or
 *    'modified' (run-level diff rendered as track changes);
 *  - revised paragraphs without a match become 'inserted'.
 *
 * NOTE(review): lookups by loop position assume every paragraph's
 * `originalIndex` equals its position in its own array - confirm against the
 * parser that produces these paragraphs.
 *
 * @param alignment - Alignment result; only `alignment.matched` is read. Each
 *   entry carries `original`, `revised` and `similarity`.
 * @param originalParagraphs - Paragraphs of the original document.
 * @param revisedParagraphs - Paragraphs of the revised document.
 * @param options - `author` and `date`, forwarded to the track-change renderers.
 * @returns Ordered list of `{ type, xml, originalIndex?, revisedIndex? }`.
 */
export function generateParagraphOperations(alignment, originalParagraphs, revisedParagraphs, options) {
    const UNCHANGED_SIMILARITY = 0.9999;
    const { author, date } = options;
    const operations = [];
    // Original paragraphs keyed by their index, used to render deletions.
    const originalByIndex = new Map();
    for (const para of originalParagraphs) {
        originalByIndex.set(para.originalIndex, para);
    }
    // One pass over the matches: remember which originals are matched and
    // index each match by its revised paragraph so the main loop is O(1) per
    // paragraph instead of scanning `alignment.matched` every time.
    const matchedOriginalIndices = new Set();
    const matchByRevisedIndex = new Map();
    for (const match of alignment.matched) {
        matchedOriginalIndices.add(match.original.originalIndex);
        matchByRevisedIndex.set(match.revised.originalIndex, match);
    }
    // Highest original index whose preceding deletions were already emitted.
    let lastOriginalIndex = -1;
    // Emit every unmatched original paragraph in (lastOriginalIndex, endExclusive).
    const emitDeletedBefore = (endExclusive) => {
        for (let i = lastOriginalIndex + 1; i < endExclusive; i++) {
            if (matchedOriginalIndices.has(i)) {
                continue;
            }
            const deletedPara = originalByIndex.get(i);
            if (!deletedPara) {
                continue;
            }
            operations.push({
                type: 'deleted',
                xml: generateDeletedParagraph(deletedPara.runs, author, date, extractPPrContent(deletedPara.pPrXml)),
                originalIndex: i,
            });
        }
    };
    for (let revIdx = 0; revIdx < revisedParagraphs.length; revIdx++) {
        const revPara = revisedParagraphs[revIdx];
        const match = matchByRevisedIndex.get(revIdx);
        if (!match) {
            // No counterpart in the original: the whole paragraph is an insertion.
            operations.push({
                type: 'inserted',
                xml: generateInsertedParagraph(revPara.runs, author, date, extractPPrContent(revPara.pPrXml)),
                revisedIndex: revIdx,
            });
            continue;
        }
        const origPara = match.original;
        emitDeletedBefore(origPara.originalIndex);
        if ((match.similarity ?? 0) >= UNCHANGED_SIMILARITY) {
            // Unchanged paragraph - use revised content as-is.
            operations.push({
                type: 'unchanged',
                xml: generateParagraphXml(revPara),
                originalIndex: origPara.originalIndex,
                revisedIndex: revIdx,
            });
        }
        else {
            // Modified paragraph - diff the runs and render the result as track changes.
            const diffResult = diffRuns(origPara.runs, revPara.runs);
            const trackChangesContent = renderTrackChanges(diffResult.mergedRuns, { author, date });
            operations.push({
                type: 'modified',
                xml: wrapInParagraph(trackChangesContent, extractPPrContent(revPara.pPrXml)),
                originalIndex: origPara.originalIndex,
                revisedIndex: revIdx,
            });
        }
        // Matches are not guaranteed to be monotonic in original order. Never
        // move the cursor backwards, otherwise deletions that were already
        // emitted would be emitted a second time later on.
        lastOriginalIndex = Math.max(lastOriginalIndex, origPara.originalIndex);
    }
    // Remaining unmatched originals after the last matched paragraph.
    emitDeletedBefore(originalParagraphs.length);
    return operations;
}
|
|
115
|
-
/**
 * Assemble a new document.xml from rendered paragraph operations.
 *
 * The text up to and including the opening body tag and the text from the
 * closing body tag onwards are taken from the original document; the body is
 * replaced by the operations' XML joined with newlines, followed by the
 * section properties found in the original body when there are any.
 *
 * @param originalXml - The original document.xml content
 * @param operations - Ordered list of paragraph operations (each with `xml`)
 * @returns The new document.xml content
 */
export function buildDocumentWithTrackChanges(originalXml, operations) {
    const parts = getDocumentParts(originalXml);
    // sectPr has to stay the last child of the body, so it is re-appended after the paragraphs.
    const { sectPr } = extractSectPr(parts.bodyContent);
    const bodyPieces = [operations.map((operation) => operation.xml).join('\n')];
    if (sectPr) {
        bodyPieces.push(sectPr);
    }
    return `${parts.beforeBody}${bodyPieces.join('\n')}${parts.afterBody}`;
}
|
|
137
|
-
/**
 * Split document.xml around its body element.
 *
 * @param documentXml - Full document.xml text
 * @returns `beforeBody` (everything through the first opening body tag),
 *   `bodyContent` (text between that tag and the last closing body tag) and
 *   `afterBody` (the last closing body tag and everything after it). When
 *   either tag is missing the whole input is returned as `beforeBody` and the
 *   other two parts are empty strings.
 */
function getDocumentParts(documentXml) {
    const openTag = /<w:body[^>]*>/.exec(documentXml);
    const closeStart = documentXml.lastIndexOf('</w:body>');
    if (openTag === null || closeStart === -1) {
        return { beforeBody: documentXml, bodyContent: '', afterBody: '' };
    }
    // Position just past the opening tag.
    const contentStart = openTag.index + openTag[0].length;
    const beforeBody = documentXml.slice(0, contentStart);
    const bodyContent = documentXml.slice(contentStart, closeStart);
    const afterBody = documentXml.slice(closeStart);
    return { beforeBody, bodyContent, afterBody };
}
|
|
159
|
-
/**
 * Render a paragraph that needs no track-change markup.
 *
 * @param para - Paragraph with `runs` and an optional `pPrXml` string
 * @returns A paragraph element containing `pPrXml` (when set) followed by
 *   every run rendered through generateRunXml
 */
function generateParagraphXml(para) {
    let content = para.pPrXml || '';
    for (const run of para.runs) {
        content += generateRunXml(run);
    }
    return `<w:p>${content}</w:p>`;
}
|
|
168
|
-
/**
 * Render a single run as a run element holding one text element.
 *
 * @param run - Run with `text` and optional `properties`
 * @returns Run XML: the run properties (if any) followed by the escaped text
 */
function generateRunXml(run) {
    const { text, properties } = run;
    const runProps = generateRunPropertiesXml(properties);
    // Any space in the text requests whitespace preservation; this single
    // check also covers text that merely starts or ends with a space.
    const openTag = text.includes(' ') ? '<w:t xml:space="preserve">' : '<w:t>';
    return `<w:r>${runProps}${openTag}${escapeXml(text)}</w:t></w:r>`;
}
|
|
178
|
-
/**
 * Render run formatting as a run-properties element.
 *
 * Children are emitted in a fixed order: bold, italic, underline,
 * strikethrough, highlight, color, font size, font family.
 *
 * @param props - Optional formatting flags and values
 * @returns The run-properties XML, or '' when `props` is missing or sets nothing
 */
function generateRunPropertiesXml(props) {
    if (!props) {
        return '';
    }
    const fontName = props.fontFamily ? escapeXml(props.fontFamily) : '';
    const children = [
        props.bold ? '<w:b/>' : '',
        props.italic ? '<w:i/>' : '',
        props.underline ? `<w:u w:val="${escapeXml(props.underline)}"/>` : '',
        props.strikethrough ? '<w:strike/>' : '',
        props.highlight ? `<w:highlight w:val="${escapeXml(props.highlight)}"/>` : '',
        props.color ? `<w:color w:val="${escapeXml(props.color)}"/>` : '',
        // Font size is checked against undefined only, so a size of 0 is still written out.
        props.fontSize !== undefined ? `<w:sz w:val="${props.fontSize}"/>` : '',
        props.fontFamily ? `<w:rFonts w:ascii="${fontName}" w:hAnsi="${fontName}"/>` : '',
    ].join('');
    return children === '' ? '' : `<w:rPr>${children}</w:rPr>`;
}
|
|
205
|
-
/**
 * Return the inner content of a paragraph-properties element, i.e. the text
 * between the opening pPr tag and the last closing pPr tag.
 *
 * @param pPrXml - Full paragraph-properties XML, possibly empty or undefined
 * @returns The inner XML, or undefined when the input is empty or has no
 *   opening/closing pPr tag pair
 */
function extractPPrContent(pPrXml) {
    // Greedy capture: runs up to the LAST closing tag in the input.
    const found = pPrXml ? pPrXml.match(/<w:pPr[^>]*>([\s\S]*)<\/w:pPr>/) : null;
    if (found === null) {
        return undefined;
    }
    return found[1];
}
|
|
216
|
-
/**
 * Escape the five XML special characters so the value is safe to embed in
 * element text or in a quoted attribute value.
 *
 * The replacement is done in a single pass, so the ampersands that belong to
 * the produced entities are never escaped a second time.
 *
 * @param text - Value to escape; non-string values are converted with String()
 * @returns The escaped text
 */
function escapeXml(text) {
    const entities = {
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&apos;',
    };
    return String(text).replace(/[&<>"']/g, (ch) => entities[ch]);
}
|
|
227
|
-
//# sourceMappingURL=documentBuilder.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"documentBuilder.js","sourceRoot":"","sources":["../../../src/baselines/diffmatch/documentBuilder.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,EAAE,QAAQ,EAAE,MAAM,cAAc,CAAC;AACxC,OAAO,EACL,kBAAkB,EAClB,wBAAwB,EACxB,yBAAyB,EACzB,eAAe,GAChB,MAAM,2BAA2B,CAAC;AACnC,OAAO,EAAE,aAAa,EAA8B,MAAM,gBAAgB,CAAC;AAwB3E;;;;;GAKG;AACH,MAAM,UAAU,2BAA2B,CACzC,SAA0B,EAC1B,kBAA2C,EAC3C,iBAA0C,EAC1C,OAAqB;IAErB,MAAM,UAAU,GAAyB,EAAE,CAAC;IAC5C,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,GAAG,OAAO,CAAC;IAEjC,qDAAqD;IACrD,MAAM,eAAe,GAAG,IAAI,GAAG,EAAiC,CAAC;IACjE,KAAK,MAAM,CAAC,IAAI,kBAAkB,EAAE,CAAC;QACnC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;IAC1C,CAAC;IAED,oDAAoD;IACpD,MAAM,cAAc,GAAG,IAAI,GAAG,EAAiC,CAAC;IAChE,KAAK,MAAM,CAAC,IAAI,iBAAiB,EAAE,CAAC;QAClC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;IACzC,CAAC;IAED,oDAAoD;IACpD,MAAM,sBAAsB,GAAG,IAAI,GAAG,EAAU,CAAC;IACjD,MAAM,qBAAqB,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhD,oEAAoE;IACpE,MAAM,iBAAiB,GAAG,IAAI,GAAG,EAAiC,CAAC;IAEnE,KAAK,MAAM,KAAK,IAAI,SAAS,CAAC,OAAO,EAAE,CAAC;QACtC,MAAM,QAAQ,GAAG,KAAK,CAAC,QAAiC,CAAC;QACzD,MAAM,OAAO,GAAG,KAAK,CAAC,OAAgC,CAAC;QAEvD,sBAAsB,CAAC,GAAG,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QACnD,qBAAqB,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;QACjD,iBAAiB,CAAC,GAAG,CAAC,OAAO,CAAC,aAAa,EAAE,QAAQ,CAAC,CAAC;IACzD,CAAC;IAED,yCAAyC;IACzC,wDAAwD;IACxD,mEAAmE;IACnE,oCAAoC;IAEpC,IAAI,iBAAiB,GAAG,CAAC,CAAC,CAAC;IAE3B,KAAK,IAAI,MAAM,GAAG,CAAC,EAAE,MAAM,GAAG,iBAAiB,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,CAAC;QACjE,MAAM,OAAO,GAAG,iBAAiB,CAAC,MAAM,CAAE,CAAC;QAE3C,IAAI,qBAAqB,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;YACtC,8BAA8B;YAC9B,MAAM,QAAQ,GAAG,iBAAiB,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC;YAEhD,6DAA6D;YAC7D,KAAK,IAAI,CAAC,GAAG,iBAAiB,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,aAAa,EAAE,CAAC,EAAE,EAAE,CAAC;gBACpE,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;oBACnC,MAAM,WAAW,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;oBAC3C,IAAI,WAAW,EAAE,CAAC;wBAChB,UAAU,CAAC,IAAI,CAAC;4BACd,IAAI,EAAE,SAAS;4BACf,GAAG,EAAE,wBAAwB,CAC3B,WAAW,CAAC,IAA
I,EAChB,MAAM,EACN,IAAI,EACJ,iBAAiB,CAAC,WAAW,CAAC,MAAM,CAAC,CACtC;4BACD,aAAa,EAAE,CAAC;yBACjB,CAAC,CAAC;oBACL,CAAC;gBACH,CAAC;YACH,CAAC;YAED,gDAAgD;YAChD,MAAM,UAAU,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CACvC,CAAC,CAAC,EAAE,CAAE,CAAC,CAAC,OAAiC,CAAC,aAAa,KAAK,MAAM,CACnE,EAAE,UAAU,IAAI,CAAC,CAAC;YAEnB,IAAI,UAAU,IAAI,MAAM,EAAE,CAAC;gBACzB,kDAAkD;gBAClD,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,WAAW;oBACjB,GAAG,EAAE,oBAAoB,CAAC,OAAO,CAAC;oBAClC,aAAa,EAAE,QAAQ,CAAC,aAAa;oBACrC,YAAY,EAAE,MAAM;iBACrB,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,qCAAqC;gBACrC,MAAM,UAAU,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;gBACzD,MAAM,mBAAmB,GAAG,kBAAkB,CAAC,UAAU,CAAC,UAAU,EAAE;oBACpE,MAAM;oBACN,IAAI;iBACL,CAAC,CAAC;gBAEH,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,UAAU;oBAChB,GAAG,EAAE,eAAe,CAAC,mBAAmB,EAAE,iBAAiB,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;oBAC5E,aAAa,EAAE,QAAQ,CAAC,aAAa;oBACrC,YAAY,EAAE,MAAM;iBACrB,CAAC,CAAC;YACL,CAAC;YAED,iBAAiB,GAAG,QAAQ,CAAC,aAAa,CAAC;QAC7C,CAAC;aAAM,CAAC;YACN,gCAAgC;YAChC,UAAU,CAAC,IAAI,CAAC;gBACd,IAAI,EAAE,UAAU;gBAChB,GAAG,EAAE,yBAAyB,CAC5B,OAAO,CAAC,IAAI,EACZ,MAAM,EACN,IAAI,EACJ,iBAAiB,CAAC,OAAO,CAAC,MAAM,CAAC,CAClC;gBACD,YAAY,EAAE,MAAM;aACrB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,qDAAqD;IACrD,KAAK,IAAI,CAAC,GAAG,iBAAiB,GAAG,CAAC,EAAE,CAAC,GAAG,kBAAkB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvE,IAAI,CAAC,sBAAsB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;YACnC,MAAM,WAAW,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAC3C,IAAI,WAAW,EAAE,CAAC;gBAChB,UAAU,CAAC,IAAI,CAAC;oBACd,IAAI,EAAE,SAAS;oBACf,GAAG,EAAE,wBAAwB,CAC3B,WAAW,CAAC,IAAI,EAChB,MAAM,EACN,IAAI,EACJ,iBAAiB,CAAC,WAAW,CAAC,MAAM,CAAC,CACtC;oBACD,aAAa,EAAE,CAAC;iBACjB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,6BAA6B,CAC3C,WAAmB,EACnB,UAAgC;IAEhC,6BAA6B;IAC7B,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,SAAS,EAAE,GAAG,gBAAgB,CAAC,WAAW,CAAC,CAAC;IAE7E,sDAAsD;IACtD,MAAM,EAAE,MAAM,EAAE,GAAG,aAAa,CAAC,WAAW,CAAC,CAAC;IAE9C,yCAAyC;IACzC,MAAM,aAAa,GAAG,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,G
AAG,CAAC,CAAC;IAEnD,sDAAsD;IACtD,IAAI,cAAc,GAAG,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC9C,IAAI,MAAM,EAAE,CAAC;QACX,cAAc,IAAI,IAAI,GAAG,MAAM,CAAC;IAClC,CAAC;IAED,uBAAuB;IACvB,OAAO,UAAU,GAAG,cAAc,GAAG,SAAS,CAAC;AACjD,CAAC;AAED;;GAEG;AACH,SAAS,gBAAgB,CAAC,WAAmB;IAK3C,kCAAkC;IAClC,MAAM,aAAa,GAAG,WAAW,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;IACzD,MAAM,cAAc,GAAG,WAAW,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;IAEvD,IAAI,CAAC,aAAa,IAAI,CAAC,cAAc,EAAE,CAAC;QACtC,OAAO;YACL,UAAU,EAAE,WAAW;YACvB,WAAW,EAAE,EAAE;YACf,SAAS,EAAE,EAAE;SACd,CAAC;IACJ,CAAC;IAED,MAAM,WAAW,GAAG,WAAW,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;IACpF,MAAM,cAAc,GAAG,WAAW,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC;IAE5D,OAAO;QACL,UAAU,EAAE,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,WAAW,CAAC;QAC7C,WAAW,EAAE,WAAW,CAAC,KAAK,CAAC,WAAW,EAAE,cAAc,CAAC;QAC3D,SAAS,EAAE,WAAW,CAAC,KAAK,CAAC,cAAc,CAAC;KAC7C,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,IAA2B;IACvD,4BAA4B;IAC5B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACnE,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IAE3C,OAAO,QAAQ,GAAG,GAAG,OAAO,QAAQ,CAAC;AACvC,CAAC;AAED;;GAEG;AACH,SAAS,cAAc,CAAC,GAAY;IAClC,MAAM,GAAG,GAAG,wBAAwB,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC;IAErD,iCAAiC;IACjC,MAAM,UAAU,GAAG,GAAG,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;IACjG,MAAM,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,uBAAuB,CAAC,CAAC,CAAC,EAAE,CAAC;IAE5D,OAAO,QAAQ,GAAG,OAAO,SAAS,IAAI,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,cAAc,CAAC;AAC1E,CAAC;AAED;;GAEG;AACH,SAAS,wBAAwB,CAAC,KAA2D;IAC3F,IAAI,CAAC,KAAK;QAAE,OAAO,EAAE,CAAC;IAEtB,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,IAAI,KAAK,CAAC,IAAI;QAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACrC,IAAI,KAAK,CAAC,MAAM;QAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACvC,IAAI,KAAK,CAAC,SAAS;QAAE,KAAK,CAAC,IAAI,CAAC,eAAe,SAAS,CAAC,KAAK,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;IAChF,IAAI,KAAK,CAAC,aAAa;QAAE,KAAK,CAAC,
IAAI,CAAC,aAAa,CAAC,CAAC;IACnD,IAAI,KAAK,CAAC,SAAS;QAAE,KAAK,CAAC,IAAI,CAAC,uBAAuB,SAAS,CAAC,KAAK,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;IACxF,IAAI,KAAK,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,CAAC,mBAAmB,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAC5E,IAAI,KAAK,CAAC,QAAQ,KAAK,SAAS;QAAE,KAAK,CAAC,IAAI,CAAC,gBAAgB,KAAK,CAAC,QAAQ,KAAK,CAAC,CAAC;IAClF,IAAI,KAAK,CAAC,UAAU;QAAE,KAAK,CAAC,IAAI,CAAC,sBAAsB,SAAS,CAAC,KAAK,CAAC,UAAU,CAAC,cAAc,SAAS,CAAC,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;IAElI,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAElC,OAAO,UAAU,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC;AAC5C,CAAC;AAED;;;GAGG;AACH,SAAS,iBAAiB,CAAC,MAAe;IACxC,IAAI,CAAC,MAAM;QAAE,OAAO,SAAS,CAAC;IAE9B,oBAAoB;IACpB,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;IAC7D,OAAO,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;AACtC,CAAC;AAED;;GAEG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,IAAI;SACR,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;SACtB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC;SACvB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;AAC7B,CAAC"}
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Baseline B: Paragraph alignment using LCS (Longest Common Subsequence).
|
|
3
|
-
*
|
|
4
|
-
* Aligns paragraphs between original and revised documents to identify:
|
|
5
|
-
* - Matched paragraphs (same or similar content)
|
|
6
|
-
* - Inserted paragraphs (only in revised)
|
|
7
|
-
* - Deleted paragraphs (only in original)
|
|
8
|
-
* - Modified paragraphs (matched but with differences)
|
|
9
|
-
*/
|
|
10
|
-
import type { ParagraphInfo, AlignmentResult } from '../../shared/ooxml/types.js';
|
|
11
|
-
/**
|
|
12
|
-
* Compute a hash for a paragraph's normalized text.
|
|
13
|
-
*
|
|
14
|
-
* Used for fast comparison during LCS.
|
|
15
|
-
*/
|
|
16
|
-
export declare function hashParagraph(text: string): string;
|
|
17
|
-
/**
|
|
18
|
-
* Normalize paragraph text for comparison.
|
|
19
|
-
*
|
|
20
|
-
* - Trim whitespace
|
|
21
|
-
* - Collapse multiple spaces
|
|
22
|
-
* - Lowercase for case-insensitive comparison
|
|
23
|
-
*/
|
|
24
|
-
export declare function normalizeParagraphText(text: string): string;
|
|
25
|
-
/**
|
|
26
|
-
* Compute similarity between two strings using Jaccard index on words.
|
|
27
|
-
*
|
|
28
|
-
* @returns Value between 0 (completely different) and 1 (identical)
|
|
29
|
-
*/
|
|
30
|
-
export declare function computeSimilarity(a: string, b: string): number;
|
|
31
|
-
/**
|
|
32
|
-
* Compute the Longest Common Subsequence of two arrays using dynamic programming.
|
|
33
|
-
*
|
|
34
|
-
* @param a - First array
|
|
35
|
-
* @param b - Second array
|
|
36
|
-
* @param keyFn - Function to extract comparison key from elements
|
|
37
|
-
* @returns Array of [indexA, indexB] pairs representing the LCS
|
|
38
|
-
*/
|
|
39
|
-
export declare function lcs<T>(a: T[], b: T[], keyFn: (item: T) => string): Array<[number, number]>;
|
|
40
|
-
/**
|
|
41
|
-
* Align paragraphs between original and revised documents.
|
|
42
|
-
*
|
|
43
|
-
* Uses hash-based LCS to find matching paragraphs, then classifies
|
|
44
|
-
* unmatched paragraphs as inserted or deleted.
|
|
45
|
-
*
|
|
46
|
-
* @param original - Paragraphs from original document
|
|
47
|
-
* @param revised - Paragraphs from revised document
|
|
48
|
-
* @param similarityThreshold - Minimum similarity to consider a match (default: 0.5)
|
|
49
|
-
*/
|
|
50
|
-
export declare function alignParagraphs(original: ParagraphInfo[], revised: ParagraphInfo[], similarityThreshold?: number): AlignmentResult;
|
|
51
|
-
/**
|
|
52
|
-
* Classify paragraph matches into categories.
|
|
53
|
-
*/
|
|
54
|
-
export interface ParagraphClassification {
|
|
55
|
-
/** Identical paragraphs (similarity = 1.0) */
|
|
56
|
-
identical: Array<{
|
|
57
|
-
original: ParagraphInfo;
|
|
58
|
-
revised: ParagraphInfo;
|
|
59
|
-
}>;
|
|
60
|
-
/** Modified paragraphs (0 < similarity < 1.0) */
|
|
61
|
-
modified: Array<{
|
|
62
|
-
original: ParagraphInfo;
|
|
63
|
-
revised: ParagraphInfo;
|
|
64
|
-
similarity: number;
|
|
65
|
-
}>;
|
|
66
|
-
/** Deleted paragraphs */
|
|
67
|
-
deleted: ParagraphInfo[];
|
|
68
|
-
/** Inserted paragraphs */
|
|
69
|
-
inserted: ParagraphInfo[];
|
|
70
|
-
}
|
|
71
|
-
/**
|
|
72
|
-
* Classify alignment result into more granular categories.
|
|
73
|
-
*/
|
|
74
|
-
export declare function classifyAlignment(alignment: AlignmentResult): ParagraphClassification;
|
|
75
|
-
//# sourceMappingURL=paragraphAlignment.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"paragraphAlignment.d.ts","sourceRoot":"","sources":["../../../src/baselines/diffmatch/paragraphAlignment.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAGH,OAAO,KAAK,EAAE,aAAa,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAElF;;;;GAIG;AACH,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGlD;AAED;;;;;;GAMG;AACH,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAK3D;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CAe9D;AAED;;;;;;;GAOG;AACH,wBAAgB,GAAG,CAAC,CAAC,EACnB,CAAC,EAAE,CAAC,EAAE,EACN,CAAC,EAAE,CAAC,EAAE,EACN,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,KAAK,MAAM,GACzB,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAqCzB;AAED;;;;;;;;;GASG;AACH,wBAAgB,eAAe,CAC7B,QAAQ,EAAE,aAAa,EAAE,EACzB,OAAO,EAAE,aAAa,EAAE,EACxB,mBAAmB,SAAM,GACxB,eAAe,CA6FjB;AAED;;GAEG;AACH,MAAM,WAAW,uBAAuB;IACtC,8CAA8C;IAC9C,SAAS,EAAE,KAAK,CAAC;QAAE,QAAQ,EAAE,aAAa,CAAC;QAAC,OAAO,EAAE,aAAa,CAAA;KAAE,CAAC,CAAC;IACtE,iDAAiD;IACjD,QAAQ,EAAE,KAAK,CAAC;QACd,QAAQ,EAAE,aAAa,CAAC;QACxB,OAAO,EAAE,aAAa,CAAC;QACvB,UAAU,EAAE,MAAM,CAAC;KACpB,CAAC,CAAC;IACH,yBAAyB;IACzB,OAAO,EAAE,aAAa,EAAE,CAAC;IACzB,0BAA0B;IAC1B,QAAQ,EAAE,aAAa,EAAE,CAAC;CAC3B;AAED;;GAEG;AACH,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,eAAe,GAAG,uBAAuB,CAoBrF"}
|
|
@@ -1,206 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Baseline B: Paragraph alignment using LCS (Longest Common Subsequence).
|
|
3
|
-
*
|
|
4
|
-
* Aligns paragraphs between original and revised documents to identify:
|
|
5
|
-
* - Matched paragraphs (same or similar content)
|
|
6
|
-
* - Inserted paragraphs (only in revised)
|
|
7
|
-
* - Deleted paragraphs (only in original)
|
|
8
|
-
* - Modified paragraphs (matched but with differences)
|
|
9
|
-
*/
|
|
10
|
-
import { createHash } from 'crypto';
|
|
11
|
-
/**
 * Hash a paragraph's text after normalization.
 *
 * Paragraphs whose normalized text is equal get the same hash, which lets the
 * LCS step compare paragraphs by key.
 *
 * @param text - Raw paragraph text
 * @returns Hex-encoded SHA-1 digest of the normalized text
 */
export function hashParagraph(text) {
    const hasher = createHash('sha1');
    hasher.update(normalizeParagraphText(text));
    return hasher.digest('hex');
}
|
|
20
|
-
/**
 * Normalize paragraph text for comparison.
 *
 * Leading and trailing whitespace is removed, every run of whitespace becomes
 * a single space, and the result is lowercased so comparisons are
 * case-insensitive.
 *
 * @param text - Raw paragraph text
 * @returns The normalized text
 */
export function normalizeParagraphText(text) {
    const trimmed = text.trim();
    // Splitting on whitespace runs and re-joining collapses each run to one space.
    const collapsed = trimmed.split(/\s+/).join(' ');
    return collapsed.toLowerCase();
}
|
|
33
|
-
/**
 * Word-level Jaccard similarity of two strings.
 *
 * Both inputs are normalized and split into sets of distinct words; the score
 * is the number of shared words divided by the number of distinct words
 * overall. Word order and repetition do not affect the score.
 *
 * @param a - First text
 * @param b - Second text
 * @returns A value from 0 (no shared word) to 1 (same word set); 1 when both
 *   texts contain no words, 0 when exactly one of them contains none
 */
export function computeSimilarity(a, b) {
    const toWordSet = (text) => new Set(normalizeParagraphText(text).split(' ').filter((word) => word.length > 0));
    const left = toWordSet(a);
    const right = toWordSet(b);
    if (left.size === 0 || right.size === 0) {
        // Two empty texts count as identical; one empty text matches nothing.
        return left.size === right.size ? 1 : 0;
    }
    let shared = 0;
    for (const word of left) {
        if (right.has(word)) {
            shared += 1;
        }
    }
    // |A union B| = |A| + |B| - |A intersect B|
    return shared / (left.size + right.size - shared);
}
|
|
51
|
-
/**
 * Compute the Longest Common Subsequence of two arrays using dynamic programming.
 *
 * Elements are compared by the key `keyFn` returns for them. Each key is
 * computed once per element up front, not once per table cell, so an
 * expensive `keyFn` costs O(m + n) calls; the table itself takes O(m * n)
 * time and memory.
 *
 * @param a - First array
 * @param b - Second array
 * @param keyFn - Function to extract the comparison key from an element
 * @returns Array of [indexA, indexB] pairs representing the LCS, in ascending order
 */
export function lcs(a, b, keyFn) {
    const keysA = a.map((item) => keyFn(item));
    const keysB = b.map((item) => keyFn(item));
    const m = keysA.length;
    const n = keysB.length;
    // dp[i][j] = LCS length of the first i elements of a and the first j of b.
    const dp = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));
    for (let i = 1; i <= m; i++) {
        const row = dp[i];
        const prevRow = dp[i - 1];
        for (let j = 1; j <= n; j++) {
            row[j] = keysA[i - 1] === keysB[j - 1]
                ? prevRow[j - 1] + 1
                : Math.max(prevRow[j], row[j - 1]);
        }
    }
    // Walk back from the bottom-right corner collecting matched index pairs.
    const pairs = [];
    let i = m;
    let j = n;
    while (i > 0 && j > 0) {
        if (keysA[i - 1] === keysB[j - 1]) {
            pairs.push([i - 1, j - 1]);
            i--;
            j--;
        }
        else if (dp[i - 1][j] > dp[i][j - 1]) {
            i--;
        }
        else {
            // On ties step left, i.e. drop the element of b.
            j--;
        }
    }
    return pairs.reverse();
}
|
|
93
|
-
/**
 * Align paragraphs between original and revised documents.
 *
 * Phase 1 pairs paragraphs whose text hashes are equal, using an LCS over the
 * two hash sequences. Phase 2 goes through the originals left over from
 * phase 1 in order and pairs each with the most similar leftover revised
 * paragraph, provided the similarity reaches the threshold; a revised
 * paragraph is used at most once. Phase-2 matches are appended after the
 * phase-1 matches.
 *
 * @param original - Paragraphs from original document
 * @param revised - Paragraphs from revised document
 * @param similarityThreshold - Minimum similarity to consider a match (default: 0.5)
 * @returns `{ matched, deleted, inserted }`: matched pairs with their
 *   similarity, originals without a partner, revised paragraphs without a partner
 */
export function alignParagraphs(original, revised, similarityThreshold = 0.5) {
    const withHash = (para) => ({ para, hash: hashParagraph(para.text) });
    const originalHashes = original.map(withHash);
    const revisedHashes = revised.map(withHash);
    // Phase 1: exact matches (after normalization), order-preserving.
    const exactPairs = lcs(originalHashes, revisedHashes, (item) => item.hash);
    const pairedOriginal = new Set();
    const pairedRevised = new Set();
    const matched = [];
    for (const [origIdx, revIdx] of exactPairs) {
        pairedOriginal.add(origIdx);
        pairedRevised.add(revIdx);
        matched.push({
            original: original[origIdx],
            revised: revised[revIdx],
            similarity: computeSimilarity(original[origIdx].text, revised[revIdx].text),
        });
    }
    // Phase 2: near-matches among everything phase 1 left over.
    const leftoverOriginal = original.filter((_, idx) => !pairedOriginal.has(idx));
    const candidates = revised.filter((_, idx) => !pairedRevised.has(idx));
    const deleted = [];
    for (const origPara of leftoverOriginal) {
        let best = null;
        for (const revPara of candidates) {
            const similarity = computeSimilarity(origPara.text, revPara.text);
            // Strict '>' keeps the earliest candidate when scores tie.
            if (similarity >= similarityThreshold && (!best || similarity > best.similarity)) {
                best = { para: revPara, similarity };
            }
        }
        if (!best) {
            deleted.push(origPara);
            continue;
        }
        // Take the chosen candidate out of the pool so it cannot be matched twice.
        const position = candidates.indexOf(best.para);
        if (position !== -1) {
            candidates.splice(position, 1);
        }
        matched.push({
            original: origPara,
            revised: best.para,
            similarity: best.similarity,
        });
    }
    // Whatever is still in the pool was never matched: these are insertions.
    return { matched, deleted, inserted: candidates };
}
|
|
183
|
-
/**
 * Classify alignment result into more granular categories.
 *
 * Each entry of `alignment.matched` is sorted by its `similarity` score:
 * pairs at or above the threshold are reported as identical (as
 * `{ original, revised }`), all others are reported as modified (the match
 * object itself). `deleted` and `inserted` are passed through by reference,
 * not copied.
 *
 * @param alignment - Object with `matched`, `deleted` and `inserted` arrays
 * @param options - Optional tuning. An options object is used instead of a
 *   bare number so that `list.map(classifyAlignment)` (which passes the index
 *   as second argument) still gets the default threshold.
 * @param options.identicalThreshold - Minimum similarity for a pair to count
 *   as identical (default: 0.9999)
 * @returns `{ identical, modified, deleted, inserted }`
 */
export function classifyAlignment(alignment, options = {}) {
    // Accept only a numeric override; anything else falls back to the historical cut-off.
    const requested = options && typeof options === 'object' ? options.identicalThreshold : undefined;
    const identicalThreshold = typeof requested === 'number' ? requested : 0.9999;
    const identical = [];
    const modified = [];
    for (const match of alignment.matched) {
        if (match.similarity >= identicalThreshold) {
            identical.push({
                original: match.original,
                revised: match.revised,
            });
        }
        else {
            modified.push(match);
        }
    }
    return {
        identical,
        modified,
        deleted: alignment.deleted,
        inserted: alignment.inserted,
    };
}
|
|
206
|
-
//# sourceMappingURL=paragraphAlignment.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"paragraphAlignment.js","sourceRoot":"","sources":["../../../src/baselines/diffmatch/paragraphAlignment.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAGpC;;;;GAIG;AACH,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,UAAU,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAC;IAChD,OAAO,UAAU,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AAC7D,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,sBAAsB,CAAC,IAAY;IACjD,OAAO,IAAI;SACR,IAAI,EAAE;SACN,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,WAAW,EAAE,CAAC;AACnB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,iBAAiB,CAAC,CAAS,EAAE,CAAS;IACpD,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;IACvF,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,sBAAsB,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;IAEvF,IAAI,MAAM,CAAC,IAAI,KAAK,CAAC,IAAI,MAAM,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QAC3C,OAAO,CAAC,CAAC,CAAC,aAAa;IACzB,CAAC;IACD,IAAI,MAAM,CAAC,IAAI,KAAK,CAAC,IAAI,MAAM,CAAC,IAAI,KAAK,CAAC,EAAE,CAAC;QAC3C,OAAO,CAAC,CAAC,CAAC,YAAY;IACxB,CAAC;IAED,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACrE,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC;IAE9C,OAAO,YAAY,CAAC,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC;AACxC,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,GAAG,CACjB,CAAM,EACN,CAAM,EACN,KAA0B;IAE1B,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;IACnB,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC;IAEnB,iBAAiB;IACjB,MAAM,EAAE,GAAe,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,EAAE,CACxD,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC,GAAG,CAAC,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC,CACvC,CAAC;IAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5B,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,KAAK,KAAK,
CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,EAAE,CAAC;gBAC1C,EAAE,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,GAAG,EAAE,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,CAAC,GAAG,CAAC,CAAE,GAAG,CAAC,CAAC;YACrC,CAAC;iBAAM,CAAC;gBACN,EAAE,CAAC,CAAC,CAAE,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,CAAC,CAAE,EAAE,EAAE,CAAC,CAAC,CAAE,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,CAAC;YACvD,CAAC;QACH,CAAC;IACH,CAAC;IAED,wBAAwB;IACxB,MAAM,MAAM,GAA4B,EAAE,CAAC;IAC3C,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,IAAI,CAAC,GAAG,CAAC,CAAC;IAEV,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QACtB,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,KAAK,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,EAAE,CAAC;YAC1C,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YAC5B,CAAC,EAAE,CAAC;YACJ,CAAC,EAAE,CAAC;QACN,CAAC;aAAM,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAE,CAAC,CAAC,CAAE,GAAG,EAAE,CAAC,CAAC,CAAE,CAAC,CAAC,GAAG,CAAC,CAAE,EAAE,CAAC;YAC3C,CAAC,EAAE,CAAC;QACN,CAAC;aAAM,CAAC;YACN,CAAC,EAAE,CAAC;QACN,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC,OAAO,EAAE,CAAC;AAC1B,CAAC;AAED;;;;;;;;;GASG;AACH,MAAM,UAAU,eAAe,CAC7B,QAAyB,EACzB,OAAwB,EACxB,mBAAmB,GAAG,GAAG;IAEzB,oCAAoC;IACpC,MAAM,cAAc,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACxC,IAAI,EAAE,CAAC;QACP,IAAI,EAAE,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC;KAC5B,CAAC,CAAC,CAAC;IACJ,MAAM,aAAa,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QACtC,IAAI,EAAE,CAAC;QACP,IAAI,EAAE,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC;KAC5B,CAAC,CAAC,CAAC;IAEJ,2BAA2B;IAC3B,MAAM,SAAS,GAAG,GAAG,CAAC,cAAc,EAAE,aAAa,EAAE,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAExE,gCAAgC;IAChC,MAAM,eAAe,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAC3D,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;IAE5D,yBAAyB;IACzB,MAAM,MAAM,GAAoB;QAC9B,OAAO,EAAE,EAAE;QACX,OAAO,EAAE,EAAE;QACX,QAAQ,EAAE,EAAE;KACb,CAAC;IAEF,yBAAyB;IACzB,KAAK,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1C,MAAM,QAAQ,GAAG,QAAQ,CAA
C,OAAO,CAAE,CAAC;QACpC,MAAM,OAAO,GAAG,OAAO,CAAC,MAAM,CAAE,CAAC;QACjC,MAAM,UAAU,GAAG,iBAAiB,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;QAElE,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC;YAClB,QAAQ,EAAE,QAAQ;YAClB,OAAO,EAAE,OAAO;YAChB,UAAU;SACX,CAAC,CAAC;IACL,CAAC;IAED,uDAAuD;IACvD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;YAC5B,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAE,CAAC,CAAC;QACpC,CAAC;IACH,CAAC;IAED,uDAAuD;IACvD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACxC,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3B,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAE,CAAC,CAAC;QACpC,CAAC;IACH,CAAC;IAED,oDAAoD;IACpD,8DAA8D;IAC9D,MAAM,iBAAiB,GAAG,CAAC,GAAG,MAAM,CAAC,OAAO,CAAC,CAAC;IAC9C,MAAM,gBAAgB,GAAG,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC,CAAC;IAE9C,MAAM,CAAC,OAAO,GAAG,EAAE,CAAC;IACpB,MAAM,CAAC,QAAQ,GAAG,EAAE,CAAC;IAErB,KAAK,MAAM,QAAQ,IAAI,iBAAiB,EAAE,CAAC;QACzC,IAAI,SAAS,GAAuD,IAAI,CAAC;QAEzE,KAAK,MAAM,OAAO,IAAI,gBAAgB,EAAE,CAAC;YACvC,MAAM,UAAU,GAAG,iBAAiB,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;YAClE,IAAI,UAAU,IAAI,mBAAmB,EAAE,CAAC;gBACtC,IAAI,CAAC,SAAS,IAAI,UAAU,GAAG,SAAS,CAAC,UAAU,EAAE,CAAC;oBACpD,SAAS,GAAG,EAAE,IAAI,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC;gBAC5C,CAAC;YACH,CAAC;QACH,CAAC;QAED,IAAI,SAAS,EAAE,CAAC;YACd,gCAAgC;YAChC,MAAM,GAAG,GAAG,gBAAgB,CAAC,OAAO,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YACrD,IAAI,GAAG,KAAK,CAAC,CAAC,EAAE,CAAC;gBACf,gBAAgB,CAAC,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;YAClC,CAAC;YAED,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC;gBAClB,QAAQ,EAAE,QAAQ;gBAClB,OAAO,EAAE,SAAS,CAAC,IAAI;gBACvB,UAAU,EAAE,SAAS,CAAC,UAAU;aACjC,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAED,6CAA6C;IAC7C,MAAM,CAAC,QAAQ,GAAG,gBAAgB,CAAC;IAEnC,OAAO,MAAM,CAAC;AAChB,CAAC;AAoBD;;GAEG;AACH,MAAM,UAAU,iBAAiB,CAAC,SAA0B;IAC1D,MAAM,MAAM,GAA4B;QACtC,SAAS,EAAE,EAAE;QACb,QAAQ,EAAE,EAAE;QACZ,OAAO,EAAE,SAAS,CAAC,OAAO;QAC1B,Q
AAQ,EAAE,SAAS,CAAC,QAAQ;KAC7B,CAAC;IAEF,KAAK,MAAM,KAAK,IAAI,SAAS,CAAC,OAAO,EAAE,CAAC;QACtC,IAAI,KAAK,CAAC,UAAU,IAAI,MAAM,EAAE,CAAC;YAC/B,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC;gBACpB,QAAQ,EAAE,KAAK,CAAC,QAAQ;gBACxB,OAAO,EAAE,KAAK,CAAC,OAAO;aACvB,CAAC,CAAC;QACL,CAAC;aAAM,CAAC;YACN,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Baseline B: Pure TypeScript document comparison pipeline.
|
|
3
|
-
*
|
|
4
|
-
* Orchestrates the comparison flow:
|
|
5
|
-
* 1. Load DOCX archives
|
|
6
|
-
* 2. Extract paragraphs
|
|
7
|
-
* 3. Align paragraphs (LCS + similarity)
|
|
8
|
-
* 4. Diff runs within matched paragraphs
|
|
9
|
-
* 5. Render track changes
|
|
10
|
-
* 6. Build output document
|
|
11
|
-
*/
|
|
12
|
-
import type { CompareResult } from '../../index.js';
|
|
13
|
-
/**
|
|
14
|
-
* Options for Baseline B comparison (see compareDocumentsBaselineB). Every field is optional.
|
|
15
|
-
*/
|
|
16
|
-
export interface BaselineBOptions {
|
|
17
|
-
/** Author name recorded on the generated track changes. Default: "Comparison" */
|
|
18
|
-
author?: string;
|
|
19
|
-
/** Timestamp recorded on the generated track changes. Default: current time (a new Date created per call) */
|
|
20
|
-
date?: Date;
|
|
21
|
-
/** Minimum similarity score for two paragraphs to be paired as a match during alignment. Default: 0.5 */
|
|
22
|
-
similarityThreshold?: number;
|
|
23
|
-
}
|
|
24
|
-
/**
|
|
25
|
-
* Compare two DOCX documents using the Baseline B (pure TypeScript) approach.
|
|
26
|
-
*
|
|
27
|
-
* @param original - Original document as Buffer
|
|
28
|
-
* @param revised - Revised document as Buffer
|
|
29
|
-
* @param options - Comparison options (author, date, similarityThreshold); may be omitted
|
|
30
|
-
* @returns Promise resolving to the comparison result with track changes document
|
|
31
|
-
*/
|
|
32
|
-
export declare function compareDocumentsBaselineB(original: Buffer, revised: Buffer, options?: BaselineBOptions): Promise<CompareResult>;
|
|
33
|
-
//# sourceMappingURL=pipeline.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../../src/baselines/diffmatch/pipeline.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAGH,OAAO,KAAK,EAAE,aAAa,EAAgB,MAAM,gBAAgB,CAAC;AAWlE;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,2DAA2D;IAC3D,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,yDAAyD;IACzD,IAAI,CAAC,EAAE,IAAI,CAAC;IACZ,sEAAsE;IACtE,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B;AAED;;;;;;;GAOG;AACH,wBAAsB,yBAAyB,CAC7C,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,OAAO,GAAE,gBAAqB,GAC7B,OAAO,CAAC,aAAa,CAAC,CAqDxB"}
|
|
@@ -1,84 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Baseline B: Pure TypeScript document comparison pipeline.
|
|
3
|
-
*
|
|
4
|
-
* Orchestrates the comparison flow:
|
|
5
|
-
* 1. Load DOCX archives
|
|
6
|
-
* 2. Extract paragraphs
|
|
7
|
-
* 3. Align paragraphs (LCS + similarity)
|
|
8
|
-
* 4. Diff runs within matched paragraphs
|
|
9
|
-
* 5. Render track changes
|
|
10
|
-
* 6. Build output document
|
|
11
|
-
*/
|
|
12
|
-
import { DocxArchive } from '../../shared/docx/DocxArchive.js';
|
|
13
|
-
import { extractParagraphs } from './xmlParser.js';
|
|
14
|
-
import { alignParagraphs, classifyAlignment } from './paragraphAlignment.js';
|
|
15
|
-
import { resetRevisionIds } from './trackChangesRenderer.js';
|
|
16
|
-
import { generateParagraphOperations, buildDocumentWithTrackChanges, } from './documentBuilder.js';
|
|
17
|
-
/**
 * Compare two DOCX documents using the Baseline B (pure TypeScript) approach.
 *
 * Loads both archives, extracts and aligns their paragraphs, renders the
 * differences as track changes into a clone of the original archive, and
 * returns the saved result together with summary statistics.
 *
 * @param original - Original document as Buffer
 * @param revised - Revised document as Buffer
 * @param options - Comparison options (`author`, `date`, `similarityThreshold`)
 * @returns Comparison result with track changes document
 */
export async function compareDocumentsBaselineB(original, revised, options = {}) {
    const { author = 'Comparison', date = new Date(), similarityThreshold = 0.5, } = options;
    // 1. Load archives. The two loads are independent, so run them concurrently
    //    instead of awaiting one after the other.
    const [originalArchive, revisedArchive] = await Promise.all([
        DocxArchive.load(original),
        DocxArchive.load(revised),
    ]);
    // 2. Extract document XML (again independent per archive)
    const [originalXml, revisedXml] = await Promise.all([
        originalArchive.getDocumentXml(),
        revisedArchive.getDocumentXml(),
    ]);
    // 3. Extract paragraphs from both documents
    const originalParagraphs = extractParagraphs(originalXml);
    const revisedParagraphs = extractParagraphs(revisedXml);
    // 4. Reset revision ID counter for this comparison
    // NOTE(review): presumably this resets module-level state shared by all calls.
    // Steps 4-7 contain no `await`, so another comparison cannot interleave between
    // the reset and the rendering; keep it that way unless the counter is made local.
    resetRevisionIds();
    // 5. Align paragraphs between documents
    const alignment = alignParagraphs(originalParagraphs, revisedParagraphs, similarityThreshold);
    // 6. Generate paragraph operations with track changes
    const operations = generateParagraphOperations(alignment, originalParagraphs, revisedParagraphs, { author, date });
    // 7. Build new document.xml with track changes
    const newDocumentXml = buildDocumentWithTrackChanges(originalXml, operations);
    // 8. Clone original archive and update document.xml
    const resultArchive = await originalArchive.clone();
    resultArchive.setDocumentXml(newDocumentXml);
    // 9. Save result and compute stats
    const resultBuffer = await resultArchive.save();
    const stats = computeStats(alignment, operations);
    return {
        document: resultBuffer,
        stats,
        engine: 'diffmatch',
    };
}
|
|
56
|
-
/**
 * Compute comparison statistics from alignment and operations.
 *
 * @param alignment - Paragraph alignment; classified via classifyAlignment to count modifications
 * @param operations - Iterable of paragraph operations; entries whose `type` is
 *   'inserted' or 'deleted' are tallied
 * @returns `{ insertions, deletions, modifications }`
 */
function computeStats(alignment, operations) {
    // Count inserted / deleted operations
    let insertions = 0;
    let deletions = 0;
    for (const op of operations) {
        if (op.type === 'inserted') {
            insertions++;
        }
        else if (op.type === 'deleted') {
            deletions++;
        }
    }
    // Modifications are taken from the alignment classification, not from the
    // operations list: a per-operation tally of 'modified' entries was previously
    // computed and then unconditionally overwritten, so it has been removed.
    const modifications = classifyAlignment(alignment).modified.length;
    return {
        insertions,
        deletions,
        modifications,
    };
}
|
|
84
|
-
//# sourceMappingURL=pipeline.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../../src/baselines/diffmatch/pipeline.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,EAAE,WAAW,EAAE,MAAM,kCAAkC,CAAC;AAG/D,OAAO,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACnD,OAAO,EAAE,eAAe,EAAE,iBAAiB,EAAE,MAAM,yBAAyB,CAAC;AAC7E,OAAO,EAAE,gBAAgB,EAAE,MAAM,2BAA2B,CAAC;AAC7D,OAAO,EACL,2BAA2B,EAC3B,6BAA6B,GAE9B,MAAM,sBAAsB,CAAC;AAc9B;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,QAAgB,EAChB,OAAe,EACf,UAA4B,EAAE;IAE9B,MAAM,EACJ,MAAM,GAAG,YAAY,EACrB,IAAI,GAAG,IAAI,IAAI,EAAE,EACjB,mBAAmB,GAAG,GAAG,GAC1B,GAAG,OAAO,CAAC;IAEZ,mBAAmB;IACnB,MAAM,eAAe,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IACzD,MAAM,cAAc,GAAG,MAAM,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAEvD,0BAA0B;IAC1B,MAAM,WAAW,GAAG,MAAM,eAAe,CAAC,cAAc,EAAE,CAAC;IAC3D,MAAM,UAAU,GAAG,MAAM,cAAc,CAAC,cAAc,EAAE,CAAC;IAEzD,4CAA4C;IAC5C,MAAM,kBAAkB,GAAG,iBAAiB,CAAC,WAAW,CAAC,CAAC;IAC1D,MAAM,iBAAiB,GAAG,iBAAiB,CAAC,UAAU,CAAC,CAAC;IAExD,mDAAmD;IACnD,gBAAgB,EAAE,CAAC;IAEnB,wCAAwC;IACxC,MAAM,SAAS,GAAG,eAAe,CAC/B,kBAAkB,EAClB,iBAAiB,EACjB,mBAAmB,CACpB,CAAC;IAEF,sDAAsD;IACtD,MAAM,UAAU,GAAG,2BAA2B,CAC5C,SAAS,EACT,kBAAkB,EAClB,iBAAiB,EACjB,EAAE,MAAM,EAAE,IAAI,EAAE,CACjB,CAAC;IAEF,+CAA+C;IAC/C,MAAM,cAAc,GAAG,6BAA6B,CAAC,WAAW,EAAE,UAAU,CAAC,CAAC;IAE9E,oDAAoD;IACpD,MAAM,aAAa,GAAG,MAAM,eAAe,CAAC,KAAK,EAAE,CAAC;IACpD,aAAa,CAAC,cAAc,CAAC,cAAc,CAAC,CAAC;IAE7C,mCAAmC;IACnC,MAAM,YAAY,GAAG,MAAM,aAAa,CAAC,IAAI,EAAE,CAAC;IAChD,MAAM,KAAK,GAAG,YAAY,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;IAElD,OAAO;QACL,QAAQ,EAAE,YAAY;QACtB,KAAK;QACL,MAAM,EAAE,WAAW;KACpB,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,YAAY,CACnB,SAA0B,EAC1B,UAAgC;IAEhC,mBAAmB;IACnB,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,aAAa,GAAG,CAAC,CAAC;IAEtB,KAAK,MAAM,EAAE,IAAI,UAAU,EAAE,CAAC;QAC5B,IAAI,EAAE,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;YAC3B,UAAU,EAAE,CAAC;QACf,CAAC;aAAM,IAAI,EAAE,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;YACjC,SAAS,EAAE,CAAC;QACd,CAAC;aAAM,IAAI,EAAE,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;YAClC,aAAa,EAAE,CAAC;QAClB,CAAC
;IACH,CAAC;IAED,kEAAkE;IAClE,MAAM,cAAc,GAAG,iBAAiB,CAAC,SAAS,CAAC,CAAC;IACpD,aAAa,GAAG,cAAc,CAAC,QAAQ,CAAC,MAAM,CAAC;IAE/C,OAAO;QACL,UAAU;QACV,SAAS;QACT,aAAa;KACd,CAAC;AACJ,CAAC"}
|