@adeu/core 1.6.8 → 1.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@adeu/core",
3
- "version": "1.6.8",
3
+ "version": "1.6.9",
4
4
  "description": "",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",
@@ -0,0 +1,134 @@
1
+ import { describe, it, expect } from "vitest";
2
+ import {
3
+ readFileSync,
4
+ existsSync,
5
+ readdirSync,
6
+ writeFileSync,
7
+ unlinkSync,
8
+ } from "node:fs";
9
+ import { resolve, dirname } from "node:path";
10
+ import { fileURLToPath } from "node:url";
11
+ import { execSync } from "node:child_process";
12
+ import { tmpdir } from "node:os";
13
+
14
+ import { DocumentObject } from "./docx/bridge.js";
15
+ import { RedlineEngine } from "./engine.js";
16
+ import { extractTextFromBuffer } from "./ingest.js";
17
+
18
+ const __filename = fileURLToPath(import.meta.url);
19
+ const __dirname = dirname(__filename);
20
+
21
+ const CORPUS_DIR = resolve(
22
+ __dirname,
23
+ "../../../../shared/cross_platform_tests",
24
+ );
25
+ const PYTHON_ABSTRACT_CMD = resolve(
26
+ __dirname,
27
+ "../../../../python/scripts/abstract_xml.py",
28
+ );
29
+ const PYTHON_DIR = resolve(__dirname, "../../../../python");
30
+
31
+ function normalizeMdTimestamps(mdText: string): string {
32
+ return mdText.replace(/@ \d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z/g, "@ DATE");
33
+ }
34
+
35
+ describe("Polyglot Consistency Framework (TS vs Python)", () => {
36
+ if (!existsSync(CORPUS_DIR)) {
37
+ it.skip("Cross-platform test corpus not found", () => {});
38
+ return;
39
+ }
40
+
41
+ const testFolders = readdirSync(CORPUS_DIR, { withFileTypes: true })
42
+ .filter((dirent) => dirent.isDirectory())
43
+ .map((dirent) => dirent.name);
44
+
45
+ for (const folder of testFolders) {
46
+ const testDir = resolve(CORPUS_DIR, folder);
47
+ const testJsonPath = resolve(testDir, "test.json");
48
+ const inputDocxPath = resolve(testDir, "input.docx");
49
+
50
+ if (!existsSync(testJsonPath) || !existsSync(inputDocxPath)) {
51
+ continue;
52
+ }
53
+
54
+ const testConfig = JSON.parse(readFileSync(testJsonPath, "utf-8"));
55
+ const isReadOnly = testConfig.read_only || false;
56
+ // CRITICAL: We must inherit the author from the JSON so the XML Abstraction comparison
57
+ // doesn't fail on `w:author="Adeu AI"` vs `w:author="Adeu AI (TS)"`.
58
+ const author = testConfig.author || "Adeu AI";
59
+
60
+ describe(`Corpus Scenario: [${folder}]`, () => {
61
+ it("Strictly matches the Python Golden Masters", async () => {
62
+ const inputBuffer = readFileSync(inputDocxPath);
63
+ let outBuffer: Buffer;
64
+
65
+ // 1. Process Edits (if not read-only)
66
+ if (isReadOnly) {
67
+ outBuffer = inputBuffer;
68
+ } else {
69
+ const doc = await DocumentObject.load(inputBuffer);
70
+ const engine = new RedlineEngine(doc, author);
71
+
72
+ engine.process_batch(testConfig.changes || []);
73
+ outBuffer = await doc.save();
74
+
75
+ // 2. Assert XML Structure Parity (via Python Bridge)
76
+ const goldenXmlPath = resolve(testDir, "golden_abstract.xml");
77
+ if (existsSync(goldenXmlPath)) {
78
+ const expectedXml = readFileSync(goldenXmlPath, "utf-8");
79
+
80
+ const tmpDocx = resolve(
81
+ tmpdir(),
82
+ `adeu_test_${folder}_${Date.now()}.docx`,
83
+ );
84
+ writeFileSync(tmpDocx, outBuffer);
85
+
86
+ try {
87
+ // Pipe to Python to bypass Node vs Python XML serialization differences
88
+ const cmd = `uv run python "${PYTHON_ABSTRACT_CMD}" "${tmpDocx}"`;
89
+ const actualXml = execSync(cmd, {
90
+ cwd: PYTHON_DIR,
91
+ encoding: "utf-8",
92
+ stdio: ["pipe", "pipe", "inherit"],
93
+ env: { ...process.env, PYTHONIOENCODING: "utf-8" },
94
+ });
95
+ // Normalize line endings for reliable string comparison
96
+ const normExpected = expectedXml.replace(/\r\n/g, "\n").trim();
97
+ const normActual = actualXml.replace(/\r\n/g, "\n").trim();
98
+
99
+ expect(normActual).toBe(normExpected);
100
+ } finally {
101
+ if (existsSync(tmpDocx)) unlinkSync(tmpDocx);
102
+ }
103
+ }
104
+ }
105
+
106
+ // 3. Assert Markdown Extraction Parity (Raw View)
107
+ const rawMdPath = resolve(testDir, "golden_raw.md");
108
+ if (existsSync(rawMdPath)) {
109
+ const expectedRaw = readFileSync(rawMdPath, "utf-8").replace(
110
+ /\r\n/g,
111
+ "\n",
112
+ );
113
+ const actualRaw = normalizeMdTimestamps(
114
+ await extractTextFromBuffer(outBuffer, false),
115
+ ).replace(/\r\n/g, "\n");
116
+ expect(actualRaw).toBe(expectedRaw);
117
+ }
118
+
119
+ // 4. Assert Markdown Extraction Parity (Clean View)
120
+ const cleanMdPath = resolve(testDir, "golden_clean.md");
121
+ if (existsSync(cleanMdPath)) {
122
+ const expectedClean = readFileSync(cleanMdPath, "utf-8").replace(
123
+ /\r\n/g,
124
+ "\n",
125
+ );
126
+ const actualClean = normalizeMdTimestamps(
127
+ await extractTextFromBuffer(outBuffer, true),
128
+ ).replace(/\r\n/g, "\n");
129
+ expect(actualClean).toBe(expectedClean);
130
+ }
131
+ });
132
+ });
133
+ }
134
+ });
package/src/diff.test.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import { describe, it, expect } from 'vitest';
2
- import { trim_common_context, generate_edits_from_text } from './diff.js';
2
+ import { trim_common_context, generate_edits_from_text, create_word_patch_diff } from './diff.js';
3
3
 
4
4
  describe('Diff Logic & Context Trimming', () => {
5
5
  it('handles basic prefix and suffix', () => {
@@ -59,4 +59,16 @@ describe('Diff Logic & Context Trimming', () => {
59
59
  expect(edit.new_text).toContain('Big');
60
60
  }
61
61
  });
62
+
63
+ it('generates a Word Patch formatted diff matching Python parity', () => {
64
+ const original = "This agreement is made between the Company and the Contractor.";
65
+ const modified = "This agreement is made between the Corporation and the Contractor.";
66
+
67
+ const diff = create_word_patch_diff(original, modified);
68
+
69
+ expect(diff).toContain("@@ Word Patch @@");
70
+ expect(diff).toContain("- Company");
71
+ expect(diff).toContain("+ Corporation");
72
+ expect(diff).toContain(" This agreement is made between the"); // Within 40-char context window so no truncation
73
+ });
62
74
  });
package/src/diff.ts CHANGED
@@ -1,7 +1,10 @@
1
- import diff_match_patch from 'diff-match-patch';
2
- import { ModifyText } from './models.js';
1
+ import diff_match_patch from "diff-match-patch";
2
+ import { ModifyText } from "./models.js";
3
3
 
4
- export function trim_common_context(target: string, new_val: string): [number, number] {
4
+ export function trim_common_context(
5
+ target: string,
6
+ new_val: string,
7
+ ): [number, number] {
5
8
  if (!target || !new_val) return [0, 0];
6
9
 
7
10
  const isSpace = (char: string) => /\s/.test(char);
@@ -16,8 +19,10 @@ export function trim_common_context(target: string, new_val: string): [number, n
16
19
  // Backtrack to nearest whitespace if we split a word
17
20
  if (prefix_len < target.length && prefix_len < new_val.length) {
18
21
  while (prefix_len > 0) {
19
- const target_split = !isSpace(target[prefix_len - 1]) && !isSpace(target[prefix_len]);
20
- const new_split = !isSpace(new_val[prefix_len - 1]) && !isSpace(new_val[prefix_len]);
22
+ const target_split =
23
+ !isSpace(target[prefix_len - 1]) && !isSpace(target[prefix_len]);
24
+ const new_split =
25
+ !isSpace(new_val[prefix_len - 1]) && !isSpace(new_val[prefix_len]);
21
26
  if (target_split || new_split) {
22
27
  prefix_len--;
23
28
  } else {
@@ -30,7 +35,7 @@ export function trim_common_context(target: string, new_val: string): [number, n
30
35
  while (prefix_len > 0) {
31
36
  if (prefix_len < target.length) {
32
37
  const charSeq = target.substring(prefix_len - 1, prefix_len + 1);
33
- if (charSeq === '**' || charSeq === '__') {
38
+ if (charSeq === "**" || charSeq === "__") {
34
39
  prefix_len--;
35
40
  continue;
36
41
  }
@@ -39,22 +44,24 @@ export function trim_common_context(target: string, new_val: string): [number, n
39
44
  const left = target.substring(0, prefix_len);
40
45
  const b_count = (left.match(/\*\*/g) || []).length;
41
46
  const u2_count = (left.match(/__/g) || []).length;
42
- const u1_count = (left.replace(/__/g, '').match(/_/g) || []).length;
47
+ const u1_count = (left.replace(/__/g, "").match(/_/g) || []).length;
43
48
 
44
49
  if (b_count % 2 !== 0) {
45
- prefix_len = left.lastIndexOf('**');
50
+ prefix_len = left.lastIndexOf("**");
46
51
  continue;
47
52
  }
48
53
  if (u2_count % 2 !== 0) {
49
- prefix_len = left.lastIndexOf('__');
54
+ prefix_len = left.lastIndexOf("__");
50
55
  continue;
51
56
  }
52
57
  if (u1_count % 2 !== 0) {
53
58
  let idx = left.length - 1;
54
59
  while (idx >= 0) {
55
- if (left[idx] === '_' &&
56
- (idx === 0 || left[idx - 1] !== '_') &&
57
- (idx === left.length - 1 || left[idx + 1] !== '_')) {
60
+ if (
61
+ left[idx] === "_" &&
62
+ (idx === 0 || left[idx - 1] !== "_") &&
63
+ (idx === left.length - 1 || left[idx + 1] !== "_")
64
+ ) {
58
65
  prefix_len = idx;
59
66
  break;
60
67
  }
@@ -68,15 +75,15 @@ export function trim_common_context(target: string, new_val: string): [number, n
68
75
  let hit_header = false;
69
76
  while (temp_len > 0) {
70
77
  const char = target[temp_len - 1];
71
- if (char === '#') {
78
+ if (char === "#") {
72
79
  prefix_len = temp_len - 1;
73
- while (prefix_len > 0 && target[prefix_len - 1] !== '\n') {
80
+ while (prefix_len > 0 && target[prefix_len - 1] !== "\n") {
74
81
  prefix_len--;
75
82
  }
76
83
  hit_header = true;
77
84
  break;
78
85
  }
79
- if (char === '\n') break;
86
+ if (char === "\n") break;
80
87
  temp_len--;
81
88
  }
82
89
  if (hit_header) continue;
@@ -90,7 +97,11 @@ export function trim_common_context(target: string, new_val: string): [number, n
90
97
  const new_rem_len = new_val.length - prefix_len;
91
98
  const limit_suffix = Math.min(target_rem_len, new_rem_len);
92
99
 
93
- while (suffix_len < limit_suffix && target[target.length - 1 - suffix_len] === new_val[new_val.length - 1 - suffix_len]) {
100
+ while (
101
+ suffix_len < limit_suffix &&
102
+ target[target.length - 1 - suffix_len] ===
103
+ new_val[new_val.length - 1 - suffix_len]
104
+ ) {
94
105
  suffix_len++;
95
106
  }
96
107
 
@@ -98,11 +109,15 @@ export function trim_common_context(target: string, new_val: string): [number, n
98
109
  while (suffix_len > 0) {
99
110
  let target_split = false;
100
111
  if (suffix_len < target.length) {
101
- target_split = !isSpace(target[target.length - 1 - suffix_len]) && !isSpace(target[target.length - suffix_len]);
112
+ target_split =
113
+ !isSpace(target[target.length - 1 - suffix_len]) &&
114
+ !isSpace(target[target.length - suffix_len]);
102
115
  }
103
116
  let new_split = false;
104
117
  if (suffix_len < new_val.length) {
105
- new_split = !isSpace(new_val[new_val.length - 1 - suffix_len]) && !isSpace(new_val[new_val.length - suffix_len]);
118
+ new_split =
119
+ !isSpace(new_val[new_val.length - 1 - suffix_len]) &&
120
+ !isSpace(new_val[new_val.length - suffix_len]);
106
121
  }
107
122
  if (target_split || new_split) {
108
123
  suffix_len--;
@@ -116,7 +131,7 @@ export function trim_common_context(target: string, new_val: string): [number, n
116
131
  const idx = target.length - suffix_len;
117
132
  if (idx > 0) {
118
133
  const charSeq = target.substring(idx - 1, idx + 1);
119
- if (charSeq === '**' || charSeq === '__') {
134
+ if (charSeq === "**" || charSeq === "__") {
120
135
  suffix_len--;
121
136
  continue;
122
137
  }
@@ -125,22 +140,24 @@ export function trim_common_context(target: string, new_val: string): [number, n
125
140
  const right = target.substring(target.length - suffix_len);
126
141
  const b_count = (right.match(/\*\*/g) || []).length;
127
142
  const u2_count = (right.match(/__/g) || []).length;
128
- const u1_count = (right.replace(/__/g, '').match(/_/g) || []).length;
143
+ const u1_count = (right.replace(/__/g, "").match(/_/g) || []).length;
129
144
 
130
145
  if (b_count % 2 !== 0) {
131
- suffix_len -= right.indexOf('**') + 2;
146
+ suffix_len -= right.indexOf("**") + 2;
132
147
  continue;
133
148
  }
134
149
  if (u2_count % 2 !== 0) {
135
- suffix_len -= right.indexOf('__') + 2;
150
+ suffix_len -= right.indexOf("__") + 2;
136
151
  continue;
137
152
  }
138
153
  if (u1_count % 2 !== 0) {
139
154
  let idx_in_right = 0;
140
155
  while (idx_in_right < right.length) {
141
- if (right[idx_in_right] === '_' &&
142
- (idx_in_right === 0 || right[idx_in_right - 1] !== '_') &&
143
- (idx_in_right === right.length - 1 || right[idx_in_right + 1] !== '_')) {
156
+ if (
157
+ right[idx_in_right] === "_" &&
158
+ (idx_in_right === 0 || right[idx_in_right - 1] !== "_") &&
159
+ (idx_in_right === right.length - 1 || right[idx_in_right + 1] !== "_")
160
+ ) {
144
161
  suffix_len -= idx_in_right + 1;
145
162
  break;
146
163
  }
@@ -151,20 +168,26 @@ export function trim_common_context(target: string, new_val: string): [number, n
151
168
  break;
152
169
  }
153
170
 
154
- if (suffix_len > 0 && /^\s+$/.test(target.substring(target.length - suffix_len))) {
171
+ if (
172
+ suffix_len > 0 &&
173
+ /^\s+$/.test(target.substring(target.length - suffix_len))
174
+ ) {
155
175
  suffix_len = 0;
156
176
  }
157
177
 
158
178
  // Absorb balanced wrappers
159
- for (const marker of ['**', '__', '_']) {
179
+ for (const marker of ["**", "__", "_"]) {
160
180
  const mlen = marker.length;
161
181
  const tgt_rem = target.substring(prefix_len, target.length - suffix_len);
162
182
  const new_rem = new_val.substring(prefix_len, new_val.length - suffix_len);
163
183
 
164
184
  if (
165
- tgt_rem.startsWith(marker) && new_rem.startsWith(marker) &&
166
- tgt_rem.endsWith(marker) && new_rem.endsWith(marker) &&
167
- tgt_rem.length >= 2 * mlen && new_rem.length >= 2 * mlen
185
+ tgt_rem.startsWith(marker) &&
186
+ new_rem.startsWith(marker) &&
187
+ tgt_rem.endsWith(marker) &&
188
+ new_rem.endsWith(marker) &&
189
+ tgt_rem.length >= 2 * mlen &&
190
+ new_rem.length >= 2 * mlen
168
191
  ) {
169
192
  prefix_len += mlen;
170
193
  suffix_len += mlen;
@@ -174,17 +197,20 @@ export function trim_common_context(target: string, new_val: string): [number, n
174
197
  return [prefix_len, suffix_len];
175
198
  }
176
199
 
177
- function _words_to_chars(text1: string, text2: string): [string, string, string[]] {
200
+ function _words_to_chars(
201
+ text1: string,
202
+ text2: string,
203
+ ): [string, string, string[]] {
178
204
  const token_array: string[] = [];
179
205
  const token_hash: Record<string, number> = {};
180
-
206
+
181
207
  // RegExp equivalent to Python's r"(\s+|\w+|[^\w\s])" with unicode support
182
208
  const split_pattern = /(\s+|[\p{L}\p{N}_]+|[^\p{L}\p{N}_\s])/gu;
183
209
 
184
210
  const encode_text = (text: string) => {
185
211
  // Keep delimiters via capture group in split
186
212
  const tokens = text.split(split_pattern).filter(Boolean);
187
- let encoded_chars = '';
213
+ let encoded_chars = "";
188
214
  for (const token of tokens) {
189
215
  if (token in token_hash) {
190
216
  encoded_chars += String.fromCharCode(token_hash[token]);
@@ -201,18 +227,26 @@ function _words_to_chars(text1: string, text2: string): [string, string, string[
201
227
  return [encode_text(text1), encode_text(text2), token_array];
202
228
  }
203
229
 
204
- export function generate_edits_from_text(original_text: string, modified_text: string): ModifyText[] {
230
+ export function generate_edits_from_text(
231
+ original_text: string,
232
+ modified_text: string,
233
+ ): ModifyText[] {
205
234
  const dmp = new diff_match_patch.diff_match_patch();
206
-
207
- const [chars1, chars2, token_array] = _words_to_chars(original_text, modified_text);
235
+ dmp.Diff_Timeout = 2.0; // Enforce strict 2-second timeout to prevent deep recursion hangs
236
+
237
+ const [chars1, chars2, token_array] = _words_to_chars(
238
+ original_text,
239
+ modified_text,
240
+ );
208
241
  const diffs = dmp.diff_main(chars1, chars2, false);
209
242
  dmp.diff_cleanupSemantic(diffs);
210
-
243
+
211
244
  // Manually map characters back to words to bypass prototype volatility (diff_charsToLines_)
212
245
  for (let i = 0; i < diffs.length; i++) {
213
246
  const chars = diffs[i][1];
214
- let text = '';
215
- for (let j = 0; j < chars.length; j++) text += token_array[chars.charCodeAt(j)];
247
+ let text = "";
248
+ for (let j = 0; j < chars.length; j++)
249
+ text += token_array[chars.charCodeAt(j)];
216
250
  diffs[i][1] = text;
217
251
  }
218
252
 
@@ -221,85 +255,170 @@ export function generate_edits_from_text(original_text: string, modified_text: s
221
255
  let pending_delete: [number, string] | null = null;
222
256
 
223
257
  for (const [op, text] of diffs) {
224
- if (op === 0) { // Equal
258
+ if (op === 0) {
259
+ // Equal
225
260
  if (pending_delete) {
226
261
  const [idx, del_txt] = pending_delete;
227
- edits.push({ type: 'modify', target_text: del_txt, new_text: '', comment: 'Diff: Text deleted', _match_start_index: idx });
262
+ edits.push({
263
+ type: "modify",
264
+ target_text: del_txt,
265
+ new_text: "",
266
+ comment: "Diff: Text deleted",
267
+ _match_start_index: idx,
268
+ });
228
269
  pending_delete = null;
229
270
  }
230
271
  current_original_index += text.length;
231
- } else if (op === -1) { // Delete
272
+ } else if (op === -1) {
273
+ // Delete
232
274
  pending_delete = [current_original_index, text];
233
275
  current_original_index += text.length;
234
- } else if (op === 1) { // Insert
276
+ } else if (op === 1) {
277
+ // Insert
235
278
  if (pending_delete) {
236
279
  const [idx, del_txt] = pending_delete;
237
- edits.push({ type: 'modify', target_text: del_txt, new_text: text, comment: 'Diff: Replacement', _match_start_index: idx });
280
+ edits.push({
281
+ type: "modify",
282
+ target_text: del_txt,
283
+ new_text: text,
284
+ comment: "Diff: Replacement",
285
+ _match_start_index: idx,
286
+ });
238
287
  pending_delete = null;
239
288
  } else {
240
- edits.push({ type: 'modify', target_text: '', new_text: text, comment: 'Diff: Text inserted', _match_start_index: current_original_index });
289
+ edits.push({
290
+ type: "modify",
291
+ target_text: "",
292
+ new_text: text,
293
+ comment: "Diff: Text inserted",
294
+ _match_start_index: current_original_index,
295
+ });
241
296
  }
242
297
  }
243
298
  }
244
299
 
245
300
  if (pending_delete) {
246
301
  const [idx, del_txt] = pending_delete;
247
- edits.push({ type: 'modify', target_text: del_txt, new_text: '', comment: 'Diff: Text deleted', _match_start_index: idx });
302
+ edits.push({
303
+ type: "modify",
304
+ target_text: del_txt,
305
+ new_text: "",
306
+ comment: "Diff: Text deleted",
307
+ _match_start_index: idx,
308
+ });
248
309
  }
249
310
 
250
311
  return edits;
251
312
  }
252
-
253
- export function create_unified_diff(original_text: string, modified_text: string, context_lines: number = 3): string {
313
+ export function create_unified_diff(
314
+ original_text: string,
315
+ modified_text: string,
316
+ context_lines: number = 3,
317
+ ): string {
254
318
  const dmp = new diff_match_patch.diff_match_patch();
319
+ dmp.Diff_Timeout = 2.0;
320
+
255
321
  const a = dmp.diff_linesToChars_(original_text, modified_text);
256
322
  const diffs = dmp.diff_main(a.chars1, a.chars2, false);
257
323
  dmp.diff_charsToLines_(diffs, a.lineArray);
258
-
324
+
259
325
  const output: string[] = [];
260
- output.push('--- Original');
261
- output.push('+++ Modified');
262
-
326
+ output.push("--- Original");
327
+ output.push("+++ Modified");
328
+
263
329
  let i = 0;
264
330
  while (i < diffs.length) {
265
331
  while (i < diffs.length && diffs[i][0] === 0) i++;
266
332
  if (i >= diffs.length) break;
267
-
333
+
268
334
  let start = i;
269
335
  let preContext: string[] = [];
270
336
  if (start > 0 && diffs[start - 1][0] === 0) {
271
- const lines = diffs[start - 1][1].replace(/\n$/, '').split('\n');
337
+ const lines = diffs[start - 1][1].replace(/\n$/, "").split("\n");
272
338
  preContext = lines.slice(-context_lines);
273
339
  }
274
-
340
+
275
341
  const chunk: string[] = [];
276
- chunk.push(...preContext.map(l => ` ${l}`));
277
-
342
+ chunk.push(...preContext.map((l) => ` ${l}`));
343
+
278
344
  while (i < diffs.length) {
279
345
  const [op, text] = diffs[i];
280
- const lines = text.replace(/\n$/, '').split('\n');
281
-
346
+ const lines = text.replace(/\n$/, "").split("\n");
347
+
282
348
  if (op === 0) {
283
349
  if (lines.length > context_lines * 2) break;
284
- chunk.push(...lines.map(l => ` ${l}`));
350
+ chunk.push(...lines.map((l) => ` ${l}`));
285
351
  } else {
286
- const prefix = op === -1 ? '-' : '+';
287
- chunk.push(...lines.map(l => `${prefix}${l}`));
352
+ const prefix = op === -1 ? "-" : "+";
353
+ chunk.push(...lines.map((l) => `${prefix}${l}`));
288
354
  }
289
355
  i++;
290
356
  }
291
-
357
+
292
358
  let postContext: string[] = [];
293
359
  if (i < diffs.length && diffs[i][0] === 0) {
294
- const lines = diffs[i][1].replace(/\n$/, '').split('\n');
360
+ const lines = diffs[i][1].replace(/\n$/, "").split("\n");
295
361
  postContext = lines.slice(0, context_lines);
296
362
  }
297
- chunk.push(...postContext.map(l => ` ${l}`));
298
-
299
- output.push('@@ ... @@');
363
+ chunk.push(...postContext.map((l) => ` ${l}`));
364
+
365
+ output.push("@@ ... @@");
300
366
  output.push(...chunk);
301
367
  }
368
+
369
+ if (output.length === 2) return ""; // No changes
370
+ return output.join("\n");
371
+ }
372
+
373
+ export function create_word_patch_diff(
374
+ original_text: string,
375
+ modified_text: string,
376
+ original_path: string = "Original",
377
+ modified_path: string = "Modified"
378
+ ): string {
379
+ const edits = generate_edits_from_text(original_text, modified_text);
380
+ const output: string[] = [
381
+ `--- ${original_path}`,
382
+ `+++ ${modified_path}`,
383
+ ""
384
+ ];
302
385
 
303
- if (output.length === 2) return ''; // No changes
304
- return output.join('\n');
305
- }
386
+ const CONTEXT_SIZE = 40;
387
+
388
+ for (const edit of edits) {
389
+ const raw_start = edit._match_start_index || 0;
390
+ const raw_target = edit.target_text || "";
391
+ const raw_new = edit.new_text || "";
392
+
393
+ const [prefix_len, suffix_len] = trim_common_context(raw_target, raw_new);
394
+
395
+ const target_end_in_target = raw_target.length - suffix_len;
396
+ const new_end_in_new = raw_new.length - suffix_len;
397
+
398
+ const display_target = raw_target.substring(prefix_len, target_end_in_target);
399
+ const display_new = raw_new.substring(prefix_len, new_end_in_new);
400
+
401
+ const change_start = raw_start + prefix_len;
402
+ const change_end = change_start + display_target.length;
403
+
404
+ let pre_start = Math.max(0, change_start - CONTEXT_SIZE);
405
+ let pre_context = original_text.substring(pre_start, change_start);
406
+ if (pre_start > 0) pre_context = "..." + pre_context;
407
+
408
+ let post_end = Math.min(original_text.length, change_end + CONTEXT_SIZE);
409
+ let post_context = original_text.substring(change_end, post_end);
410
+ if (post_end < original_text.length) post_context = post_context + "...";
411
+
412
+ pre_context = pre_context.replace(/\n/g, " ").replace(/\r/g, "");
413
+ post_context = post_context.replace(/\n/g, " ").replace(/\r/g, "");
414
+
415
+ output.push("@@ Word Patch @@");
416
+ output.push(` ${pre_context}`);
417
+ if (display_target) output.push(`- ${display_target}`);
418
+ if (display_new) output.push(`+ ${display_new}`);
419
+ output.push(` ${post_context}`);
420
+ output.push("");
421
+ }
422
+
423
+ return output.join("\n");
424
+ }