@adeu/core 1.6.8 → 1.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -5,7 +5,7 @@ export function identifyEngine() {
5
5
  export { DocumentObject } from './docx/bridge.js';
6
6
  export { DocumentMapper, TextSpan } from './mapper.js';
7
7
  export { RedlineEngine, BatchValidationError } from './engine.js';
8
- export { generate_edits_from_text, trim_common_context, create_unified_diff } from './diff.js';
8
+ export { generate_edits_from_text, trim_common_context, create_unified_diff, create_word_patch_diff } from './diff.js';
9
9
  export { apply_edits_to_markdown } from './markup.js';
10
10
  export { paginate, split_structural_appendix, PaginationResult, PageInfo } from './pagination.js';
11
11
  export { extract_outline, OutlineNode } from './outline.js';
package/src/markup.ts CHANGED
@@ -1,6 +1,7 @@
1
- import { trim_common_context } from './diff.js';
2
- import { ModifyText } from './models.js';
3
-
1
+ import { trim_common_context } from "./diff.js";
2
+ import { ModifyText } from "./models.js";
3
+ export const AMBIGUITY_EXAMPLES_CAP = 5;
4
+ export const AMBIGUITY_CONTEXT_CHARS = 50;
4
5
  function _should_strip_markers(text: string, marker: string): boolean {
5
6
  if (!text.startsWith(marker) || !text.endsWith(marker)) return false;
6
7
  if (text.length < marker.length * 2) return false;
@@ -11,9 +12,9 @@ function _should_strip_markers(text: string, marker: string): boolean {
11
12
  if (inner.includes(marker)) return false;
12
13
  if (!/[a-zA-Z]/.test(inner)) return false;
13
14
 
14
- if (marker === '__' && /^\w+$/.test(inner)) return false;
15
- if (marker === '_') {
16
- if (inner.includes('_')) return false;
15
+ if (marker === "__" && /^\w+$/.test(inner)) return false;
16
+ if (marker === "_") {
17
+ if (inner.includes("_")) return false;
17
18
  if (/^[0-9_]+$/.test(inner)) return false;
18
19
  }
19
20
 
@@ -21,17 +22,20 @@ function _should_strip_markers(text: string, marker: string): boolean {
21
22
  }
22
23
 
23
24
  function _strip_balanced_markers(text: string): [string, string, string] {
24
- let prefix_markup = '';
25
- let suffix_markup = '';
25
+ let prefix_markup = "";
26
+ let suffix_markup = "";
26
27
  let clean_text = text;
27
28
 
28
- const markers = ['**', '__', '_', '*'];
29
+ const markers = ["**", "__", "_", "*"];
29
30
 
30
31
  for (const marker of markers) {
31
32
  if (_should_strip_markers(clean_text, marker)) {
32
33
  prefix_markup += marker;
33
34
  suffix_markup = marker + suffix_markup;
34
- clean_text = clean_text.substring(marker.length, clean_text.length - marker.length);
35
+ clean_text = clean_text.substring(
36
+ marker.length,
37
+ clean_text.length - marker.length,
38
+ );
35
39
  break;
36
40
  }
37
41
  }
@@ -40,17 +44,27 @@ function _strip_balanced_markers(text: string): [string, string, string] {
40
44
  }
41
45
 
42
46
  export function _replace_smart_quotes(text: string): string {
43
- return text.replace(/“/g, '"').replace(/”/g, '"').replace(/‘/g, "'").replace(/’/g, "'");
47
+ return text
48
+ .replace(/“/g, '"')
49
+ .replace(/”/g, '"')
50
+ .replace(/‘/g, "'")
51
+ .replace(/’/g, "'");
44
52
  }
45
53
 
46
- function _find_safe_boundaries(text: string, start: number, end: number): [number, number] {
54
+ function _find_safe_boundaries(
55
+ text: string,
56
+ start: number,
57
+ end: number,
58
+ ): [number, number] {
47
59
  let new_start = start;
48
60
  let new_end = end;
49
61
 
50
62
  const expand_if_unbalanced = (marker: string) => {
51
63
  const current_match = text.substring(new_start, new_end);
52
- const count = (current_match.match(new RegExp(marker.replace(/\*/g, '\\*'), 'g')) || []).length;
53
-
64
+ const count = (
65
+ current_match.match(new RegExp(marker.replace(/\*/g, "\\*"), "g")) || []
66
+ ).length;
67
+
54
68
  if (count % 2 !== 0) {
55
69
  const suffix = text.substring(new_end);
56
70
  if (suffix.startsWith(marker)) {
@@ -66,22 +80,27 @@ function _find_safe_boundaries(text: string, start: number, end: number): [numbe
66
80
  };
67
81
 
68
82
  for (let i = 0; i < 2; i++) {
69
- expand_if_unbalanced('**');
70
- expand_if_unbalanced('__');
71
- expand_if_unbalanced('_');
72
- expand_if_unbalanced('*');
83
+ expand_if_unbalanced("**");
84
+ expand_if_unbalanced("__");
85
+ expand_if_unbalanced("_");
86
+ expand_if_unbalanced("*");
73
87
  }
74
88
 
75
89
  return [new_start, new_end];
76
90
  }
77
91
 
78
- function _refine_match_boundaries(text: string, start: number, end: number): [number, number] {
79
- const markers = ['**', '__', '*', '_'];
92
+ function _refine_match_boundaries(
93
+ text: string,
94
+ start: number,
95
+ end: number,
96
+ ): [number, number] {
97
+ const markers = ["**", "__", "*", "_"];
80
98
  let current_text = text.substring(start, end);
81
99
  let best_start = start;
82
100
  let best_end = end;
83
101
 
84
- const countMarker = (str: string, mk: string) => (str.match(new RegExp(mk.replace(/\*/g, '\\*'), 'g')) || []).length;
102
+ const countMarker = (str: string, mk: string) =>
103
+ (str.match(new RegExp(mk.replace(/\*/g, "\\*"), "g")) || []).length;
85
104
 
86
105
  for (const marker of markers) {
87
106
  if (current_text.startsWith(marker)) {
@@ -99,7 +118,10 @@ function _refine_match_boundaries(text: string, start: number, end: number): [nu
99
118
  for (const marker of markers) {
100
119
  if (current_text.endsWith(marker)) {
101
120
  const current_score = countMarker(current_text, marker) % 2;
102
- const trimmed_text = current_text.substring(0, current_text.length - marker.length);
121
+ const trimmed_text = current_text.substring(
122
+ 0,
123
+ current_text.length - marker.length,
124
+ );
103
125
  const trimmed_score = countMarker(trimmed_text, marker) % 2;
104
126
 
105
127
  if (current_score === 1 && trimmed_score === 0) {
@@ -117,9 +139,9 @@ export function _make_fuzzy_regex(target_text: string): string {
117
139
 
118
140
  const parts: string[] = [];
119
141
  const token_pattern = /(_+)|(\s+)|(['"])|([.,;:\/])/g;
120
-
121
- // Note: JS does not support atomic groups (?>...).
122
- // However, because we only match markdown characters * and _,
142
+
143
+ // Note: JS does not support atomic groups (?>...).
144
+ // However, because we only match markdown characters * and _,
123
145
  // we can use a character class `[*_]*` which is mathematically equivalent
124
146
  // to `(?:\*\*|__|\*|_)*` but fundamentally immune to catastrophic backtracking!
125
147
  const md_noise = "[*_]*";
@@ -131,8 +153,9 @@ export function _make_fuzzy_regex(target_text: string): string {
131
153
 
132
154
  let last_idx = 0;
133
155
  let match;
134
-
135
- const escapeRegExp = (str: string) => str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
156
+
157
+ const escapeRegExp = (str: string) =>
158
+ str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
136
159
 
137
160
  while ((match = token_pattern.exec(target_text)) !== null) {
138
161
  const literal = target_text.substring(last_idx, match.index);
@@ -147,15 +170,15 @@ export function _make_fuzzy_regex(target_text: string): string {
147
170
  const g_punct = match[4];
148
171
 
149
172
  if (g_underscore) {
150
- parts.push('_+');
173
+ parts.push("_+");
151
174
  } else if (g_space) {
152
- if (g_space.includes('\n')) {
175
+ if (g_space.includes("\n")) {
153
176
  parts.push(`(?:${structural_noise}|\\s+)+`);
154
177
  } else {
155
- parts.push('\\s+');
178
+ parts.push("\\s+");
156
179
  }
157
180
  } else if (g_quote) {
158
- if (g_quote === "'") parts.push('[\u2018\u2019\']');
181
+ if (g_quote === "'") parts.push("[\u2018\u2019']");
159
182
  else parts.push('["\u201c\u201d]');
160
183
  } else if (g_punct) {
161
184
  parts.push(escapeRegExp(g_punct));
@@ -168,10 +191,13 @@ export function _make_fuzzy_regex(target_text: string): string {
168
191
  const remaining = target_text.substring(last_idx);
169
192
  if (remaining) parts.push(escapeRegExp(remaining));
170
193
 
171
- return parts.join('');
194
+ return parts.join("");
172
195
  }
173
196
 
174
- export function _find_match_in_text(text: string, target: string): [number, number] {
197
+ export function _find_match_in_text(
198
+ text: string,
199
+ target: string,
200
+ ): [number, number] {
175
201
  if (!target) return [-1, -1];
176
202
 
177
203
  let idx = text.indexOf(target);
@@ -180,7 +206,8 @@ export function _find_match_in_text(text: string, target: string): [number, numb
180
206
  const norm_text = _replace_smart_quotes(text);
181
207
  const norm_target = _replace_smart_quotes(target);
182
208
  idx = norm_text.indexOf(norm_target);
183
- if (idx !== -1) return _find_safe_boundaries(text, idx, idx + norm_target.length);
209
+ if (idx !== -1)
210
+ return _find_safe_boundaries(text, idx, idx + norm_target.length);
184
211
 
185
212
  try {
186
213
  const pattern = new RegExp(_make_fuzzy_regex(target));
@@ -188,7 +215,11 @@ export function _find_match_in_text(text: string, target: string): [number, numb
188
215
  if (match) {
189
216
  const raw_start = match.index;
190
217
  const raw_end = match.index + match[0].length;
191
- const [refined_start, refined_end] = _refine_match_boundaries(text, raw_start, raw_end);
218
+ const [refined_start, refined_end] = _refine_match_boundaries(
219
+ text,
220
+ raw_start,
221
+ raw_end,
222
+ );
192
223
  return _find_safe_boundaries(text, refined_start, refined_end);
193
224
  }
194
225
  } catch (e) {
@@ -204,17 +235,24 @@ export function _build_critic_markup(
204
235
  comment: string | null | undefined,
205
236
  edit_index: number,
206
237
  include_index: boolean,
207
- highlight_only: boolean
238
+ highlight_only: boolean,
208
239
  ): string {
209
240
  const parts: string[] = [];
210
241
 
211
- let [prefix_markup, clean_target, suffix_markup] = _strip_balanced_markers(target_text);
242
+ let [prefix_markup, clean_target, suffix_markup] =
243
+ _strip_balanced_markers(target_text);
212
244
 
213
245
  let clean_new = new_text;
214
246
  if (prefix_markup && new_text) {
215
- if (new_text.startsWith(prefix_markup) && new_text.endsWith(suffix_markup)) {
247
+ if (
248
+ new_text.startsWith(prefix_markup) &&
249
+ new_text.endsWith(suffix_markup)
250
+ ) {
216
251
  const inner_len = prefix_markup.length;
217
- clean_new = new_text.length > inner_len * 2 ? new_text.substring(inner_len, new_text.length - inner_len) : new_text;
252
+ clean_new =
253
+ new_text.length > inner_len * 2
254
+ ? new_text.substring(inner_len, new_text.length - inner_len)
255
+ : new_text;
218
256
  }
219
257
  }
220
258
 
@@ -228,7 +266,8 @@ export function _build_critic_markup(
228
266
 
229
267
  if (has_target && !has_new) parts.push(`{--${clean_target}--}`);
230
268
  else if (!has_target && has_new) parts.push(`{++${clean_new}++}`);
231
- else if (has_target && has_new) parts.push(`{--${clean_target}--}{++${clean_new}++}`);
269
+ else if (has_target && has_new)
270
+ parts.push(`{--${clean_target}--}{++${clean_new}++}`);
232
271
  }
233
272
 
234
273
  parts.push(suffix_markup);
@@ -238,17 +277,17 @@ export function _build_critic_markup(
238
277
  if (include_index) meta_parts.push(`[Edit:${edit_index}]`);
239
278
 
240
279
  if (meta_parts.length > 0) {
241
- parts.push(`{>>${meta_parts.join(' ')}<<}`);
280
+ parts.push(`{>>${meta_parts.join(" ")}<<}`);
242
281
  }
243
282
 
244
- return parts.join('');
283
+ return parts.join("");
245
284
  }
246
285
 
247
286
  export function apply_edits_to_markdown(
248
287
  markdown_text: string,
249
288
  edits: ModifyText[],
250
289
  include_index = false,
251
- highlight_only = false
290
+ highlight_only = false,
252
291
  ): string {
253
292
  if (!edits || edits.length === 0) return markdown_text;
254
293
 
@@ -256,7 +295,7 @@ export function apply_edits_to_markdown(
256
295
 
257
296
  for (let idx = 0; idx < edits.length; idx++) {
258
297
  const edit = edits[idx];
259
- const target = edit.target_text || '';
298
+ const target = edit.target_text || "";
260
299
 
261
300
  if (!target) {
262
301
  continue;
@@ -269,7 +308,8 @@ export function apply_edits_to_markdown(
269
308
  matched_edits.push([start, end, actual_matched_text, edit, idx]);
270
309
  }
271
310
 
272
- const matched_edits_filtered: [number, number, string, ModifyText, number][] = [];
311
+ const matched_edits_filtered: [number, number, string, ModifyText, number][] =
312
+ [];
273
313
  const occupied_ranges: [number, number][] = [];
274
314
 
275
315
  matched_edits.sort((a, b) => a[4] - b[4]);
@@ -293,16 +333,26 @@ export function apply_edits_to_markdown(
293
333
 
294
334
  let result = markdown_text;
295
335
 
296
- for (const [start, end, actual_text, edit, orig_idx] of matched_edits_filtered) {
297
- const new_txt = edit.new_text || '';
336
+ for (const [
337
+ start,
338
+ end,
339
+ actual_text,
340
+ edit,
341
+ orig_idx,
342
+ ] of matched_edits_filtered) {
343
+ const new_txt = edit.new_text || "";
298
344
  const [prefix_len, suffix_len] = trim_common_context(actual_text, new_txt);
299
345
 
300
- const unmodified_prefix = prefix_len > 0 ? actual_text.substring(0, prefix_len) : '';
301
- const unmodified_suffix = suffix_len > 0 ? actual_text.substring(actual_text.length - suffix_len) : '';
346
+ const unmodified_prefix =
347
+ prefix_len > 0 ? actual_text.substring(0, prefix_len) : "";
348
+ const unmodified_suffix =
349
+ suffix_len > 0
350
+ ? actual_text.substring(actual_text.length - suffix_len)
351
+ : "";
302
352
 
303
353
  const t_end = actual_text.length - suffix_len;
304
354
  const n_end = new_txt.length - suffix_len;
305
-
355
+
306
356
  const isolated_target = actual_text.substring(prefix_len, t_end);
307
357
  const isolated_new = new_txt.substring(prefix_len, n_end);
308
358
 
@@ -312,12 +362,69 @@ export function apply_edits_to_markdown(
312
362
  edit.comment,
313
363
  orig_idx,
314
364
  include_index,
315
- highlight_only
365
+ highlight_only,
316
366
  );
317
367
 
318
368
  const full_replacement = unmodified_prefix + markup + unmodified_suffix;
319
- result = result.substring(0, start) + full_replacement + result.substring(end);
369
+ result =
370
+ result.substring(0, start) + full_replacement + result.substring(end);
320
371
  }
321
372
 
322
373
  return result;
323
- }
374
+ }
375
/**
 * Build the multi-line message reported when an edit's target text matches
 * more than one location.
 *
 * The message has a header line with the total number of matches, then one
 * numbered example per match (at most `AMBIGUITY_EXAMPLES_CAP` of them), an
 * optional "... and N more" line, and a closing hint. Each example shows up
 * to `AMBIGUITY_CONTEXT_CHARS` characters on either side of the match, with
 * the match itself wrapped in square brackets and newlines turned into
 * spaces.
 *
 * @param edit_index - Index of the failing edit, echoed in the header line.
 * @param target_text - Not read by this function; only `match_positions`
 *   and `haystack` determine the output.
 * @param haystack - Full text the matches were found in.
 * @param match_positions - `[start, end)` offsets into `haystack`, one per
 *   match.
 * @returns The formatted message, with lines joined by `\n`.
 * @throws Error if fewer than two match positions are supplied.
 */
export function format_ambiguity_error(
  edit_index: number,
  target_text: string,
  haystack: string,
  match_positions: [number, number][],
): string {
  const total = match_positions.length;
  if (total < 2) {
    throw new Error(
      `format_ambiguity_error requires at least 2 matches, got ${total}`,
    );
  }

  // Every excerpt is rendered on a single line.
  const flatten = (piece: string): string => piece.replace(/\n/g, " ");

  const shown = match_positions.slice(0, AMBIGUITY_EXAMPLES_CAP);
  const hidden_count = total - shown.length;

  const examples = shown.map(([start, end], position) => {
    const window_start = Math.max(0, start - AMBIGUITY_CONTEXT_CHARS);
    const window_end = Math.min(
      haystack.length,
      end + AMBIGUITY_CONTEXT_CHARS,
    );

    const before = flatten(haystack.substring(window_start, start));
    const after = flatten(haystack.substring(end, window_end));

    // Long matches are abbreviated to their first 25 and last 20 characters.
    const whole_match = flatten(haystack.substring(start, end));
    const excerpt =
      whole_match.length > 50
        ? whole_match.substring(0, 25) +
          "..." +
          whole_match.substring(whole_match.length - 20)
        : whole_match;

    // An ellipsis marks each side where the context window was clipped.
    const lead = window_start > 0 ? "..." : "";
    const trail = window_end < haystack.length ? "..." : "";

    return ` ${position + 1}. "${lead}${before}[${excerpt}]${after}${trail}"`;
  });

  const lines: string[] = [
    `- Edit ${edit_index} Failed: Ambiguous match. Target text appears ${total} times. First ${shown.length} occurrences:`,
    ...examples,
  ];

  if (hidden_count > 0) {
    lines.push(` ... and ${hidden_count} more occurrence(s) not shown.`);
  }

  lines.push(
    " Please provide more surrounding context in your target_text to uniquely identify the location.",
  );

  return lines.join("\n");
}