npm - @adeu/core - Versions diffs - 1.6.2 - Mend

@adeu/core 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36)

package/dist/index.cjs +3627 -0
package/dist/index.cjs.map +1 -0
package/dist/index.d.cts +247 -0
package/dist/index.d.ts +247 -0
package/dist/index.js +3579 -0
package/dist/index.js.map +1 -0
package/package.json +38 -0
package/src/comments.test.ts +38 -0
package/src/comments.ts +451 -0
package/src/diff.test.ts +62 -0
package/src/diff.ts +251 -0
package/src/docx/bridge.ts +189 -0
package/src/docx/dom.ts +54 -0
package/src/docx/primitives.ts +65 -0
package/src/domain.ts +11 -0
package/src/engine.atomic.test.ts +58 -0
package/src/engine.batch.test.ts +93 -0
package/src/engine.safety.test.ts +42 -0
package/src/engine.tables.test.ts +166 -0
package/src/engine.ts +735 -0
package/src/index.test.ts +8 -0
package/src/index.ts +14 -0
package/src/ingest.test.ts +44 -0
package/src/ingest.ts +400 -0
package/src/mapper.test.ts +66 -0
package/src/mapper.ts +835 -0
package/src/markup.test.ts +150 -0
package/src/markup.ts +323 -0
package/src/models.ts +51 -0
package/src/outline.ts +377 -0
package/src/pagination.ts +239 -0
package/src/test-utils.ts +142 -0
package/src/utils/docx.ts +478 -0
package/tsconfig.json +21 -0
package/tsup.config.ts +10 -0
package/vitest.config.ts +12 -0

package/src/markup.test.ts ADDED Viewed

@@ -0,0 +1,150 @@
+import { describe, it, expect } from 'vitest';
+import {
+  _replace_smart_quotes,
+  _make_fuzzy_regex,
+  _find_match_in_text,
+  _build_critic_markup,
+  apply_edits_to_markdown
+} from './markup.js';
+import { ModifyText } from './models.js';
+// Unit tests for the low-level matching helpers exported by markup.ts.
+describe('Markup Helpers', () => {
+  // Straight quotes must pass through unchanged; curly quotes become their ASCII equivalents.
+  // The expected value is stated directly in the table so the assertion checks what the row says.
+  it.each([
+    ['"Hello" and \'World\'', '"Hello" and \'World\''],
+    ['Smart “quotes” and ‘apostrophes’', 'Smart "quotes" and \'apostrophes\''],
+  ])('replace_smart_quotes(%s)', (input, expected) => {
+    expect(_replace_smart_quotes(input)).toBe(expected);
+  });
+  // Every candidate differs from the input only in the length of a whitespace or underscore run.
+  it.each([
+    ['hello world', ['hello world', 'hello  world', 'hello   world']],
+    ['[___]', ['[___]', '[_____]', '[__________]']],
+  ])('make_fuzzy_regex(%s)', (inputStr, matches) => {
+    const pattern = new RegExp(_make_fuzzy_regex(inputStr));
+    for (const m of matches) {
+      expect(m).toMatch(pattern);
+    }
+  });
+  // Columns: text, target, expected start, expected end. [-1, -1] means "no match".
+  it.each([
+    ['The quick brown fox', 'quick', 4, 9],
+    ['"Hello" said the fox', '"Hello"', 0, 7],
+    ['hello   world', 'hello world', 0, 13],
+    ['The quick brown fox', 'elephant', -1, -1],
+    ['Some text', '', -1, -1],
+  ])('find_match_in_text: %s targets %s', (text, target, expectedStart, expectedEnd) => {
+    const [start, end] = _find_match_in_text(text, target);
+    expect(start).toBe(expectedStart);
+    expect(end).toBe(expectedEnd);
+  });
+});
+// Table-driven checks for the CriticMarkup string produced for a single edit.
+describe('build_critic_markup', () => {
+  /** One row of the truth table; omitted flags take the defaults applied in the test callback. */
+  interface MarkupCase {
+    t: string;
+    n: string;
+    c?: string;
+    idx?: number;
+    incIdx?: boolean;
+    highlight?: boolean;
+    expected: string;
+  }
+  const cases: MarkupCase[] = [
+    { t: 'old', n: '', expected: '{--old--}' },
+    { t: '', n: 'new', expected: '{++new++}' },
+    { t: 'old', n: 'new', expected: '{--old--}{++new++}' },
+    { t: 'old', n: 'new', c: 'Changed this', expected: '{--old--}{++new++}{>>Changed this<<}' },
+    { t: 'old', n: 'new', idx: 3, incIdx: true, expected: '{--old--}{++new++}{>>[Edit:3]<<}' },
+    { t: 'old', n: 'new', c: 'Reason', idx: 5, incIdx: true, expected: '{--old--}{++new++}{>>Reason [Edit:5]<<}' },
+    { t: 'target', n: 'ignored', highlight: true, expected: '{==target==}' },
+    { t: 'target', n: 'ignored', c: 'Note', idx: 2, incIdx: true, highlight: true, expected: '{==target==}{>>Note [Edit:2]<<}' },
+    // Formatting: a balanced wrapper is hoisted outside the change markup.
+    { t: '**Important**', n: '**Critical**', expected: '**{--Important--}{++Critical++}**' },
+    { t: '_emphasis_', n: '_strong emphasis_', expected: '_{--emphasis--}{++strong emphasis++}_' },
+    { t: '**_nested_**', n: '**_deeply nested_**', expected: '**{--_nested_--}{++_deeply nested_++}**' },
+    { t: '**unbalanced', n: '**still unbalanced', expected: '{--**unbalanced--}{++**still unbalanced++}' },
+    { t: '__0__', n: '__1__', expected: '{--__0__--}{++__1__++}' },
+    // Edge cases
+    { t: '', n: '', expected: '' },
+    { t: '   ', n: 'text', expected: '{--   --}{++text++}' },
+    { t: 'Line1\nLine2', n: 'SingleLine', expected: '{--Line1\nLine2--}{++SingleLine++}' },
+    { t: 'C++', n: 'Python', expected: '{--C++--}{++Python++}' },
+    { t: 'old', n: 'new', c: '   ', expected: '{--old--}{++new++}{>>   <<}' },
+    { t: 'old', n: 'new', c: '', expected: '{--old--}{++new++}' },
+    { t: '', n: 'ignored', highlight: true, expected: '' }
+  ];
+  it.each(cases)('builds correct markup for $t -> $n', ({ t, n, c, idx = 0, incIdx = false, highlight = false, expected }) => {
+    const result = _build_critic_markup(t, n, c, idx, incIdx, highlight);
+    // Highlighting an empty target may render as nothing or as an empty highlight; both are accepted.
+    const acceptable = highlight && !t ? ['', '{====}'] : [expected];
+    expect(acceptable).toContain(result);
+  });
+});
+// End-to-end checks: edits are located in markdown and rendered as CriticMarkup.
+describe('apply_edits_to_markdown', () => {
+  // Builds one modify edit; the comment field is only set when a comment is supplied.
+  const modify = (target_text: string, new_text: string, comment?: string): ModifyText => {
+    const edit: ModifyText = { type: 'modify', target_text, new_text };
+    if (comment !== undefined) edit.comment = comment;
+    return edit;
+  };
+  // Columns: text, target, new text, substring expected in the output.
+  // A target of 'none' means "call with an empty edit list".
+  it.each([
+    ['Notice of Termination', 'Notice of Termination', 'Notice of Immediate Termination', 'Notice of {++Immediate ++}Termination'],
+    ['Hello World', 'Hello World', 'Hello Universe', 'Hello {--World--}{++Universe++}'],
+    ['Old Item', 'Old Item', 'New Item', '{--Old--}{++New++} Item'],
+    ['Original text', 'none', 'none', 'Original text'],
+    ['Remove this word please.', 'this ', '', 'Remove {--this --}word please.'],
+    ['', 'x', 'y', ''],
+    ['Price is $100.00 (USD).', '$100.00', '$200.00', '{--$100.00--}{++$200.00++}'],
+    ['Use {curly} and [square] brackets.', '{curly}', '{braces}', '{--{curly}--}{++{braces}++}']
+  ])('basic edge cases: %s', (text, target, newText, expected) => {
+    const edits: ModifyText[] = target === 'none' ? [] : [modify(target, newText)];
+    expect(apply_edits_to_markdown(text, edits)).toContain(expected);
+  });
+  it('handles modification with comment', () => {
+    const output = apply_edits_to_markdown('The quick brown fox.', [modify('quick', 'slow', 'Speed change')]);
+    expect(output).toContain('{--quick--}{++slow++}{>>Speed change<<}');
+  });
+  it('handles highlight_only mode', () => {
+    const output = apply_edits_to_markdown('Highlight this section please.', [modify('this section', 'ignored')], false, true);
+    expect(output).toContain('{==this section==}');
+    expect(output).not.toContain('{--');
+  });
+  it('preserves order of multiple edits', () => {
+    const edits = [modify('A', 'X'), modify('B', 'Y'), modify('C', 'Z')];
+    const output = apply_edits_to_markdown('A B C', edits, true);
+    expect(output).toContain('[Edit:0]');
+    const positionOf = (needle: string) => output.indexOf(needle);
+    expect(positionOf('{++X++}')).toBeLessThan(positionOf('{++Y++}'));
+    expect(positionOf('{++Y++}')).toBeLessThan(positionOf('{++Z++}'));
+  });
+  it('skips overlapping edits (first wins)', () => {
+    const edits = [modify('quick brown', 'slow red'), modify('brown fox', 'green dog')];
+    const output = apply_edits_to_markdown('The quick brown fox', edits);
+    expect(output).toContain('{--quick brown--}{++slow red++}');
+    expect(output).not.toContain('green dog');
+  });
+  // Columns: text, target, substring expected in the output (new text is always 'replacement').
+  it.each([
+    ['hello   world', 'hello world', '{--hello   world--}'],
+    ['Sign here: [__________]', '[___]', '{--[__________]--}'],
+    ['"Hello" said the fox.', '"Hello"', '{--"Hello"--}']
+  ])('fuzzy and smart quotes: %s', (text, target, expectedSubstring) => {
+    expect(apply_edits_to_markdown(text, [modify(target, 'replacement')])).toContain(expectedSubstring);
+  });
+  // Columns: text, target, new text, substring expected in the output.
+  it.each([
+    ['The **quick brown fox** jumped.', 'quick brown fox', 'slow red dog', 'The **{--quick brown fox--}{++slow red dog++}** jumped.'],
+    ['This is _emphasized_ text.', 'emphasized', 'highlighted', '_{--emphasized--}{++highlighted++}_'],
+    ['Variable __init__ is special.', '__init__', '__setup__', '__{--init--}{++setup++}__'],
+  ])('formatting noise and preservation: %s', (text, target, newText, expectedSubstring) => {
+    expect(apply_edits_to_markdown(text, [modify(target, newText)])).toContain(expectedSubstring);
+  });
+});

package/src/markup.ts ADDED Viewed

@@ -0,0 +1,323 @@
+import { trim_common_context } from './diff.js';
+import { ModifyText } from './models.js';
+/**
+ * Decides whether `text` is wrapped in exactly one balanced pair of `marker`
+ * that can be lifted out of the change markup.
+ *
+ * @param text Candidate text, e.g. `**Important**`.
+ * @param marker Emphasis marker to test (`**`, `__`, `_` or `*`).
+ * @returns `true` only when the marker opens and closes the text, the inner
+ *   text is non-empty, contains a letter and does not contain the marker again.
+ */
+function _should_strip_markers(text: string, marker: string): boolean {
+  const width = marker.length;
+  const wrapped = text.startsWith(marker) && text.endsWith(marker) && text.length >= width * 2;
+  if (!wrapped) return false;
+  const inner = text.slice(width, text.length - width);
+  if (inner === '' || inner.includes(marker) || !/[a-zA-Z]/.test(inner)) return false;
+  switch (marker) {
+    case '__':
+      // A single word between double underscores (e.g. __init__) is kept as-is.
+      return !/^\w+$/.test(inner);
+    case '_':
+      return !(inner.includes('_') || /^[0-9_]+$/.test(inner));
+    default:
+      return true;
+  }
+}
+/**
+ * Splits `text` into `[prefix, inner, suffix]` by peeling off at most one
+ * balanced emphasis wrapper.
+ *
+ * Markers are tried in the order `**`, `__`, `_`, `*` and only the first one
+ * that qualifies is removed. When none qualifies the result is `['', text, '']`.
+ */
+function _strip_balanced_markers(text: string): [string, string, string] {
+  const wrapper = ['**', '__', '_', '*'].find((candidate) => _should_strip_markers(text, candidate));
+  if (wrapper === undefined) return ['', text, ''];
+  const inner = text.substring(wrapper.length, text.length - wrapper.length);
+  return [wrapper, inner, wrapper];
+}
+/**
+ * Replaces curly double and single quotes with their straight ASCII forms.
+ * Each replacement is one character for one character, so the length of the
+ * returned string is the same as the length of the input.
+ */
+export function _replace_smart_quotes(text: string): string {
+  const substitutions: [RegExp, string][] = [
+    [/“/g, '"'],
+    [/”/g, '"'],
+    [/‘/g, "'"],
+    [/’/g, "'"],
+  ];
+  return substitutions.reduce((acc, [curly, straight]) => acc.replace(curly, straight), text);
+}
+/**
+ * Widens the range `[start, end)` so it does not cut an emphasis pair in half.
+ *
+ * For each marker (`**`, `__`, `_`, `*`, in that order, two passes) the number
+ * of occurrences inside the current range is counted. When the count is odd
+ * the range grows by one marker: to the right if the marker directly follows
+ * the range, otherwise to the left if it directly precedes it.
+ *
+ * @returns The possibly widened `[start, end]` pair.
+ */
+function _find_safe_boundaries(text: string, start: number, end: number): [number, number] {
+  let lo = start;
+  let hi = end;
+  const occurrences = (haystack: string, marker: string): number => {
+    const hits = haystack.match(new RegExp(marker.replace(/\*/g, '\\*'), 'g'));
+    return hits === null ? 0 : hits.length;
+  };
+  // Two passes: widening for one marker can change the balance of another.
+  for (let pass = 0; pass < 2; pass += 1) {
+    for (const marker of ['**', '__', '_', '*']) {
+      if (occurrences(text.substring(lo, hi), marker) % 2 === 0) continue;
+      if (text.startsWith(marker, hi)) {
+        hi += marker.length;
+      } else if (text.endsWith(marker, lo)) {
+        lo -= marker.length;
+      }
+    }
+  }
+  return [lo, hi];
+}
+/**
+ * Shrinks the range `[start, end)` by dropping a dangling emphasis marker at
+ * either edge.
+ *
+ * A leading (then trailing) marker is removed only when the range contains an
+ * odd number of that marker and removing it leaves an even number. Markers are
+ * tried in the order `**`, `__`, `*`, `_`.
+ *
+ * @returns The possibly narrowed `[start, end]` pair.
+ */
+function _refine_match_boundaries(text: string, start: number, end: number): [number, number] {
+  const candidates = ['**', '__', '*', '_'];
+  const parity = (haystack: string, marker: string): number => {
+    const hits = haystack.match(new RegExp(marker.replace(/\*/g, '\\*'), 'g'));
+    return (hits ? hits.length : 0) % 2;
+  };
+  // Trimming is only worthwhile when it turns an odd marker count into an even one.
+  const worthTrimming = (before: string, after: string, marker: string): boolean =>
+    parity(before, marker) === 1 && parity(after, marker) === 0;
+  let span = text.substring(start, end);
+  let lo = start;
+  let hi = end;
+  for (const marker of candidates) {
+    if (!span.startsWith(marker)) continue;
+    const rest = span.substring(marker.length);
+    if (worthTrimming(span, rest, marker)) {
+      lo += marker.length;
+      span = rest;
+    }
+  }
+  for (const marker of candidates) {
+    if (!span.endsWith(marker)) continue;
+    const rest = span.substring(0, span.length - marker.length);
+    if (worthTrimming(span, rest, marker)) {
+      hi -= marker.length;
+      span = rest;
+    }
+  }
+  return [lo, hi];
+}
+/**
+ * Builds the source of a tolerant regular expression for `target_text`.
+ *
+ * The returned string is meant to be handed to `new RegExp(...)`. The pattern
+ * starts with an optional list marker, allows runs of `*` / `_` between the
+ * pieces of the target, accepts whitespace and underscore runs of any length,
+ * and accepts straight or curly variants of each quote character.
+ *
+ * @param target_text Text to search for; smart quotes are normalised first.
+ * @returns Regex source string (not a compiled RegExp).
+ */
+export function _make_fuzzy_regex(target_text: string): string {
+  target_text = _replace_smart_quotes(target_text);
+  const parts: string[] = [];
+  // Special tokens: an underscore run, a whitespace run, one quote character, or one of . , ; : /
+  const token_pattern = /(_+)|(\s+)|(['"])|([.,;:\/])/g;
+  // Note: JS does not support atomic groups (?>...).
+  // However, because we only match markdown characters * and _,
+  // we can use a character class `[*_]*` which is mathematically equivalent
+  // to `(?:\*\*|__|\*|_)*` but fundamentally immune to catastrophic backtracking!
+  const md_noise = "[*_]*";
+  // Either a list/quote marker followed by whitespace, or a line break with surrounding whitespace.
+  const structural_noise = "(?:\\s*(?:[*+\\->]|\\d+\\.)\\s+|\\s*\\n\\s*)";
+  // Optional list marker allowed once, at the very start of the match.
+  const start_list_marker = "(?:[ \\t]*(?:[*+\\->]|\\d+\\.)\\s+)?";
+  parts.push(start_list_marker);
+  parts.push(md_noise);
+  let last_idx = 0;
+  let match;
+  // Escapes regex metacharacters so literal text is matched verbatim.
+  const escapeRegExp = (str: string) => str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+  while ((match = token_pattern.exec(target_text)) !== null) {
+    // Plain text between the previous token and this one is matched literally.
+    const literal = target_text.substring(last_idx, match.index);
+    if (literal) {
+      parts.push(escapeRegExp(literal));
+      parts.push(md_noise);
+    }
+    const g_underscore = match[1];
+    const g_space = match[2];
+    const g_quote = match[3];
+    const g_punct = match[4];
+    if (g_underscore) {
+      // Any underscore run in the target matches an underscore run of any length.
+      parts.push('_+');
+    } else if (g_space) {
+      if (g_space.includes('\n')) {
+        // Whitespace containing a newline may also absorb list markers and line breaks.
+        // NOTE(review): `\s+` is repeated next to alternatives that also start with `\s*`;
+        // this may backtrack heavily on long whitespace runs — worth profiling.
+        parts.push(`(?:${structural_noise}|\\s+)+`);
+      } else {
+        parts.push('\\s+');
+      }
+    } else if (g_quote) {
+      // Accept the straight quote and both curly variants of the same kind.
+      if (g_quote === "'") parts.push('[\u2018\u2019\']');
+      else parts.push('["\u201c\u201d]');
+    } else if (g_punct) {
+      parts.push(escapeRegExp(g_punct));
+    }
+    parts.push(md_noise);
+    last_idx = token_pattern.lastIndex;
+  }
+  // Text after the last token is matched literally; no trailing noise is appended after it.
+  const remaining = target_text.substring(last_idx);
+  if (remaining) parts.push(escapeRegExp(remaining));
+  return parts.join('');
+}
+/**
+ * Locates `target` inside `text` and returns the `[start, end]` offsets of the match.
+ *
+ * Three strategies are tried in order: exact substring, substring after
+ * smart-quote normalisation of both sides, and finally the fuzzy regex.
+ * Every hit is passed through `_find_safe_boundaries`; fuzzy hits are first
+ * narrowed with `_refine_match_boundaries`.
+ *
+ * @returns `[-1, -1]` when `target` is empty or nothing matches.
+ */
+export function _find_match_in_text(text: string, target: string): [number, number] {
+  if (!target) return [-1, -1];
+  const exact_at = text.indexOf(target);
+  if (exact_at >= 0) return _find_safe_boundaries(text, exact_at, exact_at + target.length);
+  // Quote normalisation swaps characters one for one, so offsets carry over to the original text.
+  const plain_target = _replace_smart_quotes(target);
+  const normalized_at = _replace_smart_quotes(text).indexOf(plain_target);
+  if (normalized_at >= 0) {
+    return _find_safe_boundaries(text, normalized_at, normalized_at + plain_target.length);
+  }
+  try {
+    const hit = new RegExp(_make_fuzzy_regex(target)).exec(text);
+    if (hit !== null) {
+      const [trim_start, trim_end] = _refine_match_boundaries(text, hit.index, hit.index + hit[0].length);
+      return _find_safe_boundaries(text, trim_start, trim_end);
+    }
+  } catch (e) {
+    // Best effort: any failure while building or running the fuzzy pattern counts as "no match".
+  }
+  return [-1, -1];
+}
+/**
+ * Renders one edit as a CriticMarkup string.
+ *
+ * A balanced emphasis wrapper around `target_text` is lifted outside the
+ * markup; when `new_text` carries the same wrapper it is unwrapped as well.
+ *
+ * @param target_text Matched text being replaced (or highlighted).
+ * @param new_text Replacement text; not emitted when `highlight_only` is set.
+ * @param comment Optional note placed in a `{>>...<<}` block.
+ * @param edit_index Index reported as `[Edit:n]` when `include_index` is set.
+ * @param include_index Whether to append the `[Edit:n]` tag to the note block.
+ * @param highlight_only Emit `{==target==}` instead of deletion/insertion markup.
+ * @returns The markup string; empty when there is nothing to emit.
+ */
+export function _build_critic_markup(
+  target_text: string,
+  new_text: string,
+  comment: string | null | undefined,
+  edit_index: number,
+  include_index: boolean,
+  highlight_only: boolean
+): string {
+  const [lead, bare_target, trail] = _strip_balanced_markers(target_text);
+  let bare_new = new_text;
+  const shares_wrapper = lead && new_text && new_text.startsWith(lead) && new_text.endsWith(trail);
+  if (shares_wrapper && new_text.length > lead.length * 2) {
+    bare_new = new_text.substring(lead.length, new_text.length - lead.length);
+  }
+  let body = '';
+  if (highlight_only) {
+    body = `{==${bare_target}==}`;
+  } else {
+    if (bare_target) body += `{--${bare_target}--}`;
+    if (bare_new) body += `{++${bare_new}++}`;
+  }
+  const notes: string[] = [];
+  if (comment) notes.push(comment);
+  if (include_index) notes.push(`[Edit:${edit_index}]`);
+  const annotation = notes.length > 0 ? `{>>${notes.join(' ')}<<}` : '';
+  return lead + body + trail + annotation;
+}
+/**
+ * Applies `edits` to `markdown_text`, rendering each one as CriticMarkup.
+ *
+ * Each edit's target is located in the original text. Edits with an empty
+ * target or no match are skipped. When two matches overlap, the edit that
+ * comes first in `edits` wins. Replacements are applied from the end of the
+ * document backwards so earlier offsets stay valid.
+ *
+ * @param markdown_text Source markdown.
+ * @param edits Modify edits to apply.
+ * @param include_index Append `[Edit:n]` (position in `edits`) to each change.
+ * @param highlight_only Highlight the matched text instead of showing a replacement.
+ * @returns The annotated markdown, or the input unchanged when `edits` is empty.
+ */
+export function apply_edits_to_markdown(
+  markdown_text: string,
+  edits: ModifyText[],
+  include_index = false,
+  highlight_only = false
+): string {
+  if (!edits || edits.length === 0) return markdown_text;
+  interface LocatedEdit {
+    start: number;
+    end: number;
+    matched: string;
+    edit: ModifyText;
+    index: number;
+  }
+  // Collected in edit-index order, which is the priority order used for overlap resolution.
+  const located: LocatedEdit[] = [];
+  for (let index = 0; index < edits.length; index += 1) {
+    const edit = edits[index];
+    const target = edit.target_text || '';
+    if (!target) continue;
+    const [start, end] = _find_match_in_text(markdown_text, target);
+    if (start === -1) continue;
+    located.push({ start, end, matched: markdown_text.substring(start, end), edit, index });
+  }
+  const accepted: LocatedEdit[] = [];
+  for (const candidate of located) {
+    const clashes = accepted.some((taken) => candidate.start < taken.end && candidate.end > taken.start);
+    if (!clashes) accepted.push(candidate);
+  }
+  // Rightmost match first, so splicing never shifts offsets that are still pending.
+  accepted.sort((a, b) => b.start - a.start);
+  return accepted.reduce((doc, { start, end, matched, edit, index }) => {
+    const replacement_text = edit.new_text || '';
+    // presumably returns the lengths of the shared prefix and suffix — verify against diff.ts
+    // NOTE(review): this trimming also runs in highlight_only mode, so the highlight covers
+    // only the part of the match that differs from new_text — confirm that is intended.
+    const [shared_head, shared_tail] = trim_common_context(matched, replacement_text);
+    const head = shared_head > 0 ? matched.substring(0, shared_head) : '';
+    const tail = shared_tail > 0 ? matched.substring(matched.length - shared_tail) : '';
+    const markup = _build_critic_markup(
+      matched.substring(shared_head, matched.length - shared_tail),
+      replacement_text.substring(shared_head, replacement_text.length - shared_tail),
+      edit.comment,
+      index,
+      include_index,
+      highlight_only
+    );
+    return doc.substring(0, start) + head + markup + tail + doc.substring(end);
+  }, markdown_text);
+}

package/src/models.ts ADDED Viewed

@@ -0,0 +1,51 @@
+/**
+ * Request to replace one piece of text with another.
+ *
+ * `target_text` is the text to find and `new_text` its replacement; `comment`
+ * is an optional note attached to the change. The underscore-prefixed fields
+ * look like internal bookkeeping set during processing — TODO confirm against
+ * the engine/mapper code, which is not visible here.
+ */
+export interface ModifyText {
+  type: 'modify';
+  target_text: string;
+  new_text: string;
+  comment?: string | null;
+  _match_start_index?: number | null;
+  _internal_op?: string | null;
+  _active_mapper_ref?: any | null; // Typed as DocumentMapper later
+  _original_target_text?: string;
+  _is_table_edit?: boolean;
+}
+/**
+ * Request tagged `'accept'` that refers to an existing item by `target_id`,
+ * with an optional comment.
+ * NOTE(review): presumably accepts a tracked change — confirm against the engine.
+ */
+export interface AcceptChange {
+  type: 'accept';
+  target_id: string;
+  comment?: string | null;
+}
+/**
+ * Request tagged `'reject'` that refers to an existing item by `target_id`,
+ * with an optional comment.
+ * NOTE(review): presumably rejects a tracked change — confirm against the engine.
+ */
+export interface RejectChange {
+  type: 'reject';
+  target_id: string;
+  comment?: string | null;
+}
+/**
+ * Request tagged `'reply'` carrying reply `text` for the item identified by `target_id`.
+ * NOTE(review): presumably `target_id` names an existing comment — confirm against comments.ts.
+ */
+export interface ReplyComment {
+  type: 'reply';
+  target_id: string;
+  text: string;
+}
+/**
+ * Request tagged `'insert_row'`: adds a row with the given `cells` either
+ * `'above'` or `'below'` a position identified by `target_text`.
+ * NOTE(review): presumably `target_text` selects the anchor row by its content — confirm.
+ */
+export interface InsertTableRow {
+  type: 'insert_row';
+  target_text: string;
+  position: 'above' | 'below';
+  cells: string[];
+  _match_start_index?: number | null;
+}
+/**
+ * Request tagged `'delete_row'`, identified by `target_text`.
+ * NOTE(review): presumably removes the table row containing that text — confirm.
+ */
+export interface DeleteTableRow {
+  type: 'delete_row';
+  target_text: string;
+  _match_start_index?: number | null;
+}
+/**
+ * Union of every change request shape declared in this file.
+ * Each member has a distinct string literal in `type`, so checking `type` narrows the union.
+ */
+export type DocumentChange =
+  | ModifyText
+  | AcceptChange
+  | RejectChange
+  | ReplyComment
+  | InsertTableRow
+  | DeleteTableRow;