npm - docrev - Versions diffs - 0.9.15 → 0.9.16 - Mend

docrev 0.9.15 → 0.9.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

package/README.md +41 -46
package/dist/lib/wordcomments.d.ts.map +1 -1
package/dist/lib/wordcomments.js +25 -2
package/dist/lib/wordcomments.js.map +1 -1
package/docs-src/build.py +113 -0
package/docs-src/extra.css +208 -0
package/docs-src/md-to-html.lua +6 -0
package/docs-src/template.html +116 -0
package/lib/wordcomments.ts +25 -2
package/mkdocs.yml +64 -0
package/package.json +1 -1
package/site/assets/extra.css +208 -0
package/site/commands.html +926 -0
package/site/configuration.html +469 -0
package/site/index.html +288 -0
package/site/troubleshooting.html +461 -0
package/site/workflow.html +518 -0
package/dev_notes/bug_repro_comment_parser.md +0 -71
package/dev_notes/stress2/adversarial.docx +0 -0
package/dev_notes/stress2/build_adversarial.ts +0 -186
package/dev_notes/stress2/drift_matcher.ts +0 -62
package/dev_notes/stress2/probe_anchors.ts +0 -35
package/dev_notes/stress2/project/adversarial.docx +0 -0
package/dev_notes/stress2/project/discussion.before.md +0 -3
package/dev_notes/stress2/project/discussion.md +0 -3
package/dev_notes/stress2/project/methods.before.md +0 -20
package/dev_notes/stress2/project/methods.md +0 -20
package/dev_notes/stress2/project/rev.yaml +0 -5
package/dev_notes/stress2/project/sections.yaml +0 -4
package/dev_notes/stress2/sections.yaml +0 -5
package/dev_notes/stress2/trace_placement.ts +0 -50
package/dev_notes/stresstest_boundaries.ts +0 -27
package/dev_notes/stresstest_drift_apply.ts +0 -43
package/dev_notes/stresstest_drift_compare.ts +0 -43
package/dev_notes/stresstest_drift_v2.ts +0 -54
package/dev_notes/stresstest_inspect.ts +0 -54
package/dev_notes/stresstest_pstyle.ts +0 -55
package/dev_notes/stresstest_section_debug.ts +0 -23
package/dev_notes/stresstest_split.ts +0 -70
package/dev_notes/stresstest_trace.ts +0 -19
package/dev_notes/stresstest_verify_no_overwrite.ts +0 -40

package/dev_notes/stress2/build_adversarial.ts DELETED Viewed

@@ -1,186 +0,0 @@
-/**
- * Build an adversarial DOCX with hand-crafted commentRangeStart/End
- * markers that probe whether docrev locates comments by *real*
- * anchor (the highlighted run text, taken from the docx XML) or by
- * guesswork. Each comment is designed to expose a specific failure
- * mode if the matcher were not anchor-aware.
- */
-import AdmZip from 'adm-zip';
-import path from 'path';
-const out = path.resolve('dev_notes/stress2/adversarial.docx');
-// Sentence with: a triplicated word, unicode look-alikes, mid-word
-// hyphenation, and embedded XML-special chars in the *prose* (escaped).
-const paragraphs: { text: string; comments?: { id: string; on: string }[] }[] = [
-  // heading
-  { text: 'Methods', comments: [] },
-  // p0: single highlight, easy
-  { text: 'The methods section describes our approach in detail.',
-    comments: [{ id: '0', on: 'methods section' }] },
-  // p1: word "model" appears 3x — anchor must pin the SECOND occurrence
-  { text: 'The model is fitted, the model is checked, the model is reported.',
-    comments: [{ id: '1', on: 'model' /* second occurrence */ }] },
-  // p2: empty anchor (zero-width comment, classic Word "insert here")
-  { text: 'Reviewers often place a marker between two sentences. Like this. Then continue.',
-    comments: [{ id: '2', on: '' }] },
-  // p3: anchor spanning across multiple runs (formatting break)
-  { text: 'Our results show that p < 0.001 across all conditions.',
-    comments: [{ id: '3', on: 'p < 0.001' }] },
-  // p4: text containing literal angle brackets that would break a naive
-  // [^<]* regex — anchor includes "(<1825)"
-  { text: 'Trade volumes pre-industrial (<1825) were modest.',
-    comments: [{ id: '4', on: '(<1825)' }] },
-  // p5: very long anchor (full sentence) — matcher must still place it
-  { text: 'We used a hierarchical Bayesian model with weakly informative priors and Hamiltonian Monte Carlo sampling implemented in Stan.',
-    comments: [{ id: '5', on: 'We used a hierarchical Bayesian model with weakly informative priors and Hamiltonian Monte Carlo sampling implemented in Stan.' }] },
-  // p6: anchor on punctuation only — should fall back to context
-  { text: 'See Table 1 for details; numbers are rounded.',
-    comments: [{ id: '6', on: ';' }] },
-  // p7: two overlapping comment ranges share a word
-  { text: 'The overrepresented species are listed in Appendix A.',
-    comments: [
-      { id: '7', on: 'overrepresented species' },
-      { id: '8', on: 'overrepresented' },
-    ] },
-  // p8: anchor that exists VERBATIM elsewhere in the doc — context disambiguates
-  { text: 'The cohort was small. Limitations are discussed in Section 5.',
-    comments: [{ id: '9', on: 'small' }] },
-  { text: 'The effect was small but significant.',
-    comments: [{ id: '10', on: 'small' }] },
-  // p9: heading paragraph (<w:pStyle w:val="Heading1"/>)
-  { text: 'Discussion', comments: [], /* heading marker handled below */ },
-  { text: 'In this section we situate the findings in prior literature.',
-    comments: [{ id: '11', on: 'situate the findings' }] },
-];
-function escapeXml(s: string): string {
-  return s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;')
-          .replace(/"/g, '&quot;').replace(/'/g, '&apos;');
-}
-function buildParagraphXml(p: typeof paragraphs[0], isHeading: boolean): string {
-  const text = p.text;
-  const comments = p.comments || [];
-  if (comments.length === 0) {
-    return `<w:p>${isHeading ? '<w:pPr><w:pStyle w:val="Heading1"/></w:pPr>' : ''}<w:r><w:t xml:space="preserve">${escapeXml(text)}</w:t></w:r></w:p>`;
-  }
-  // Plan ranges: each comment marks [start..end] character offsets into `text`.
-  // Empty anchors place start=end at first sentence boundary.
-  type Range = { id: string; start: number; end: number };
-  const ranges: Range[] = [];
-  for (const c of comments) {
-    if (c.on === '') {
-      const dotIdx = text.indexOf('.');
-      const pos = dotIdx >= 0 ? dotIdx + 1 : 0;
-      ranges.push({ id: c.id, start: pos, end: pos });
-      continue;
-    }
-    if (c.id === '1') {
-      // "model" — pick the SECOND occurrence
-      const first = text.indexOf('model');
-      const second = text.indexOf('model', first + 1);
-      ranges.push({ id: c.id, start: second, end: second + 'model'.length });
-      continue;
-    }
-    const start = text.indexOf(c.on);
-    if (start < 0) throw new Error(`anchor not found: ${c.on}`);
-    ranges.push({ id: c.id, start, end: start + c.on.length });
-  }
-  // Build event list: at each char boundary we may need to emit
-  // <w:commentRangeStart/> or <w:commentRangeEnd/>.
-  type Event = { pos: number; kind: 'start' | 'end'; id: string };
-  const events: Event[] = [];
-  for (const r of ranges) {
-    events.push({ pos: r.start, kind: 'start', id: r.id });
-    events.push({ pos: r.end, kind: 'end', id: r.id });
-  }
-  // Emit ends before starts at same position, so an empty anchor's start=end
-  // ordering keeps a zero-width range
-  events.sort((a, b) => a.pos - b.pos || (a.kind === 'end' ? -1 : 1));
-  let xml = `<w:p>${isHeading ? '<w:pPr><w:pStyle w:val="Heading1"/></w:pPr>' : ''}`;
-  let cursor = 0;
-  for (const ev of events) {
-    if (ev.pos > cursor) {
-      xml += `<w:r><w:t xml:space="preserve">${escapeXml(text.slice(cursor, ev.pos))}</w:t></w:r>`;
-      cursor = ev.pos;
-    }
-    xml += ev.kind === 'start'
-      ? `<w:commentRangeStart w:id="${ev.id}"/>`
-      : `<w:commentRangeEnd w:id="${ev.id}"/><w:r><w:commentReference w:id="${ev.id}"/></w:r>`;
-  }
-  if (cursor < text.length) {
-    xml += `<w:r><w:t xml:space="preserve">${escapeXml(text.slice(cursor))}</w:t></w:r>`;
-  }
-  xml += `</w:p>`;
-  return xml;
-}
-const headingTexts = new Set(['Methods', 'Discussion']);
-const documentBody = paragraphs.map(p => buildParagraphXml(p, headingTexts.has(p.text))).join('');
-const documentXml = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<w:document xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
-  <w:body>
-    ${documentBody}
-    <w:sectPr/>
-  </w:body>
-</w:document>`;
-const allComments = paragraphs.flatMap(p => p.comments || []);
-const commentsXml = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
-${allComments.map(c => `  <w:comment w:id="${c.id}" w:author="Reviewer" w:initials="R" w:date="2026-01-01T00:00:00Z"><w:p><w:r><w:t>Comment ${c.id} on &quot;${escapeXml(c.on)}&quot; — text contains <embedded brackets and &amp;.</w:t></w:r></w:p></w:comment>`.replace('<embedded', '&lt;embedded')).join('\n')}
-</w:comments>`;
-const contentTypesXml = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">
-  <Default Extension="rels" ContentType="application/vnd.openxmlformats-package.relationships+xml"/>
-  <Default Extension="xml" ContentType="application/xml"/>
-  <Override PartName="/word/document.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"/>
-  <Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>
-  <Override PartName="/word/styles.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"/>
-</Types>`;
-const rootRelsXml = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
-  <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" Target="word/document.xml"/>
-</Relationships>`;
-const docRelsXml = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">
-  <Relationship Id="rId1" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments" Target="comments.xml"/>
-  <Relationship Id="rId2" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles" Target="styles.xml"/>
-</Relationships>`;
-const stylesXml = `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<w:styles xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main">
-  <w:style w:type="paragraph" w:styleId="Heading1"><w:name w:val="heading 1"/></w:style>
-</w:styles>`;
-const zip = new AdmZip();
-zip.addFile('[Content_Types].xml', Buffer.from(contentTypesXml, 'utf8'));
-zip.addFile('_rels/.rels', Buffer.from(rootRelsXml, 'utf8'));
-zip.addFile('word/document.xml', Buffer.from(documentXml, 'utf8'));
-zip.addFile('word/comments.xml', Buffer.from(commentsXml, 'utf8'));
-zip.addFile('word/styles.xml', Buffer.from(stylesXml, 'utf8'));
-zip.addFile('word/_rels/document.xml.rels', Buffer.from(docRelsXml, 'utf8'));
-zip.writeZip(out);
-console.log(`wrote ${out}`);
-console.log(`comments: ${allComments.length}`);

package/dev_notes/stress2/drift_matcher.ts DELETED Viewed

@@ -1,62 +0,0 @@
-/**
- * Run anchors from the adversarial docx against a HEAVILY DRIFTED
- * markdown and report match quality per comment.
- */
-import { extractCommentAnchors } from '../../lib/word-extraction.js';
-import { findAnchorInText, classifyStrategy } from '../../lib/anchor-match.js';
-const docx = 'dev_notes/stress2/adversarial.docx';
-// Drifted markdown — same comments need to land on this.
-//  - p0 paraphrased
-//  - p1 reordered, only TWO occurrences of "model"
-//  - p2 prose lightly edited, sentence boundary preserved
-//  - p3 prose changed slightly: "p < 0.001 (n=412)"
-//  - p4 prose moved (<1825)
-//  - p5 mostly intact
-//  - p6 entire sentence rewritten ("Round numbers are reported in Table 1.")
-//  - p7 unchanged
-//  - p8/p9 unchanged
-//  - p10/p11 unchanged
-const drifted = [
-  '# Methods', '',
-  'Below we describe our methodology in detail.', '',
-  'The model is fitted; later, the model is reported.', '',
-  'Reviewers often place a marker between two sentences. Like this. Then continue.', '',
-  'Our results show that p < 0.001 (n=412) across all conditions.', '',
-  'Trade volumes were modest pre-industrial (<1825).', '',
-  'We used a hierarchical Bayesian model with weakly informative priors and Hamiltonian Monte Carlo sampling implemented in Stan.', '',
-  'Round numbers are reported in Table 1.', '',
-  'The overrepresented species are listed in Appendix A.', '',
-  'The cohort was small. Limitations are discussed in Section 5.',
-  'The effect was small but significant.', '',
-  '# Discussion', '',
-  'In this section we situate the findings in prior literature.',
-].join('\n');
-const { anchors } = await extractCommentAnchors(docx);
-const sorted = [...anchors.entries()].sort((a, b) => Number(a[0]) - Number(b[0]));
-const tally = { clean: 0, drift: 0, 'context-only': 0, ambiguous: 0, unmatched: 0 };
-console.log('id │ quality       │ strategy            │ #hits │ anchor');
-console.log('───┼───────────────┼─────────────────────┼───────┼──────────────────────');
-for (const [id, a] of sorted) {
-  const r = findAnchorInText(a.anchor, drifted, a.before, a.after);
-  let q: keyof typeof tally = classifyStrategy(r.strategy, r.occurrences.length) as any;
-  if (r.occurrences.length > 1) q = 'ambiguous';
-  tally[q]++;
-  const tag = a.isEmpty ? '∅' : a.anchor.length > 30 ? a.anchor.slice(0, 27) + '...' : a.anchor;
-  console.log(`${id.padStart(2)} │ ${q.padEnd(13)} │ ${r.strategy.padEnd(19)} │ ${String(r.occurrences.length).padStart(5)} │ ${JSON.stringify(tag)}`);
-}
-console.log('\nTally:', tally);
-// Also confirm: comment #6 (";") will fail anchor-direct match and force
-// context fallback (because the original sentence was rewritten).
-console.log('\nComment #6 (\";\") — sentence rewritten in drifted md.');
-const c6 = anchors.get('6')!;
-const r6 = findAnchorInText(c6.anchor, drifted, c6.before, c6.after);
-console.log(`  strategy=${r6.strategy}, hits=${r6.occurrences.length}`);
-console.log(`  ${r6.occurrences.length > 0 ? 'placed via fallback' : 'left for manual placement'}`);

package/dev_notes/stress2/probe_anchors.ts DELETED Viewed

@@ -1,35 +0,0 @@
-/**
- * Probe: do extracted anchors come from `<w:commentRangeStart/End>`
- * markers (i.e. real anchors), and does each comment carry a unique
- * docPosition that disambiguates duplicate prose?
- */
-import { extractCommentAnchors } from '../../lib/word-extraction.js';
-const docx = 'dev_notes/stress2/adversarial.docx';
-const { anchors, fullDocText } = await extractCommentAnchors(docx);
-console.log(`Doc text length: ${fullDocText.length}`);
-console.log(`Anchors extracted: ${anchors.size}\n`);
-const sorted = [...anchors.entries()].sort((a, b) => Number(a[0]) - Number(b[0]));
-for (const [id, a] of sorted) {
-  const tag = a.isEmpty ? '∅(empty)' : JSON.stringify(a.anchor);
-  console.log(`#${id}  pos=${String(a.docPosition).padStart(4)}  anchor=${tag}`);
-  if (a.before) console.log(`       before=${JSON.stringify(a.before.slice(-40))}`);
-  if (a.after)  console.log(`       after =${JSON.stringify(a.after.slice(0, 40))}`);
-}
-// Sanity: comment 1 should pin the SECOND occurrence of "model"
-const c1 = anchors.get('1')!;
-const firstModel = fullDocText.indexOf('model');
-const secondModel = fullDocText.indexOf('model', firstModel + 1);
-console.log(`\nComment #1 expectation: pin SECOND "model" at pos ${secondModel}`);
-console.log(`Comment #1 actual  docPosition=${c1.docPosition}`);
-console.log(`Match: ${c1.docPosition === secondModel ? 'YES (real-anchor)' : 'NO'}`);
-// Comment 7 vs 8: overlapping ranges, both anchored on "overrepresented..."
-const c7 = anchors.get('7')!;
-const c8 = anchors.get('8')!;
-console.log(`\nComment #7 anchor: ${JSON.stringify(c7.anchor)} (expected "overrepresented species")`);
-console.log(`Comment #8 anchor: ${JSON.stringify(c8.anchor)} (expected "overrepresented")`);

package/dev_notes/stress2/project/adversarial.docx DELETED Viewed

Binary file

package/dev_notes/stress2/project/discussion.before.md DELETED Viewed

@@ -1,3 +0,0 @@
-# Discussion
-In this section we situate the findings in prior literature.

package/dev_notes/stress2/project/discussion.md DELETED Viewed

@@ -1,3 +0,0 @@
-# Discussion
-In this section we {>>Reviewer: Comment 11 on "situate the findings" — text contains <embedded brackets and &.<<}situate the findings in prior literature.

package/dev_notes/stress2/project/methods.before.md DELETED Viewed

@@ -1,20 +0,0 @@
-# Methods
-Below we describe our methodology in detail.
-The model is fitted; later, the model is reported.
-Reviewers often place a marker between two sentences. Like this. Then continue.
-Our results show that p < 0.001 (n=412) across all conditions.
-Trade volumes were modest pre-industrial (<1825).
-We used a hierarchical Bayesian model with weakly informative priors and Hamiltonian Monte Carlo sampling implemented in Stan.
-Round numbers are reported in Table 1.
-The overrepresented species are listed in Appendix A.
-The cohort was small. Limitations are discussed in Section 5.
-The effect was small but significant.

package/dev_notes/stress2/project/methods.md DELETED Viewed

@@ -1,20 +0,0 @@
-# Methods
-Below we describe{>>Reviewer: Comment 0 on "methods section" — text contains <embedded brackets and &.<<} our methodology in detail.
-The {>>Reviewer: Comment 1 on "model" — text contains <embedded brackets and &.<<}model is fitted; later, the model is reported.
-Reviewers often place a marker between two sentences. Like this. Then{>>Reviewer: Comment 2 on "" — text contains <embedded brackets and &.<<} continue.
-Our results show that {>>Reviewer: Comment 3 on "p < 0.001" — text contains <embedded brackets and &.<<}p < 0.001 (n=412) across all conditions.
-Trade volumes were modest pre-industrial {>>Reviewer: Comment 4 on "(<1825)" — text contains <embedded brackets and &.<<}(<1825).
-{>>Reviewer: Comment 5 on "We used a hierarchical Bayesian model with weakly informative priors and Hamiltonian Monte Carlo sampling implemented in Stan." — text contains <embedded brackets and &.<<}We used a hierarchical Bayesian model with weakly informative priors and Hamiltonian Monte Carlo sampling implemented in Stan.
-Round numbers are reported{>>Reviewer: Comment 6 on ";" — text contains <embedded brackets and &.<<} in Table 1.
-The {>>Reviewer: Comment 7 on "overrepresented species" — text contains <embedded brackets and &.<<}overrepresented{>>Reviewer: Comment 8 on "overrepresented" — text contains <embedded brackets and &.<<} species are listed in Appendix A.
-The cohort was {>>Reviewer: Comment 9 on "small" — text contains <embedded brackets and &.<<}small. Limitations are discussed in Section 5.
-The effect was {>>Reviewer: Comment 10 on "small" — text contains <embedded brackets and &.<<}small but significant.

package/dev_notes/stress2/project/rev.yaml DELETED Viewed

@@ -1,5 +0,0 @@
-title: stress2
-authors: []
-sections:
-  - methods
-  - discussion

package/dev_notes/stress2/project/sections.yaml DELETED Viewed

@@ -1,4 +0,0 @@
-version: 1
-sections:
-  methods.md: Methods
-  discussion.md: Discussion

package/dev_notes/stress2/sections.yaml DELETED Viewed

@@ -1,5 +0,0 @@
-sections:
-  - file: methods.md
-    header: Methods
-  - file: discussion.md
-    header: Discussion

package/dev_notes/stress2/trace_placement.ts DELETED Viewed

@@ -1,50 +0,0 @@
-/**
- * Trace exactly where insertCommentsIntoMarkdown places each comment.
- * Reproduces the methods.md sync to see if disambiguation works.
- */
-import { extractCommentAnchors, extractWordComments } from '../../lib/word-extraction.js';
-import { insertCommentsIntoMarkdown } from '../../lib/import.js';
-import * as fs from 'fs';
-const docx = 'dev_notes/stress2/adversarial.docx';
-const md = fs.readFileSync('dev_notes/stress2/project/methods.before.md', 'utf-8');
-const { anchors } = await extractCommentAnchors(docx);
-const comments = await extractWordComments(docx);
-// Filter to methods (everything except #11)
-const methodsComments = comments.filter((c: any) => c.id !== '11');
-console.log('Markdown length:', md.length);
-console.log('Comments to place:', methodsComments.length);
-const smallPositions: number[] = [];
-let i = 0;
-while ((i = md.indexOf('small', i)) !== -1) {
-  smallPositions.push(i);
-  i += 5;
-}
-console.log('"small" occurrences in md at:', smallPositions);
-for (const p of smallPositions) {
-  console.log(`  pos ${p}: ...${JSON.stringify(md.slice(Math.max(0, p - 30), p + 30))}...`);
-}
-console.log('\nDocx anchor data for #9 and #10:');
-for (const id of ['9', '10']) {
-  const a = anchors.get(id)!;
-  console.log(`  #${id}: docPos=${a.docPosition}, before=${JSON.stringify(a.before.slice(-30))}, after=${JSON.stringify(a.after.slice(0, 30))}`);
-}
-// Run with quiet:false to see warnings
-const out = insertCommentsIntoMarkdown(md, methodsComments, anchors, {
-  quiet: false,
-  wrapAnchor: false,
-});
-// Locate both inserted comment blocks
-const block9 = out.indexOf('Comment 9 on');
-const block10 = out.indexOf('Comment 10 on');
-console.log(`\n#9 inserted at md offset: ${block9}`);
-console.log(`#10 inserted at md offset: ${block10}`);
-console.log(`Surrounding #9: ${JSON.stringify(out.slice(Math.max(0, block9 - 30), block9 + 50))}`);
-console.log(`Surrounding #10: ${JSON.stringify(out.slice(Math.max(0, block10 - 30), block10 + 50))}`);

package/dev_notes/stresstest_boundaries.ts DELETED Viewed

@@ -1,27 +0,0 @@
-import { extractCommentAnchors } from '../lib/import.js';
-const docx = 'C:/GillesC/tmp/docrev-stress/reviewed.docx';
-const { fullDocText } = await extractCommentAnchors(docx);
-function findSectionHeader(text: string, header: string): number {
-  const needle = header.toLowerCase().trim();
-  const lower = text.toLowerCase();
-  let idx = 0;
-  while ((idx = lower.indexOf(needle, idx)) !== -1) {
-    const after = text.slice(idx + needle.length, idx + needle.length + 5);
-    if (!after.startsWith(':') && !after.startsWith(' :')) return idx;
-    idx++;
-  }
-  return -1;
-}
-const headers = ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion', 'Conclusion', 'References', 'Supplementary Materials'];
-const found = headers.map(h => ({ h, idx: findSectionHeader(fullDocText, h) }))
-  .filter(x => x.idx >= 0)
-  .sort((a, b) => a.idx - b.idx);
-console.log('Sorted boundary positions found:');
-for (const f of found) {
-  const ctx = fullDocText.slice(Math.max(0, f.idx - 20), f.idx + 30).replace(/\s+/g, ' ');
-  console.log(`  ${f.h.padEnd(25)} @ ${f.idx}  ctx: ...${ctx}...`);
-}

package/dev_notes/stresstest_drift_apply.ts DELETED Viewed

@@ -1,43 +0,0 @@
-// Apply targeted drift edits to project-drifted/abstract.md and methods.md
-import * as fs from 'fs';
-const dir = 'C:/GillesC/tmp/docrev-stress/project-drifted';
-function edit(file: string, edits: Array<[string, string]>) {
-  const p = `${dir}/${file}`;
-  let t = fs.readFileSync(p, 'utf-8');
-  for (const [from, to] of edits) {
-    if (!t.includes(from)) {
-      console.error(`!! ${file}: pattern not found: "${from.slice(0, 60)}"`);
-      continue;
-    }
-    t = t.replace(from, to);
-    console.log(`OK ${file}: replaced "${from.slice(0, 60)}..."`);
-  }
-  fs.writeFileSync(p, t);
-}
-// Word swap: 'accelerating' should still be findable via stripped/partial fallback (anchor was a single word, replacing changes the prose)
-// Actually: replacing "accelerating" with "rapid" in the abstract removes the anchor entirely.
-// Some comments have anchor "accelerating" — those should go drift -> unmatched.
-edit('abstract.md', [
-  ['accelerating rates', 'rapid rates'],
-  // Numerical drift: this anchor is "0–20 years" / "57%" — both should fail to direct match
-  ['from 17% (0--20 years) to 57%', 'from 15% (0–25 years) to 60%'],
-  // Anchor-spanning rewrite: replace 'Man-made and ruderal habitats functioned as gateways' with 'Anthropogenic habitats acted as entry points'
-  ['Man-made and ruderal habitats functioned as gateways', 'Anthropogenic habitats acted as entry points'],
-  // Number change in big number
-  ['835,891 vegetation plots', '1,200,000 vegetation plots'],
-]);
-// Delete an entire paragraph from discussion (so anchors there go unmatched)
-const disc = fs.readFileSync(`${dir}/discussion.md`, 'utf-8');
-// Just append a marker to track that we did NOT delete; instead, we'll insert a new prose block to test that comments still land correctly relative to it.
-// For deletion test, find a known phrase and remove its sentence
-edit('discussion.md', [
-  // Insert a new paragraph at top to force position drift in proportion-based placement
-  ['# Discussion\n', '# Discussion\n\n_NOTE: this paragraph was inserted after review. Just an extra block of prose to push everything downward in the markdown so that proportion-based anchor placement gets stress-tested. We pad with several sentences to ensure the offset is meaningful and that fuzzy matchers still find the right targets despite this drift._\n\n'],
-]);
-console.log('drift applied');

package/dev_notes/stresstest_drift_compare.ts DELETED Viewed

@@ -1,43 +0,0 @@
-// Compare pristine vs drifted verify-anchors output
-import * as fs from 'fs';
-const a = JSON.parse(fs.readFileSync('C:/GillesC/tmp/docrev-stress/pristine.json', 'utf-8'));
-const b = JSON.parse(fs.readFileSync('C:/GillesC/tmp/docrev-stress/drifted.json', 'utf-8'));
-console.log('Summary deltas:');
-const keys = Array.from(new Set([...Object.keys(a.summary), ...Object.keys(b.summary)]));
-for (const k of keys) {
-  const av = a.summary[k] ?? 0;
-  const bv = b.summary[k] ?? 0;
-  const delta = bv - av;
-  console.log(`  ${k.padEnd(15)} ${String(av).padStart(4)} -> ${String(bv).padStart(4)}  (delta ${delta >= 0 ? '+' : ''}${delta})`);
-}
-console.log();
-// Show comments whose quality changed (especially those that moved to a worse bucket)
-const aMap = new Map(a.comments.map((c: any) => [c.id, c]));
-console.log('Quality changes:');
-let regressions = 0;
-let improvements = 0;
-for (const c of b.comments) {
-  const prev = aMap.get(c.id) as any;
-  if (!prev) continue;
-  if (prev.quality !== c.quality) {
-    const dir = qualityRank(c.quality) > qualityRank(prev.quality) ? '⬇' : '⬆';
-    if (dir === '⬇') regressions++; else improvements++;
-    console.log(`  ${dir} #${c.id} [${c.section || '—'}] ${prev.quality}/${prev.strategy} -> ${c.quality}/${c.strategy}  anchor="${(c.anchor || '').slice(0, 35)}"`);
-  }
-}
-console.log();
-console.log(`regressions: ${regressions}  improvements: ${improvements}`);
-function qualityRank(q: string): number {
-  switch (q) {
-    case 'clean': return 0;
-    case 'drift': return 1;
-    case 'context-only': return 2;
-    case 'ambiguous': return 1; // ambiguous is sideways, not strictly worse
-    case 'unmatched': return 3;
-    default: return 4;
-  }
-}

package/dev_notes/stresstest_drift_v2.ts DELETED Viewed

@@ -1,54 +0,0 @@
-// Apply drift edits that actually intersect real comment anchors
-import * as fs from 'fs';
-const dir = 'C:/GillesC/tmp/docrev-stress/project-drifted';
-function edit(file: string, edits: Array<[string, string]>) {
-  const p = `${dir}/${file}`;
-  let t = fs.readFileSync(p, 'utf-8');
-  for (const [from, to] of edits) {
-    if (!t.includes(from)) {
-      console.error(`!! ${file}: pattern not found: "${from.slice(0, 60)}"`);
-      continue;
-    }
-    t = t.replace(from, to);
-    console.log(`OK ${file}: replaced "${from.slice(0, 60)}..."`);
-  }
-  fs.writeFileSync(p, t);
-}
-// Each of these intersects real anchors; expected effect noted.
-edit('abstract.md', [
-  // Title: 3 anchors are 'Patterns of habitat niche expansion' (full title) — but title isn't in abstract.md, it's in YAML frontmatter, so probably already failing
-  // 'overrepresented' is the most common anchor (6 comments). Replace it everywhere.
-  // After this, the word 'overrepresented' is gone → all those comments should go to context-only or unmatched
-  ['overrepresented in more than one habitat type', 'preferentially distributed across habitat types'],
-  // 'undisturbed' appears in 'undisturbed habitat types' → 2 comments
-  ['undisturbed habitat types', 'pristine vegetation communities'],
-  // 'human economies' (3 anchors)
-  ['human economies', 'economic activity'],
-  // 'nutrient-rich'
-  ['nutrient-rich, disturbed habitats', 'anthropic, modified environments'],
-  // 'when residence time was prolonged.' — long anchor, should go to drift / partial-start
-  ['when residence time was prolonged.', 'as residence time increased.'],
-  // 'pervasive' (1 anchor)
-  ['Invasion debt in terms of habitat niche breadth is pervasive', 'Invasion debt in terms of habitat niche breadth is widespread'],
-  // 'alpine habitats' (1 anchor)
-  ['alpine habitats', 'high-altitude environments'],
-  // 'semi-natural vegetation' (2 anchors)
-  ['semi-natural vegetation', 'natural plant communities'],
-  // 'Man-made habitats function as gateways' (1 anchor)
-  ['Man-made habitats function as gateways', 'Anthropogenic environments serve as entry routes'],
-  // 'Invasion debt in terms of habitat niche breadth' (2 anchors)
-  // Already mutated above (pervasive→widespread keeps anchor matchable). Add another small change to test partial-start
-  // 'habitat niche expansion' — anchor for #27
-  ['habitat niche expansion', 'ecological niche broadening'],
-  // ', yet' anchor for #19
-  [', yet', ', however'],
-  // '44' — short numeric anchor
-  ['44%', '53%'],
-  // '17 habitats'
-  ['17 habitats', '18 habitats'],
-]);
-console.log('---done');

package/dev_notes/stresstest_inspect.ts DELETED Viewed

@@ -1,54 +0,0 @@
-import { extractWordComments, extractCommentAnchors } from '../lib/import.js';
-const docx = 'C:/GillesC/tmp/docrev-stress/reviewed.docx';
-const comments = await extractWordComments(docx);
-const { anchors, fullDocText } = await extractCommentAnchors(docx);
-console.log(`comments: ${comments.length}`);
-console.log(`anchors:  ${anchors.size}`);
-console.log(`docText:  ${fullDocText.length} chars`);
-console.log();
-const byAuthor: Record<string, number> = {};
-for (const c of comments) byAuthor[c.author] = (byAuthor[c.author] || 0) + 1;
-console.log('by author:', byAuthor);
-console.log();
-console.log('first 5 comments:');
-for (const c of comments.slice(0, 5)) {
-  const a = anchors.get(c.id);
-  const pos = a?.docPosition ?? -1;
-  const anchor = (a?.anchor || '').slice(0, 60);
-  const text = c.text.replace(/\s+/g, ' ').slice(0, 80);
-  console.log(`  #${c.id} [${c.author}] pos=${pos} anchor="${anchor}" text="${text}"`);
-}
-console.log();
-console.log('last 3 comments:');
-for (const c of comments.slice(-3)) {
-  const a = anchors.get(c.id);
-  const pos = a?.docPosition ?? -1;
-  const anchor = (a?.anchor || '').slice(0, 60);
-  const text = c.text.replace(/\s+/g, ' ').slice(0, 80);
-  console.log(`  #${c.id} [${c.author}] pos=${pos} anchor="${anchor}" text="${text}"`);
-}
-console.log();
-const sectionKeywords = ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion', 'Conclusion', 'References', 'Acknowledgements', 'Data Availability', 'Author Contributions'];
-console.log('candidate headings:');
-for (const kw of sectionKeywords) {
-  const idx = fullDocText.indexOf(kw);
-  if (idx >= 0) {
-    const context = fullDocText.slice(idx, idx + 80).replace(/\s+/g, ' ');
-    console.log(`  ${kw} @ ${idx}: "${context}"`);
-  }
-}
-// Check anchor distribution to understand section spans
-const positions = [...anchors.values()].map(a => a.docPosition).sort((a, b) => a - b);
-console.log();
-console.log(`anchor positions: min=${positions[0]} max=${positions[positions.length-1]} median=${positions[Math.floor(positions.length/2)]}`);
-// Check for empty anchors
-const empty = [...anchors.values()].filter(a => a.isEmpty).length;
-console.log(`empty anchors: ${empty}`);