docrev 0.9.11 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/.claude/settings.local.json +9 -9
  2. package/.gitattributes +1 -1
  3. package/CHANGELOG.md +149 -149
  4. package/PLAN-tables-and-postprocess.md +850 -850
  5. package/README.md +391 -391
  6. package/bin/rev.js +11 -11
  7. package/bin/rev.ts +145 -145
  8. package/completions/rev.bash +127 -127
  9. package/completions/rev.ps1 +210 -210
  10. package/completions/rev.zsh +207 -207
  11. package/dev_notes/stress2/build_adversarial.ts +186 -186
  12. package/dev_notes/stress2/drift_matcher.ts +62 -62
  13. package/dev_notes/stress2/probe_anchors.ts +35 -35
  14. package/dev_notes/stress2/project/discussion.before.md +3 -3
  15. package/dev_notes/stress2/project/discussion.md +3 -3
  16. package/dev_notes/stress2/project/methods.before.md +20 -20
  17. package/dev_notes/stress2/project/methods.md +20 -20
  18. package/dev_notes/stress2/project/rev.yaml +5 -5
  19. package/dev_notes/stress2/project/sections.yaml +4 -4
  20. package/dev_notes/stress2/sections.yaml +5 -5
  21. package/dev_notes/stress2/trace_placement.ts +50 -50
  22. package/dev_notes/stresstest_boundaries.ts +27 -27
  23. package/dev_notes/stresstest_drift_apply.ts +43 -43
  24. package/dev_notes/stresstest_drift_compare.ts +43 -43
  25. package/dev_notes/stresstest_drift_v2.ts +54 -54
  26. package/dev_notes/stresstest_inspect.ts +54 -54
  27. package/dev_notes/stresstest_pstyle.ts +55 -55
  28. package/dev_notes/stresstest_section_debug.ts +23 -23
  29. package/dev_notes/stresstest_split.ts +70 -70
  30. package/dev_notes/stresstest_trace.ts +19 -19
  31. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
  32. package/dist/lib/build.d.ts +50 -1
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +80 -30
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/build.d.ts.map +1 -1
  37. package/dist/lib/commands/build.js +38 -5
  38. package/dist/lib/commands/build.js.map +1 -1
  39. package/dist/lib/commands/utilities.js +164 -164
  40. package/dist/lib/commands/word-tools.js +8 -8
  41. package/dist/lib/grammar.js +3 -3
  42. package/dist/lib/import.d.ts.map +1 -1
  43. package/dist/lib/import.js +146 -24
  44. package/dist/lib/import.js.map +1 -1
  45. package/dist/lib/pdf-comments.js +44 -44
  46. package/dist/lib/plugins.js +57 -57
  47. package/dist/lib/pptx-themes.js +115 -115
  48. package/dist/lib/spelling.js +2 -2
  49. package/dist/lib/templates.js +387 -387
  50. package/dist/lib/themes.js +51 -51
  51. package/dist/lib/types.d.ts +20 -0
  52. package/dist/lib/types.d.ts.map +1 -1
  53. package/dist/lib/word-extraction.d.ts +6 -0
  54. package/dist/lib/word-extraction.d.ts.map +1 -1
  55. package/dist/lib/word-extraction.js +46 -3
  56. package/dist/lib/word-extraction.js.map +1 -1
  57. package/dist/lib/wordcomments.d.ts.map +1 -1
  58. package/dist/lib/wordcomments.js +23 -5
  59. package/dist/lib/wordcomments.js.map +1 -1
  60. package/eslint.config.js +27 -27
  61. package/lib/anchor-match.ts +276 -276
  62. package/lib/annotations.ts +644 -644
  63. package/lib/build.ts +1300 -1227
  64. package/lib/citations.ts +160 -160
  65. package/lib/commands/build.ts +833 -801
  66. package/lib/commands/citations.ts +515 -515
  67. package/lib/commands/comments.ts +1050 -1050
  68. package/lib/commands/context.ts +174 -174
  69. package/lib/commands/core.ts +309 -309
  70. package/lib/commands/doi.ts +435 -435
  71. package/lib/commands/file-ops.ts +372 -372
  72. package/lib/commands/history.ts +320 -320
  73. package/lib/commands/index.ts +87 -87
  74. package/lib/commands/init.ts +259 -259
  75. package/lib/commands/merge-resolve.ts +378 -378
  76. package/lib/commands/preview.ts +178 -178
  77. package/lib/commands/project-info.ts +244 -244
  78. package/lib/commands/quality.ts +517 -517
  79. package/lib/commands/response.ts +454 -454
  80. package/lib/commands/section-boundaries.ts +82 -82
  81. package/lib/commands/sections.ts +451 -451
  82. package/lib/commands/sync.ts +706 -706
  83. package/lib/commands/text-ops.ts +449 -449
  84. package/lib/commands/utilities.ts +448 -448
  85. package/lib/commands/verify-anchors.ts +272 -272
  86. package/lib/commands/word-tools.ts +340 -340
  87. package/lib/comment-realign.ts +517 -517
  88. package/lib/config.ts +84 -84
  89. package/lib/crossref.ts +781 -781
  90. package/lib/csl.ts +191 -191
  91. package/lib/dependencies.ts +98 -98
  92. package/lib/diff-engine.ts +465 -465
  93. package/lib/doi-cache.ts +115 -115
  94. package/lib/doi.ts +897 -897
  95. package/lib/equations.ts +506 -506
  96. package/lib/errors.ts +346 -346
  97. package/lib/format.ts +541 -541
  98. package/lib/git.ts +326 -326
  99. package/lib/grammar.ts +303 -303
  100. package/lib/image-registry.ts +180 -180
  101. package/lib/import.ts +911 -792
  102. package/lib/journals.ts +543 -543
  103. package/lib/merge.ts +633 -633
  104. package/lib/orcid.ts +144 -144
  105. package/lib/pdf-comments.ts +263 -263
  106. package/lib/pdf-import.ts +524 -524
  107. package/lib/plugins.ts +362 -362
  108. package/lib/postprocess.ts +188 -188
  109. package/lib/pptx-color-filter.lua +37 -37
  110. package/lib/pptx-template.ts +469 -469
  111. package/lib/pptx-themes.ts +483 -483
  112. package/lib/protect-restore.ts +520 -520
  113. package/lib/rate-limiter.ts +94 -94
  114. package/lib/response.ts +197 -197
  115. package/lib/restore-references.ts +240 -240
  116. package/lib/review.ts +327 -327
  117. package/lib/schema.ts +417 -417
  118. package/lib/scientific-words.ts +73 -73
  119. package/lib/sections.ts +335 -335
  120. package/lib/slides.ts +756 -756
  121. package/lib/spelling.ts +334 -334
  122. package/lib/templates.ts +526 -526
  123. package/lib/themes.ts +742 -742
  124. package/lib/trackchanges.ts +247 -247
  125. package/lib/tui.ts +450 -450
  126. package/lib/types.ts +550 -530
  127. package/lib/undo.ts +250 -250
  128. package/lib/utils.ts +69 -69
  129. package/lib/variables.ts +179 -179
  130. package/lib/word-extraction.ts +806 -759
  131. package/lib/word.ts +643 -643
  132. package/lib/wordcomments.ts +817 -798
  133. package/package.json +137 -137
  134. package/scripts/postbuild.js +28 -28
  135. package/skill/REFERENCE.md +431 -431
  136. package/skill/SKILL.md +258 -258
  137. package/tsconfig.json +26 -26
  138. package/types/index.d.ts +525 -525
@@ -1,20 +1,20 @@
1
- # Methods
2
-
3
- Below we describe{>>Reviewer: Comment 0 on "methods section" — text contains <embedded brackets and &.<<} our methodology in detail.
4
-
5
- The {>>Reviewer: Comment 1 on "model" — text contains <embedded brackets and &.<<}model is fitted; later, the model is reported.
6
-
7
- Reviewers often place a marker between two sentences. Like this. Then{>>Reviewer: Comment 2 on "" — text contains <embedded brackets and &.<<} continue.
8
-
9
- Our results show that {>>Reviewer: Comment 3 on "p < 0.001" — text contains <embedded brackets and &.<<}p < 0.001 (n=412) across all conditions.
10
-
11
- Trade volumes were modest pre-industrial {>>Reviewer: Comment 4 on "(<1825)" — text contains <embedded brackets and &.<<}(<1825).
12
-
13
- {>>Reviewer: Comment 5 on "We used a hierarchical Bayesian model with weakly informative priors and Hamiltonian Monte Carlo sampling implemented in Stan." — text contains <embedded brackets and &.<<}We used a hierarchical Bayesian model with weakly informative priors and Hamiltonian Monte Carlo sampling implemented in Stan.
14
-
15
- Round numbers are reported{>>Reviewer: Comment 6 on ";" — text contains <embedded brackets and &.<<} in Table 1.
16
-
17
- The {>>Reviewer: Comment 7 on "overrepresented species" — text contains <embedded brackets and &.<<}overrepresented{>>Reviewer: Comment 8 on "overrepresented" — text contains <embedded brackets and &.<<} species are listed in Appendix A.
18
-
19
- The cohort was {>>Reviewer: Comment 9 on "small" — text contains <embedded brackets and &.<<}small. Limitations are discussed in Section 5.
20
- The effect was {>>Reviewer: Comment 10 on "small" — text contains <embedded brackets and &.<<}small but significant.
1
+ # Methods
2
+
3
+ Below we describe{>>Reviewer: Comment 0 on "methods section" — text contains <embedded brackets and &.<<} our methodology in detail.
4
+
5
+ The {>>Reviewer: Comment 1 on "model" — text contains <embedded brackets and &.<<}model is fitted; later, the model is reported.
6
+
7
+ Reviewers often place a marker between two sentences. Like this. Then{>>Reviewer: Comment 2 on "" — text contains <embedded brackets and &.<<} continue.
8
+
9
+ Our results show that {>>Reviewer: Comment 3 on "p < 0.001" — text contains <embedded brackets and &.<<}p < 0.001 (n=412) across all conditions.
10
+
11
+ Trade volumes were modest pre-industrial {>>Reviewer: Comment 4 on "(<1825)" — text contains <embedded brackets and &.<<}(<1825).
12
+
13
+ {>>Reviewer: Comment 5 on "We used a hierarchical Bayesian model with weakly informative priors and Hamiltonian Monte Carlo sampling implemented in Stan." — text contains <embedded brackets and &.<<}We used a hierarchical Bayesian model with weakly informative priors and Hamiltonian Monte Carlo sampling implemented in Stan.
14
+
15
+ Round numbers are reported{>>Reviewer: Comment 6 on ";" — text contains <embedded brackets and &.<<} in Table 1.
16
+
17
+ The {>>Reviewer: Comment 7 on "overrepresented species" — text contains <embedded brackets and &.<<}overrepresented{>>Reviewer: Comment 8 on "overrepresented" — text contains <embedded brackets and &.<<} species are listed in Appendix A.
18
+
19
+ The cohort was {>>Reviewer: Comment 9 on "small" — text contains <embedded brackets and &.<<}small. Limitations are discussed in Section 5.
20
+ The effect was {>>Reviewer: Comment 10 on "small" — text contains <embedded brackets and &.<<}small but significant.
@@ -1,5 +1,5 @@
1
- title: stress2
2
- authors: []
3
- sections:
4
- - methods
5
- - discussion
1
+ title: stress2
2
+ authors: []
3
+ sections:
4
+ - methods
5
+ - discussion
@@ -1,4 +1,4 @@
1
- version: 1
2
- sections:
3
- methods.md: Methods
4
- discussion.md: Discussion
1
+ version: 1
2
+ sections:
3
+ methods.md: Methods
4
+ discussion.md: Discussion
@@ -1,5 +1,5 @@
1
- sections:
2
- - file: methods.md
3
- header: Methods
4
- - file: discussion.md
5
- header: Discussion
1
+ sections:
2
+ - file: methods.md
3
+ header: Methods
4
+ - file: discussion.md
5
+ header: Discussion
@@ -1,50 +1,50 @@
1
- /**
2
- * Trace exactly where insertCommentsIntoMarkdown places each comment.
3
- * Reproduces the methods.md sync to see if disambiguation works.
4
- */
5
- import { extractCommentAnchors, extractWordComments } from '../../lib/word-extraction.js';
6
- import { insertCommentsIntoMarkdown } from '../../lib/import.js';
7
- import * as fs from 'fs';
8
-
9
- const docx = 'dev_notes/stress2/adversarial.docx';
10
- const md = fs.readFileSync('dev_notes/stress2/project/methods.before.md', 'utf-8');
11
-
12
- const { anchors } = await extractCommentAnchors(docx);
13
- const comments = await extractWordComments(docx);
14
-
15
- // Filter to methods (everything except #11)
16
- const methodsComments = comments.filter((c: any) => c.id !== '11');
17
-
18
- console.log('Markdown length:', md.length);
19
- console.log('Comments to place:', methodsComments.length);
20
-
21
- const smallPositions: number[] = [];
22
- let i = 0;
23
- while ((i = md.indexOf('small', i)) !== -1) {
24
- smallPositions.push(i);
25
- i += 5;
26
- }
27
- console.log('"small" occurrences in md at:', smallPositions);
28
- for (const p of smallPositions) {
29
- console.log(` pos ${p}: ...${JSON.stringify(md.slice(Math.max(0, p - 30), p + 30))}...`);
30
- }
31
-
32
- console.log('\nDocx anchor data for #9 and #10:');
33
- for (const id of ['9', '10']) {
34
- const a = anchors.get(id)!;
35
- console.log(` #${id}: docPos=${a.docPosition}, before=${JSON.stringify(a.before.slice(-30))}, after=${JSON.stringify(a.after.slice(0, 30))}`);
36
- }
37
-
38
- // Run with quiet:false to see warnings
39
- const out = insertCommentsIntoMarkdown(md, methodsComments, anchors, {
40
- quiet: false,
41
- wrapAnchor: false,
42
- });
43
-
44
- // Locate both inserted comment blocks
45
- const block9 = out.indexOf('Comment 9 on');
46
- const block10 = out.indexOf('Comment 10 on');
47
- console.log(`\n#9 inserted at md offset: ${block9}`);
48
- console.log(`#10 inserted at md offset: ${block10}`);
49
- console.log(`Surrounding #9: ${JSON.stringify(out.slice(Math.max(0, block9 - 30), block9 + 50))}`);
50
- console.log(`Surrounding #10: ${JSON.stringify(out.slice(Math.max(0, block10 - 30), block10 + 50))}`);
1
+ /**
2
+ * Trace exactly where insertCommentsIntoMarkdown places each comment.
3
+ * Reproduces the methods.md sync to see if disambiguation works.
4
+ */
5
+ import { extractCommentAnchors, extractWordComments } from '../../lib/word-extraction.js';
6
+ import { insertCommentsIntoMarkdown } from '../../lib/import.js';
7
+ import * as fs from 'fs';
8
+
9
+ const docx = 'dev_notes/stress2/adversarial.docx';
10
+ const md = fs.readFileSync('dev_notes/stress2/project/methods.before.md', 'utf-8');
11
+
12
+ const { anchors } = await extractCommentAnchors(docx);
13
+ const comments = await extractWordComments(docx);
14
+
15
+ // Filter to methods (everything except #11)
16
+ const methodsComments = comments.filter((c: any) => c.id !== '11');
17
+
18
+ console.log('Markdown length:', md.length);
19
+ console.log('Comments to place:', methodsComments.length);
20
+
21
+ const smallPositions: number[] = [];
22
+ let i = 0;
23
+ while ((i = md.indexOf('small', i)) !== -1) {
24
+ smallPositions.push(i);
25
+ i += 5;
26
+ }
27
+ console.log('"small" occurrences in md at:', smallPositions);
28
+ for (const p of smallPositions) {
29
+ console.log(` pos ${p}: ...${JSON.stringify(md.slice(Math.max(0, p - 30), p + 30))}...`);
30
+ }
31
+
32
+ console.log('\nDocx anchor data for #9 and #10:');
33
+ for (const id of ['9', '10']) {
34
+ const a = anchors.get(id)!;
35
+ console.log(` #${id}: docPos=${a.docPosition}, before=${JSON.stringify(a.before.slice(-30))}, after=${JSON.stringify(a.after.slice(0, 30))}`);
36
+ }
37
+
38
+ // Run with quiet:false to see warnings
39
+ const out = insertCommentsIntoMarkdown(md, methodsComments, anchors, {
40
+ quiet: false,
41
+ wrapAnchor: false,
42
+ });
43
+
44
+ // Locate both inserted comment blocks
45
+ const block9 = out.indexOf('Comment 9 on');
46
+ const block10 = out.indexOf('Comment 10 on');
47
+ console.log(`\n#9 inserted at md offset: ${block9}`);
48
+ console.log(`#10 inserted at md offset: ${block10}`);
49
+ console.log(`Surrounding #9: ${JSON.stringify(out.slice(Math.max(0, block9 - 30), block9 + 50))}`);
50
+ console.log(`Surrounding #10: ${JSON.stringify(out.slice(Math.max(0, block10 - 30), block10 + 50))}`);
@@ -1,27 +1,27 @@
1
- import { extractCommentAnchors } from '../lib/import.js';
2
-
3
- const docx = 'C:/GillesC/tmp/docrev-stress/reviewed.docx';
4
- const { fullDocText } = await extractCommentAnchors(docx);
5
-
6
- function findSectionHeader(text: string, header: string): number {
7
- const needle = header.toLowerCase().trim();
8
- const lower = text.toLowerCase();
9
- let idx = 0;
10
- while ((idx = lower.indexOf(needle, idx)) !== -1) {
11
- const after = text.slice(idx + needle.length, idx + needle.length + 5);
12
- if (!after.startsWith(':') && !after.startsWith(' :')) return idx;
13
- idx++;
14
- }
15
- return -1;
16
- }
17
-
18
- const headers = ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion', 'Conclusion', 'References', 'Supplementary Materials'];
19
- const found = headers.map(h => ({ h, idx: findSectionHeader(fullDocText, h) }))
20
- .filter(x => x.idx >= 0)
21
- .sort((a, b) => a.idx - b.idx);
22
-
23
- console.log('Sorted boundary positions found:');
24
- for (const f of found) {
25
- const ctx = fullDocText.slice(Math.max(0, f.idx - 20), f.idx + 30).replace(/\s+/g, ' ');
26
- console.log(` ${f.h.padEnd(25)} @ ${f.idx} ctx: ...${ctx}...`);
27
- }
1
+ import { extractCommentAnchors } from '../lib/import.js';
2
+
3
+ const docx = 'C:/GillesC/tmp/docrev-stress/reviewed.docx';
4
+ const { fullDocText } = await extractCommentAnchors(docx);
5
+
6
+ function findSectionHeader(text: string, header: string): number {
7
+ const needle = header.toLowerCase().trim();
8
+ const lower = text.toLowerCase();
9
+ let idx = 0;
10
+ while ((idx = lower.indexOf(needle, idx)) !== -1) {
11
+ const after = text.slice(idx + needle.length, idx + needle.length + 5);
12
+ if (!after.startsWith(':') && !after.startsWith(' :')) return idx;
13
+ idx++;
14
+ }
15
+ return -1;
16
+ }
17
+
18
+ const headers = ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion', 'Conclusion', 'References', 'Supplementary Materials'];
19
+ const found = headers.map(h => ({ h, idx: findSectionHeader(fullDocText, h) }))
20
+ .filter(x => x.idx >= 0)
21
+ .sort((a, b) => a.idx - b.idx);
22
+
23
+ console.log('Sorted boundary positions found:');
24
+ for (const f of found) {
25
+ const ctx = fullDocText.slice(Math.max(0, f.idx - 20), f.idx + 30).replace(/\s+/g, ' ');
26
+ console.log(` ${f.h.padEnd(25)} @ ${f.idx} ctx: ...${ctx}...`);
27
+ }
@@ -1,43 +1,43 @@
1
- // Apply targeted drift edits to project-drifted/abstract.md and methods.md
2
- import * as fs from 'fs';
3
-
4
- const dir = 'C:/GillesC/tmp/docrev-stress/project-drifted';
5
-
6
- function edit(file: string, edits: Array<[string, string]>) {
7
- const p = `${dir}/${file}`;
8
- let t = fs.readFileSync(p, 'utf-8');
9
- for (const [from, to] of edits) {
10
- if (!t.includes(from)) {
11
- console.error(`!! ${file}: pattern not found: "${from.slice(0, 60)}"`);
12
- continue;
13
- }
14
- t = t.replace(from, to);
15
- console.log(`OK ${file}: replaced "${from.slice(0, 60)}..."`);
16
- }
17
- fs.writeFileSync(p, t);
18
- }
19
-
20
- // Word swap: 'accelerating' should still be findable via stripped/partial fallback (anchor was a single word, replacing changes the prose)
21
- // Actually: replacing "accelerating" with "rapid" in the abstract removes the anchor entirely.
22
- // Some comments have anchor "accelerating" — those should go drift -> unmatched.
23
-
24
- edit('abstract.md', [
25
- ['accelerating rates', 'rapid rates'],
26
- // Numerical drift: this anchor is "0–20 years" / "57%" — both should fail to direct match
27
- ['from 17% (0--20 years) to 57%', 'from 15% (0–25 years) to 60%'],
28
- // Anchor-spanning rewrite: replace 'Man-made and ruderal habitats functioned as gateways' with 'Anthropogenic habitats acted as entry points'
29
- ['Man-made and ruderal habitats functioned as gateways', 'Anthropogenic habitats acted as entry points'],
30
- // Number change in big number
31
- ['835,891 vegetation plots', '1,200,000 vegetation plots'],
32
- ]);
33
-
34
- // Delete an entire paragraph from discussion (so anchors there go unmatched)
35
- const disc = fs.readFileSync(`${dir}/discussion.md`, 'utf-8');
36
- // Just append a marker to track that we did NOT delete; instead, we'll insert a new prose block to test that comments still land correctly relative to it.
37
- // For deletion test, find a known phrase and remove its sentence
38
- edit('discussion.md', [
39
- // Insert a new paragraph at top to force position drift in proportion-based placement
40
- ['# Discussion\n', '# Discussion\n\n_NOTE: this paragraph was inserted after review. Just an extra block of prose to push everything downward in the markdown so that proportion-based anchor placement gets stress-tested. We pad with several sentences to ensure the offset is meaningful and that fuzzy matchers still find the right targets despite this drift._\n\n'],
41
- ]);
42
-
43
- console.log('drift applied');
1
+ // Apply targeted drift edits to project-drifted/abstract.md and methods.md
2
+ import * as fs from 'fs';
3
+
4
+ const dir = 'C:/GillesC/tmp/docrev-stress/project-drifted';
5
+
6
+ function edit(file: string, edits: Array<[string, string]>) {
7
+ const p = `${dir}/${file}`;
8
+ let t = fs.readFileSync(p, 'utf-8');
9
+ for (const [from, to] of edits) {
10
+ if (!t.includes(from)) {
11
+ console.error(`!! ${file}: pattern not found: "${from.slice(0, 60)}"`);
12
+ continue;
13
+ }
14
+ t = t.replace(from, to);
15
+ console.log(`OK ${file}: replaced "${from.slice(0, 60)}..."`);
16
+ }
17
+ fs.writeFileSync(p, t);
18
+ }
19
+
20
+ // Word swap: 'accelerating' should still be findable via stripped/partial fallback (anchor was a single word, replacing changes the prose)
21
+ // Actually: replacing "accelerating" with "rapid" in the abstract removes the anchor entirely.
22
+ // Some comments have anchor "accelerating" — those should go drift -> unmatched.
23
+
24
+ edit('abstract.md', [
25
+ ['accelerating rates', 'rapid rates'],
26
+ // Numerical drift: this anchor is "0–20 years" / "57%" — both should fail to direct match
27
+ ['from 17% (0--20 years) to 57%', 'from 15% (0–25 years) to 60%'],
28
+ // Anchor-spanning rewrite: replace 'Man-made and ruderal habitats functioned as gateways' with 'Anthropogenic habitats acted as entry points'
29
+ ['Man-made and ruderal habitats functioned as gateways', 'Anthropogenic habitats acted as entry points'],
30
+ // Number change in big number
31
+ ['835,891 vegetation plots', '1,200,000 vegetation plots'],
32
+ ]);
33
+
34
+ // Delete an entire paragraph from discussion (so anchors there go unmatched)
35
+ const disc = fs.readFileSync(`${dir}/discussion.md`, 'utf-8');
36
+ // Just append a marker to track that we did NOT delete; instead, we'll insert a new prose block to test that comments still land correctly relative to it.
37
+ // For deletion test, find a known phrase and remove its sentence
38
+ edit('discussion.md', [
39
+ // Insert a new paragraph at top to force position drift in proportion-based placement
40
+ ['# Discussion\n', '# Discussion\n\n_NOTE: this paragraph was inserted after review. Just an extra block of prose to push everything downward in the markdown so that proportion-based anchor placement gets stress-tested. We pad with several sentences to ensure the offset is meaningful and that fuzzy matchers still find the right targets despite this drift._\n\n'],
41
+ ]);
42
+
43
+ console.log('drift applied');
@@ -1,43 +1,43 @@
1
- // Compare pristine vs drifted verify-anchors output
2
- import * as fs from 'fs';
3
-
4
- const a = JSON.parse(fs.readFileSync('C:/GillesC/tmp/docrev-stress/pristine.json', 'utf-8'));
5
- const b = JSON.parse(fs.readFileSync('C:/GillesC/tmp/docrev-stress/drifted.json', 'utf-8'));
6
-
7
- console.log('Summary deltas:');
8
- const keys = Array.from(new Set([...Object.keys(a.summary), ...Object.keys(b.summary)]));
9
- for (const k of keys) {
10
- const av = a.summary[k] ?? 0;
11
- const bv = b.summary[k] ?? 0;
12
- const delta = bv - av;
13
- console.log(` ${k.padEnd(15)} ${String(av).padStart(4)} -> ${String(bv).padStart(4)} (delta ${delta >= 0 ? '+' : ''}${delta})`);
14
- }
15
- console.log();
16
-
17
- // Show comments whose quality changed (especially those that moved to a worse bucket)
18
- const aMap = new Map(a.comments.map((c: any) => [c.id, c]));
19
- console.log('Quality changes:');
20
- let regressions = 0;
21
- let improvements = 0;
22
- for (const c of b.comments) {
23
- const prev = aMap.get(c.id) as any;
24
- if (!prev) continue;
25
- if (prev.quality !== c.quality) {
26
- const dir = qualityRank(c.quality) > qualityRank(prev.quality) ? '⬇' : '⬆';
27
- if (dir === '⬇') regressions++; else improvements++;
28
- console.log(` ${dir} #${c.id} [${c.section || '—'}] ${prev.quality}/${prev.strategy} -> ${c.quality}/${c.strategy} anchor="${(c.anchor || '').slice(0, 35)}"`);
29
- }
30
- }
31
- console.log();
32
- console.log(`regressions: ${regressions} improvements: ${improvements}`);
33
-
34
- function qualityRank(q: string): number {
35
- switch (q) {
36
- case 'clean': return 0;
37
- case 'drift': return 1;
38
- case 'context-only': return 2;
39
- case 'ambiguous': return 1; // ambiguous is sideways, not strictly worse
40
- case 'unmatched': return 3;
41
- default: return 4;
42
- }
43
- }
1
+ // Compare pristine vs drifted verify-anchors output
2
+ import * as fs from 'fs';
3
+
4
+ const a = JSON.parse(fs.readFileSync('C:/GillesC/tmp/docrev-stress/pristine.json', 'utf-8'));
5
+ const b = JSON.parse(fs.readFileSync('C:/GillesC/tmp/docrev-stress/drifted.json', 'utf-8'));
6
+
7
+ console.log('Summary deltas:');
8
+ const keys = Array.from(new Set([...Object.keys(a.summary), ...Object.keys(b.summary)]));
9
+ for (const k of keys) {
10
+ const av = a.summary[k] ?? 0;
11
+ const bv = b.summary[k] ?? 0;
12
+ const delta = bv - av;
13
+ console.log(` ${k.padEnd(15)} ${String(av).padStart(4)} -> ${String(bv).padStart(4)} (delta ${delta >= 0 ? '+' : ''}${delta})`);
14
+ }
15
+ console.log();
16
+
17
+ // Show comments whose quality changed (especially those that moved to a worse bucket)
18
+ const aMap = new Map(a.comments.map((c: any) => [c.id, c]));
19
+ console.log('Quality changes:');
20
+ let regressions = 0;
21
+ let improvements = 0;
22
+ for (const c of b.comments) {
23
+ const prev = aMap.get(c.id) as any;
24
+ if (!prev) continue;
25
+ if (prev.quality !== c.quality) {
26
+ const dir = qualityRank(c.quality) > qualityRank(prev.quality) ? '⬇' : '⬆';
27
+ if (dir === '⬇') regressions++; else improvements++;
28
+ console.log(` ${dir} #${c.id} [${c.section || '—'}] ${prev.quality}/${prev.strategy} -> ${c.quality}/${c.strategy} anchor="${(c.anchor || '').slice(0, 35)}"`);
29
+ }
30
+ }
31
+ console.log();
32
+ console.log(`regressions: ${regressions} improvements: ${improvements}`);
33
+
34
+ function qualityRank(q: string): number {
35
+ switch (q) {
36
+ case 'clean': return 0;
37
+ case 'drift': return 1;
38
+ case 'context-only': return 2;
39
+ case 'ambiguous': return 1; // ambiguous is sideways, not strictly worse
40
+ case 'unmatched': return 3;
41
+ default: return 4;
42
+ }
43
+ }
@@ -1,54 +1,54 @@
1
- // Apply drift edits that actually intersect real comment anchors
2
- import * as fs from 'fs';
3
-
4
- const dir = 'C:/GillesC/tmp/docrev-stress/project-drifted';
5
-
6
- function edit(file: string, edits: Array<[string, string]>) {
7
- const p = `${dir}/${file}`;
8
- let t = fs.readFileSync(p, 'utf-8');
9
- for (const [from, to] of edits) {
10
- if (!t.includes(from)) {
11
- console.error(`!! ${file}: pattern not found: "${from.slice(0, 60)}"`);
12
- continue;
13
- }
14
- t = t.replace(from, to);
15
- console.log(`OK ${file}: replaced "${from.slice(0, 60)}..."`);
16
- }
17
- fs.writeFileSync(p, t);
18
- }
19
-
20
- // Each of these intersects real anchors; expected effect noted.
21
- edit('abstract.md', [
22
- // Title: 3 anchors are 'Patterns of habitat niche expansion' (full title) — but title isn't in abstract.md, it's in YAML frontmatter, so probably already failing
23
- // 'overrepresented' is the most common anchor (6 comments). Replace it everywhere.
24
- // After this, the word 'overrepresented' is gone → all those comments should go to context-only or unmatched
25
- ['overrepresented in more than one habitat type', 'preferentially distributed across habitat types'],
26
- // 'undisturbed' appears in 'undisturbed habitat types' → 2 comments
27
- ['undisturbed habitat types', 'pristine vegetation communities'],
28
- // 'human economies' (3 anchors)
29
- ['human economies', 'economic activity'],
30
- // 'nutrient-rich'
31
- ['nutrient-rich, disturbed habitats', 'anthropic, modified environments'],
32
- // 'when residence time was prolonged.' — long anchor, should go to drift / partial-start
33
- ['when residence time was prolonged.', 'as residence time increased.'],
34
- // 'pervasive' (1 anchor)
35
- ['Invasion debt in terms of habitat niche breadth is pervasive', 'Invasion debt in terms of habitat niche breadth is widespread'],
36
- // 'alpine habitats' (1 anchor)
37
- ['alpine habitats', 'high-altitude environments'],
38
- // 'semi-natural vegetation' (2 anchors)
39
- ['semi-natural vegetation', 'natural plant communities'],
40
- // 'Man-made habitats function as gateways' (1 anchor)
41
- ['Man-made habitats function as gateways', 'Anthropogenic environments serve as entry routes'],
42
- // 'Invasion debt in terms of habitat niche breadth' (2 anchors)
43
- // Already mutated above (pervasive→widespread keeps anchor matchable). Add another small change to test partial-start
44
- // 'habitat niche expansion' — anchor for #27
45
- ['habitat niche expansion', 'ecological niche broadening'],
46
- // ', yet' anchor for #19
47
- [', yet', ', however'],
48
- // '44' — short numeric anchor
49
- ['44%', '53%'],
50
- // '17 habitats'
51
- ['17 habitats', '18 habitats'],
52
- ]);
53
-
54
- console.log('---done');
1
+ // Apply drift edits that actually intersect real comment anchors
2
+ import * as fs from 'fs';
3
+
4
+ const dir = 'C:/GillesC/tmp/docrev-stress/project-drifted';
5
+
6
+ function edit(file: string, edits: Array<[string, string]>) {
7
+ const p = `${dir}/${file}`;
8
+ let t = fs.readFileSync(p, 'utf-8');
9
+ for (const [from, to] of edits) {
10
+ if (!t.includes(from)) {
11
+ console.error(`!! ${file}: pattern not found: "${from.slice(0, 60)}"`);
12
+ continue;
13
+ }
14
+ t = t.replace(from, to);
15
+ console.log(`OK ${file}: replaced "${from.slice(0, 60)}..."`);
16
+ }
17
+ fs.writeFileSync(p, t);
18
+ }
19
+
20
+ // Each of these intersects real anchors; expected effect noted.
21
+ edit('abstract.md', [
22
+ // Title: 3 anchors are 'Patterns of habitat niche expansion' (full title) — but title isn't in abstract.md, it's in YAML frontmatter, so probably already failing
23
+ // 'overrepresented' is the most common anchor (6 comments). Replace it everywhere.
24
+ // After this, the word 'overrepresented' is gone → all those comments should go to context-only or unmatched
25
+ ['overrepresented in more than one habitat type', 'preferentially distributed across habitat types'],
26
+ // 'undisturbed' appears in 'undisturbed habitat types' → 2 comments
27
+ ['undisturbed habitat types', 'pristine vegetation communities'],
28
+ // 'human economies' (3 anchors)
29
+ ['human economies', 'economic activity'],
30
+ // 'nutrient-rich'
31
+ ['nutrient-rich, disturbed habitats', 'anthropic, modified environments'],
32
+ // 'when residence time was prolonged.' — long anchor, should go to drift / partial-start
33
+ ['when residence time was prolonged.', 'as residence time increased.'],
34
+ // 'pervasive' (1 anchor)
35
+ ['Invasion debt in terms of habitat niche breadth is pervasive', 'Invasion debt in terms of habitat niche breadth is widespread'],
36
+ // 'alpine habitats' (1 anchor)
37
+ ['alpine habitats', 'high-altitude environments'],
38
+ // 'semi-natural vegetation' (2 anchors)
39
+ ['semi-natural vegetation', 'natural plant communities'],
40
+ // 'Man-made habitats function as gateways' (1 anchor)
41
+ ['Man-made habitats function as gateways', 'Anthropogenic environments serve as entry routes'],
42
+ // 'Invasion debt in terms of habitat niche breadth' (2 anchors)
43
+ // Already mutated above (pervasive→widespread keeps anchor matchable). Add another small change to test partial-start
44
+ // 'habitat niche expansion' — anchor for #27
45
+ ['habitat niche expansion', 'ecological niche broadening'],
46
+ // ', yet' anchor for #19
47
+ [', yet', ', however'],
48
+ // '44' — short numeric anchor
49
+ ['44%', '53%'],
50
+ // '17 habitats'
51
+ ['17 habitats', '18 habitats'],
52
+ ]);
53
+
54
+ console.log('---done');
@@ -1,54 +1,54 @@
1
- import { extractWordComments, extractCommentAnchors } from '../lib/import.js';
2
-
3
- const docx = 'C:/GillesC/tmp/docrev-stress/reviewed.docx';
4
-
5
- const comments = await extractWordComments(docx);
6
- const { anchors, fullDocText } = await extractCommentAnchors(docx);
7
-
8
- console.log(`comments: ${comments.length}`);
9
- console.log(`anchors: ${anchors.size}`);
10
- console.log(`docText: ${fullDocText.length} chars`);
11
- console.log();
12
-
13
- const byAuthor: Record<string, number> = {};
14
- for (const c of comments) byAuthor[c.author] = (byAuthor[c.author] || 0) + 1;
15
- console.log('by author:', byAuthor);
16
- console.log();
17
-
18
- console.log('first 5 comments:');
19
- for (const c of comments.slice(0, 5)) {
20
- const a = anchors.get(c.id);
21
- const pos = a?.docPosition ?? -1;
22
- const anchor = (a?.anchor || '').slice(0, 60);
23
- const text = c.text.replace(/\s+/g, ' ').slice(0, 80);
24
- console.log(` #${c.id} [${c.author}] pos=${pos} anchor="${anchor}" text="${text}"`);
25
- }
26
- console.log();
27
- console.log('last 3 comments:');
28
- for (const c of comments.slice(-3)) {
29
- const a = anchors.get(c.id);
30
- const pos = a?.docPosition ?? -1;
31
- const anchor = (a?.anchor || '').slice(0, 60);
32
- const text = c.text.replace(/\s+/g, ' ').slice(0, 80);
33
- console.log(` #${c.id} [${c.author}] pos=${pos} anchor="${anchor}" text="${text}"`);
34
- }
35
- console.log();
36
-
37
- const sectionKeywords = ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion', 'Conclusion', 'References', 'Acknowledgements', 'Data Availability', 'Author Contributions'];
38
- console.log('candidate headings:');
39
- for (const kw of sectionKeywords) {
40
- const idx = fullDocText.indexOf(kw);
41
- if (idx >= 0) {
42
- const context = fullDocText.slice(idx, idx + 80).replace(/\s+/g, ' ');
43
- console.log(` ${kw} @ ${idx}: "${context}"`);
44
- }
45
- }
46
-
47
- // Check anchor distribution to understand section spans
48
- const positions = [...anchors.values()].map(a => a.docPosition).sort((a, b) => a - b);
49
- console.log();
50
- console.log(`anchor positions: min=${positions[0]} max=${positions[positions.length-1]} median=${positions[Math.floor(positions.length/2)]}`);
51
-
52
- // Check for empty anchors
53
- const empty = [...anchors.values()].filter(a => a.isEmpty).length;
54
- console.log(`empty anchors: ${empty}`);
1
+ import { extractWordComments, extractCommentAnchors } from '../lib/import.js';
2
+
3
+ const docx = 'C:/GillesC/tmp/docrev-stress/reviewed.docx';
4
+
5
+ const comments = await extractWordComments(docx);
6
+ const { anchors, fullDocText } = await extractCommentAnchors(docx);
7
+
8
+ console.log(`comments: ${comments.length}`);
9
+ console.log(`anchors: ${anchors.size}`);
10
+ console.log(`docText: ${fullDocText.length} chars`);
11
+ console.log();
12
+
13
+ const byAuthor: Record<string, number> = {};
14
+ for (const c of comments) byAuthor[c.author] = (byAuthor[c.author] || 0) + 1;
15
+ console.log('by author:', byAuthor);
16
+ console.log();
17
+
18
+ console.log('first 5 comments:');
19
+ for (const c of comments.slice(0, 5)) {
20
+ const a = anchors.get(c.id);
21
+ const pos = a?.docPosition ?? -1;
22
+ const anchor = (a?.anchor || '').slice(0, 60);
23
+ const text = c.text.replace(/\s+/g, ' ').slice(0, 80);
24
+ console.log(` #${c.id} [${c.author}] pos=${pos} anchor="${anchor}" text="${text}"`);
25
+ }
26
+ console.log();
27
+ console.log('last 3 comments:');
28
+ for (const c of comments.slice(-3)) {
29
+ const a = anchors.get(c.id);
30
+ const pos = a?.docPosition ?? -1;
31
+ const anchor = (a?.anchor || '').slice(0, 60);
32
+ const text = c.text.replace(/\s+/g, ' ').slice(0, 80);
33
+ console.log(` #${c.id} [${c.author}] pos=${pos} anchor="${anchor}" text="${text}"`);
34
+ }
35
+ console.log();
36
+
37
+ const sectionKeywords = ['Abstract', 'Introduction', 'Methods', 'Results', 'Discussion', 'Conclusion', 'References', 'Acknowledgements', 'Data Availability', 'Author Contributions'];
38
+ console.log('candidate headings:');
39
+ for (const kw of sectionKeywords) {
40
+ const idx = fullDocText.indexOf(kw);
41
+ if (idx >= 0) {
42
+ const context = fullDocText.slice(idx, idx + 80).replace(/\s+/g, ' ');
43
+ console.log(` ${kw} @ ${idx}: "${context}"`);
44
+ }
45
+ }
46
+
47
+ // Check anchor distribution to understand section spans
48
+ const positions = [...anchors.values()].map(a => a.docPosition).sort((a, b) => a - b);
49
+ console.log();
50
+ console.log(`anchor positions: min=${positions[0]} max=${positions[positions.length-1]} median=${positions[Math.floor(positions.length/2)]}`);
51
+
52
+ // Check for empty anchors
53
+ const empty = [...anchors.values()].filter(a => a.isEmpty).length;
54
+ console.log(`empty anchors: ${empty}`);