docrev 0.9.7 → 0.9.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/dev_notes/stress2/adversarial.docx +0 -0
- package/dev_notes/stress2/build_adversarial.ts +186 -0
- package/dev_notes/stress2/drift_matcher.ts +62 -0
- package/dev_notes/stress2/probe_anchors.ts +35 -0
- package/dev_notes/stress2/project/adversarial.docx +0 -0
- package/dev_notes/stress2/project/discussion.before.md +3 -0
- package/dev_notes/stress2/project/discussion.md +3 -0
- package/dev_notes/stress2/project/methods.before.md +20 -0
- package/dev_notes/stress2/project/methods.md +20 -0
- package/dev_notes/stress2/project/rev.yaml +5 -0
- package/dev_notes/stress2/project/sections.yaml +4 -0
- package/dev_notes/stress2/sections.yaml +5 -0
- package/dev_notes/stress2/trace_placement.ts +50 -0
- package/dev_notes/stresstest_boundaries.ts +27 -0
- package/dev_notes/stresstest_drift_apply.ts +43 -0
- package/dev_notes/stresstest_drift_compare.ts +43 -0
- package/dev_notes/stresstest_drift_v2.ts +54 -0
- package/dev_notes/stresstest_inspect.ts +54 -0
- package/dev_notes/stresstest_pstyle.ts +55 -0
- package/dev_notes/stresstest_section_debug.ts +23 -0
- package/dev_notes/stresstest_split.ts +70 -0
- package/dev_notes/stresstest_trace.ts +19 -0
- package/dev_notes/stresstest_verify_no_overwrite.ts +40 -0
- package/dist/lib/anchor-match.d.ts +10 -0
- package/dist/lib/anchor-match.d.ts.map +1 -1
- package/dist/lib/anchor-match.js +35 -0
- package/dist/lib/anchor-match.js.map +1 -1
- package/dist/lib/annotations.d.ts.map +1 -1
- package/dist/lib/annotations.js +16 -6
- package/dist/lib/annotations.js.map +1 -1
- package/dist/lib/build.d.ts +12 -0
- package/dist/lib/build.d.ts.map +1 -1
- package/dist/lib/build.js +12 -0
- package/dist/lib/build.js.map +1 -1
- package/dist/lib/commands/quality.js +1 -1
- package/dist/lib/commands/quality.js.map +1 -1
- package/dist/lib/commands/section-boundaries.d.ts +1 -1
- package/dist/lib/commands/section-boundaries.d.ts.map +1 -1
- package/dist/lib/commands/section-boundaries.js +12 -2
- package/dist/lib/commands/section-boundaries.js.map +1 -1
- package/dist/lib/commands/sync.js +19 -13
- package/dist/lib/commands/sync.js.map +1 -1
- package/dist/lib/commands/verify-anchors.d.ts.map +1 -1
- package/dist/lib/commands/verify-anchors.js +15 -4
- package/dist/lib/commands/verify-anchors.js.map +1 -1
- package/dist/lib/comment-realign.js +2 -2
- package/dist/lib/comment-realign.js.map +1 -1
- package/dist/lib/import.d.ts +12 -0
- package/dist/lib/import.d.ts.map +1 -1
- package/dist/lib/import.js +289 -60
- package/dist/lib/import.js.map +1 -1
- package/dist/lib/response.js +1 -1
- package/dist/lib/response.js.map +1 -1
- package/dist/lib/types.d.ts +20 -0
- package/dist/lib/types.d.ts.map +1 -1
- package/dist/lib/word-extraction.d.ts +6 -0
- package/dist/lib/word-extraction.d.ts.map +1 -1
- package/dist/lib/word-extraction.js +46 -3
- package/dist/lib/word-extraction.js.map +1 -1
- package/dist/lib/wordcomments.d.ts.map +1 -1
- package/dist/lib/wordcomments.js +188 -78
- package/dist/lib/wordcomments.js.map +1 -1
- package/lib/anchor-match.ts +38 -0
- package/lib/annotations.ts +16 -6
- package/lib/build.ts +24 -0
- package/lib/commands/quality.ts +1 -1
- package/lib/commands/section-boundaries.ts +11 -1
- package/lib/commands/sync.ts +21 -16
- package/lib/commands/verify-anchors.ts +15 -4
- package/lib/comment-realign.ts +2 -2
- package/lib/import.ts +304 -61
- package/lib/response.ts +1 -1
- package/lib/types.ts +20 -0
- package/lib/word-extraction.ts +50 -3
- package/lib/wordcomments.ts +205 -88
- package/package.json +1 -1
- package/dist/package.json +0 -137
package/lib/types.ts
CHANGED
|
@@ -69,6 +69,22 @@ export interface PdfConfig {
|
|
|
69
69
|
geometry?: string;
|
|
70
70
|
linestretch?: number;
|
|
71
71
|
toc?: boolean;
|
|
72
|
+
/**
|
|
73
|
+
* LaTeX engine to use for PDF output. One of `pdflatex` (default),
|
|
74
|
+
* `xelatex`, `lualatex`, `tectonic`, etc. xelatex/lualatex are required
|
|
75
|
+
* for native UTF-8 rendering of diacritics in author names, place
|
|
76
|
+
* names, and species epithets.
|
|
77
|
+
*/
|
|
78
|
+
engine?: string;
|
|
79
|
+
/** Roman/serif main font (xelatex/lualatex only — uses fontspec). */
|
|
80
|
+
mainfont?: string;
|
|
81
|
+
/** Sans-serif font (xelatex/lualatex only). */
|
|
82
|
+
sansfont?: string;
|
|
83
|
+
/** Monospace font (xelatex/lualatex only). */
|
|
84
|
+
monofont?: string;
|
|
85
|
+
numbersections?: boolean;
|
|
86
|
+
template?: string;
|
|
87
|
+
headerIncludes?: string;
|
|
72
88
|
}
|
|
73
89
|
|
|
74
90
|
export interface DocxConfig {
|
|
@@ -338,6 +354,10 @@ export interface JournalFormatting {
|
|
|
338
354
|
linestretch?: number;
|
|
339
355
|
template?: string;
|
|
340
356
|
numbersections?: boolean;
|
|
357
|
+
engine?: string;
|
|
358
|
+
mainfont?: string;
|
|
359
|
+
sansfont?: string;
|
|
360
|
+
monofont?: string;
|
|
341
361
|
};
|
|
342
362
|
docx?: {
|
|
343
363
|
reference?: string;
|
package/lib/word-extraction.ts
CHANGED
|
@@ -18,6 +18,12 @@ export interface WordComment {
|
|
|
18
18
|
author: string;
|
|
19
19
|
date: string;
|
|
20
20
|
text: string;
|
|
21
|
+
/**
|
|
22
|
+
* Parent comment id when this is a reply in a Word comment thread.
|
|
23
|
+
* Resolved from `commentsExtended.xml`'s `w15:paraIdParent` field.
|
|
24
|
+
* `undefined` for top-level comments.
|
|
25
|
+
*/
|
|
26
|
+
parentId?: string;
|
|
21
27
|
}
|
|
22
28
|
|
|
23
29
|
export interface TextNode {
|
|
@@ -126,7 +132,6 @@ export async function extractWordComments(docxPath: string): Promise<WordComment
|
|
|
126
132
|
|
|
127
133
|
const parsed = await parseStringPromise(commentsXml, { explicitArray: false });
|
|
128
134
|
|
|
129
|
-
const ns = 'w:';
|
|
130
135
|
const commentsRoot = parsed['w:comments'];
|
|
131
136
|
if (!commentsRoot || !commentsRoot['w:comment']) {
|
|
132
137
|
return comments;
|
|
@@ -137,12 +142,18 @@ export async function extractWordComments(docxPath: string): Promise<WordComment
|
|
|
137
142
|
? commentsRoot['w:comment']
|
|
138
143
|
: [commentsRoot['w:comment']];
|
|
139
144
|
|
|
145
|
+
// Map every paraId that lives inside a comment back to that comment's id.
|
|
146
|
+
// Word's commentsExtended.xml expresses threading via w15:paraIdParent,
|
|
147
|
+
// which references the parent's first <w:p>. Replies use a secondary
|
|
148
|
+
// (often-empty) <w:p>, so each comment may contribute multiple paraIds.
|
|
149
|
+
const paraIdToCommentId = new Map<string, string>();
|
|
150
|
+
|
|
140
151
|
for (const comment of commentNodes) {
|
|
141
152
|
const id = comment.$?.['w:id'] || '';
|
|
142
153
|
const author = comment.$?.['w:author'] || 'Unknown';
|
|
143
154
|
const date = comment.$?.['w:date'] || '';
|
|
144
155
|
|
|
145
|
-
// Extract text from nested w:p/w:r/w:t elements
|
|
156
|
+
// Extract text from nested w:p/w:r/w:t elements and record paraIds.
|
|
146
157
|
let text = '';
|
|
147
158
|
const extractText = (node: any): void => {
|
|
148
159
|
if (!node) return;
|
|
@@ -160,13 +171,49 @@ export async function extractWordComments(docxPath: string): Promise<WordComment
|
|
|
160
171
|
}
|
|
161
172
|
if (node['w:p']) {
|
|
162
173
|
const paras = Array.isArray(node['w:p']) ? node['w:p'] : [node['w:p']];
|
|
163
|
-
paras
|
|
174
|
+
for (const para of paras) {
|
|
175
|
+
const paraId = para?.$?.['w14:paraId'];
|
|
176
|
+
if (paraId && id) paraIdToCommentId.set(paraId, id);
|
|
177
|
+
extractText(para);
|
|
178
|
+
}
|
|
164
179
|
}
|
|
165
180
|
};
|
|
166
181
|
extractText(comment);
|
|
167
182
|
|
|
168
183
|
comments.push({ id, author, date: date.slice(0, 10), text: text.trim() });
|
|
169
184
|
}
|
|
185
|
+
|
|
186
|
+
// Resolve parent links from commentsExtended.xml. Missing entry just
|
|
187
|
+
// means the docx has no threading metadata (e.g. legacy/non-Word source).
|
|
188
|
+
const extendedEntry = zip.getEntry('word/commentsExtended.xml');
|
|
189
|
+
if (extendedEntry && paraIdToCommentId.size > 0) {
|
|
190
|
+
let extendedXml = '';
|
|
191
|
+
try {
|
|
192
|
+
extendedXml = extendedEntry.getData().toString('utf8');
|
|
193
|
+
} catch {
|
|
194
|
+
// Unreadable threading metadata is non-fatal; skip parent linking.
|
|
195
|
+
}
|
|
196
|
+
if (extendedXml) {
|
|
197
|
+
const parentByCommentId = new Map<string, string>();
|
|
198
|
+
const exPattern = /<w15:commentEx\b([^>]*?)\/>/g;
|
|
199
|
+
let m: RegExpExecArray | null;
|
|
200
|
+
while ((m = exPattern.exec(extendedXml)) !== null) {
|
|
201
|
+
const attrs = m[1] ?? '';
|
|
202
|
+
const paraIdMatch = attrs.match(/w15:paraId="([^"]+)"/);
|
|
203
|
+
const parentMatch = attrs.match(/w15:paraIdParent="([^"]+)"/);
|
|
204
|
+
if (!paraIdMatch || !parentMatch) continue;
|
|
205
|
+
const childCommentId = paraIdToCommentId.get(paraIdMatch[1]);
|
|
206
|
+
const parentCommentId = paraIdToCommentId.get(parentMatch[1]);
|
|
207
|
+
if (childCommentId && parentCommentId && childCommentId !== parentCommentId) {
|
|
208
|
+
parentByCommentId.set(childCommentId, parentCommentId);
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
for (const c of comments) {
|
|
212
|
+
const parent = parentByCommentId.get(c.id);
|
|
213
|
+
if (parent) c.parentId = parent;
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
}
|
|
170
217
|
} catch (err: any) {
|
|
171
218
|
// Re-throw with more context if it's already an Error we created
|
|
172
219
|
if (err.message.includes('Invalid Word document') || err.message.includes('File not found')) {
|
package/lib/wordcomments.ts
CHANGED
|
@@ -72,10 +72,38 @@ function generateParaId(commentIdx: number, paraNum: number): string {
|
|
|
72
72
|
* - comments: array with author, text, isReply, parentIdx
|
|
73
73
|
*/
|
|
74
74
|
export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
|
|
75
|
-
// Match
|
|
76
|
-
|
|
75
|
+
// Match the comment block first; extend manually to capture an optional
|
|
76
|
+
// trailing `[anchor]{.mark}` span. A regex `[^\]]+` for the anchor would
|
|
77
|
+
// bail on the inner `]` of nested syntax (e.g. `[[0..9]]{.mark}` or
|
|
78
|
+
// `[*phrase*]{.mark}` after pandoc-rewriting), so we walk the brackets
|
|
79
|
+
// ourselves and verify a `{.mark}` suffix.
|
|
80
|
+
const commentPattern = /\{>>([\s\S]+?)<<\}/g;
|
|
81
|
+
|
|
82
|
+
function tryParseTrailingAnchor(
|
|
83
|
+
text: string,
|
|
84
|
+
fromIdx: number,
|
|
85
|
+
): { anchor: string; endIdx: number } | null {
|
|
86
|
+
let i = fromIdx;
|
|
87
|
+
while (i < text.length && /\s/.test(text[i] ?? '')) i++;
|
|
88
|
+
if (text[i] !== '[') return null;
|
|
89
|
+
let depth = 1;
|
|
90
|
+
let j = i + 1;
|
|
91
|
+
while (j < text.length) {
|
|
92
|
+
const ch = text[j];
|
|
93
|
+
if (ch === '[') depth++;
|
|
94
|
+
else if (ch === ']') {
|
|
95
|
+
depth--;
|
|
96
|
+
if (depth === 0) break;
|
|
97
|
+
}
|
|
98
|
+
j++;
|
|
99
|
+
}
|
|
100
|
+
if (depth !== 0) return null;
|
|
101
|
+
if (text.slice(j + 1, j + 8) !== '{.mark}') return null;
|
|
102
|
+
return { anchor: text.slice(i + 1, j), endIdx: j + 8 };
|
|
103
|
+
}
|
|
77
104
|
|
|
78
|
-
const
|
|
105
|
+
const REPLY_PREFIX = '↪ ';
|
|
106
|
+
const rawMatches: (ParsedComment & { explicitReply: boolean })[] = [];
|
|
79
107
|
let match: RegExpExecArray | null;
|
|
80
108
|
while ((match = commentPattern.exec(markdown)) !== null) {
|
|
81
109
|
const content = match[1] ?? '';
|
|
@@ -87,24 +115,52 @@ export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
|
|
|
87
115
|
text = content.slice(colonIdx + 1).trim();
|
|
88
116
|
}
|
|
89
117
|
|
|
118
|
+
// The `↪ ` prefix is the authoritative reply signal emitted by
|
|
119
|
+
// `insertCommentsIntoMarkdown`. Strip it from the author before injection
|
|
120
|
+
// so Word displays the real name.
|
|
121
|
+
let explicitReply = false;
|
|
122
|
+
if (author.startsWith(REPLY_PREFIX)) {
|
|
123
|
+
explicitReply = true;
|
|
124
|
+
author = author.slice(REPLY_PREFIX.length).trim();
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
const commentEnd = match.index + match[0].length;
|
|
128
|
+
const trailing = tryParseTrailingAnchor(markdown, commentEnd);
|
|
129
|
+
|
|
90
130
|
rawMatches.push({
|
|
91
131
|
author,
|
|
92
132
|
text,
|
|
93
|
-
anchor:
|
|
133
|
+
anchor: trailing ? trailing.anchor : null,
|
|
94
134
|
start: match.index,
|
|
95
|
-
end:
|
|
96
|
-
fullMatch: match
|
|
135
|
+
end: trailing ? trailing.endIdx : commentEnd,
|
|
136
|
+
fullMatch: markdown.slice(match.index, trailing ? trailing.endIdx : commentEnd),
|
|
137
|
+
explicitReply,
|
|
97
138
|
});
|
|
139
|
+
|
|
140
|
+
// Advance regex lastIndex past the consumed anchor so the next iteration
|
|
141
|
+
// doesn't re-scan inside it (e.g. `[*emphasis*]{.mark}` would otherwise
|
|
142
|
+
// tempt the matcher to look for another `{>>...<<}` in the body of the
|
|
143
|
+
// anchor span).
|
|
144
|
+
if (trailing) {
|
|
145
|
+
commentPattern.lastIndex = trailing.endIdx;
|
|
146
|
+
}
|
|
98
147
|
}
|
|
99
148
|
|
|
100
149
|
if (rawMatches.length === 0) {
|
|
101
150
|
return { markedMarkdown: markdown, comments: [] };
|
|
102
151
|
}
|
|
103
152
|
|
|
104
|
-
//
|
|
105
|
-
//
|
|
106
|
-
//
|
|
153
|
+
// Two-mode reply detection driven by the markdown itself:
|
|
154
|
+
// - If any comment carries the `↪ ` author prefix, the markdown came
|
|
155
|
+
// through `insertCommentsIntoMarkdown` and we use prefix-only mode.
|
|
156
|
+
// Distinct clusters that happen to land at gap=0 (a real failure
|
|
157
|
+
// mode on dense reviewer docs — 298-comment paper produced 9 such
|
|
158
|
+
// collisions) are not misthreaded.
|
|
159
|
+
// - If no comment carries the prefix, the markdown was hand-typed.
|
|
160
|
+
// Fall back to gap < 10 adjacency for backward compat with users
|
|
161
|
+
// who write CriticMarkup directly.
|
|
107
162
|
const ADJACENT_THRESHOLD = 10;
|
|
163
|
+
const useExplicitMode = rawMatches.some(m => m.explicitReply);
|
|
108
164
|
const comments: PreparedComment[] = [];
|
|
109
165
|
let clusterParentIdx = -1; // Index of first comment in current cluster
|
|
110
166
|
let lastCommentEnd = -1;
|
|
@@ -113,9 +169,10 @@ export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
|
|
|
113
169
|
const m = rawMatches[i];
|
|
114
170
|
if (!m) continue;
|
|
115
171
|
|
|
116
|
-
// Check if this comment is adjacent to the previous one
|
|
117
172
|
const gap = lastCommentEnd >= 0 ? m.start - lastCommentEnd : Infinity;
|
|
118
|
-
const isAdjacent =
|
|
173
|
+
const isAdjacent = useExplicitMode
|
|
174
|
+
? m.explicitReply
|
|
175
|
+
: gap < ADJACENT_THRESHOLD;
|
|
119
176
|
|
|
120
177
|
// Reset cluster if there's a gap (comments not in same cluster)
|
|
121
178
|
if (!isAdjacent) {
|
|
@@ -179,10 +236,11 @@ export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
|
|
|
179
236
|
|
|
180
237
|
if (c.isReply) {
|
|
181
238
|
// Reply: remove from document entirely (will be in comments.xml only)
|
|
182
|
-
// Also consume
|
|
239
|
+
// Also consume one preceding whitespace char to avoid double spaces.
|
|
240
|
+
// We deliberately consume at most one — walking arbitrarily backwards
|
|
241
|
+
// would shift positions that lower-index comments still depend on.
|
|
183
242
|
let removeStart = c.start;
|
|
184
|
-
|
|
185
|
-
while (removeStart > 0 && charBefore && /\s/.test(charBefore)) {
|
|
243
|
+
if (removeStart > 0 && /\s/.test(markedMarkdown[removeStart - 1] ?? '')) {
|
|
186
244
|
removeStart--;
|
|
187
245
|
}
|
|
188
246
|
|
|
@@ -205,10 +263,10 @@ export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
|
|
|
205
263
|
} else {
|
|
206
264
|
// Parent comment
|
|
207
265
|
if (c.anchorFromReply) {
|
|
208
|
-
// Anchor markers are placed by the reply, just remove this comment
|
|
266
|
+
// Anchor markers are placed by the reply, just remove this comment.
|
|
267
|
+
// Consume one preceding whitespace char only (see reply branch above).
|
|
209
268
|
let removeStart = c.start;
|
|
210
|
-
|
|
211
|
-
while (removeStart > 0 && charBefore && /\s/.test(charBefore)) {
|
|
269
|
+
if (removeStart > 0 && /\s/.test(markedMarkdown[removeStart - 1] ?? '')) {
|
|
212
270
|
removeStart--;
|
|
213
271
|
}
|
|
214
272
|
markedMarkdown = markedMarkdown.slice(0, removeStart) + markedMarkdown.slice(c.end);
|
|
@@ -421,93 +479,152 @@ export async function injectCommentsAtMarkers(
|
|
|
421
479
|
const endMarker = `${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;
|
|
422
480
|
|
|
423
481
|
const startPos = documentXml.indexOf(startMarker);
|
|
424
|
-
const endPos = documentXml.indexOf(endMarker);
|
|
482
|
+
const endPos = documentXml.indexOf(endMarker, startPos + startMarker.length);
|
|
425
483
|
|
|
426
484
|
if (startPos === -1 || endPos === -1) continue;
|
|
427
485
|
|
|
428
|
-
// Find the
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
const
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
const
|
|
451
|
-
|
|
452
|
-
const
|
|
453
|
-
const
|
|
454
|
-
|
|
486
|
+
// Find the runs containing each marker. Pandoc may split a single
|
|
487
|
+
// markdown anchor across multiple <w:r> blocks when it applies styling
|
|
488
|
+
// mid-anchor (smart-quote substitution, *italic*, `code`, **bold**).
|
|
489
|
+
// The same-run path (current happy path) collapses into the multi-run
|
|
490
|
+
// path when start and end runs coincide.
|
|
491
|
+
const startRunOpen = Math.max(
|
|
492
|
+
documentXml.lastIndexOf('<w:r>', startPos),
|
|
493
|
+
documentXml.lastIndexOf('<w:r ', startPos),
|
|
494
|
+
);
|
|
495
|
+
const startRunCloseIdx = documentXml.indexOf('</w:r>', startPos);
|
|
496
|
+
const endRunOpen = Math.max(
|
|
497
|
+
documentXml.lastIndexOf('<w:r>', endPos),
|
|
498
|
+
documentXml.lastIndexOf('<w:r ', endPos),
|
|
499
|
+
);
|
|
500
|
+
const endRunCloseIdx = documentXml.indexOf('</w:r>', endPos);
|
|
501
|
+
|
|
502
|
+
if (
|
|
503
|
+
startRunOpen === -1 || startRunCloseIdx === -1 ||
|
|
504
|
+
endRunOpen === -1 || endRunCloseIdx === -1
|
|
505
|
+
) continue;
|
|
506
|
+
|
|
507
|
+
const startRunClose = startRunCloseIdx + '</w:r>'.length;
|
|
508
|
+
const endRunClose = endRunCloseIdx + '</w:r>'.length;
|
|
509
|
+
|
|
510
|
+
const startRunFull = documentXml.slice(startRunOpen, startRunClose);
|
|
511
|
+
const endRunFull = documentXml.slice(endRunOpen, endRunClose);
|
|
512
|
+
|
|
513
|
+
// Extract <w:rPr> and <w:t> element shape from each run. Both pieces
|
|
514
|
+
// are needed verbatim so a textBefore split keeps its original styling
|
|
515
|
+
// and so the post-anchor textAfter render keeps the end run's styling.
|
|
516
|
+
function dissectRun(runXml: string, marker: string): {
|
|
517
|
+
rPr: string;
|
|
518
|
+
tElement: string;
|
|
519
|
+
textBefore: string;
|
|
520
|
+
textAfter: string;
|
|
521
|
+
} | null {
|
|
522
|
+
const rPrMatch = runXml.match(/<w:rPr>[\s\S]*?<\/w:rPr>/);
|
|
523
|
+
const tMatch = runXml.match(/<w:t[^>]*>([\s\S]*?)<\/w:t>/);
|
|
524
|
+
if (!tMatch) return null;
|
|
525
|
+
const tOpenMatch = tMatch[0].match(/<w:t[^>]*>/);
|
|
526
|
+
if (!tOpenMatch) return null;
|
|
527
|
+
const tContent = tMatch[1] ?? '';
|
|
528
|
+
const markerInT = tContent.indexOf(marker);
|
|
529
|
+
if (markerInT === -1) return null;
|
|
530
|
+
return {
|
|
531
|
+
rPr: rPrMatch ? rPrMatch[0] : '',
|
|
532
|
+
tElement: tOpenMatch[0],
|
|
533
|
+
textBefore: tContent.slice(0, markerInT),
|
|
534
|
+
textAfter: tContent.slice(markerInT + marker.length),
|
|
535
|
+
};
|
|
536
|
+
}
|
|
455
537
|
|
|
456
|
-
let
|
|
457
|
-
|
|
458
|
-
let textAfter = fullText.slice(endInText + endMarker.length);
|
|
538
|
+
let replacement = '';
|
|
539
|
+
const replies = commentsWithIds.filter(c => c.isReply && c.parentIdx === comment?.commentIdx);
|
|
459
540
|
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
const
|
|
463
|
-
|
|
464
|
-
anchorText = wordMatch[1] ?? '';
|
|
465
|
-
textAfter = textAfter.slice(wordMatch[0].length);
|
|
541
|
+
const emitRangeStarts = () => {
|
|
542
|
+
replacement += `<w:commentRangeStart w:id="${comment.id}"/>`;
|
|
543
|
+
for (const reply of replies) {
|
|
544
|
+
replacement += `<w:commentRangeStart w:id="${reply.id}"/>`;
|
|
466
545
|
}
|
|
467
|
-
}
|
|
546
|
+
};
|
|
547
|
+
|
|
548
|
+
const emitRangeEnds = () => {
|
|
549
|
+
replacement += `<w:commentRangeEnd w:id="${comment.id}"/>`;
|
|
550
|
+
replacement += `<w:r><w:commentReference w:id="${comment.id}"/></w:r>`;
|
|
551
|
+
for (const reply of replies) {
|
|
552
|
+
replacement += `<w:commentRangeEnd w:id="${reply.id}"/>`;
|
|
553
|
+
replacement += `<w:r><w:commentReference w:id="${reply.id}"/></w:r>`;
|
|
554
|
+
injectedIds.add(reply.id);
|
|
555
|
+
}
|
|
556
|
+
};
|
|
557
|
+
|
|
558
|
+
if (startRunOpen === endRunOpen) {
|
|
559
|
+
// Same-run path: both markers live inside one <w:t>. Original logic.
|
|
560
|
+
const startInfo = dissectRun(startRunFull, startMarker);
|
|
561
|
+
if (!startInfo) continue;
|
|
562
|
+
const fullText = startInfo.textBefore + startMarker + startInfo.textAfter;
|
|
563
|
+
const endInTextRel = startInfo.textAfter.indexOf(endMarker);
|
|
564
|
+
if (endInTextRel === -1) continue;
|
|
565
|
+
const anchorTextSame = startInfo.textAfter.slice(0, endInTextRel);
|
|
566
|
+
let textAfter = startInfo.textAfter.slice(endInTextRel + endMarker.length);
|
|
567
|
+
let anchorText = anchorTextSame;
|
|
568
|
+
let textBefore = startInfo.textBefore;
|
|
569
|
+
|
|
570
|
+
// Empty anchor: borrow the next word so the comment has something
|
|
571
|
+
// to anchor on. Then normalize the trailing double space.
|
|
572
|
+
if (!anchorText && textAfter) {
|
|
573
|
+
const wordMatch = textAfter.match(/^\s*(\S+)/);
|
|
574
|
+
if (wordMatch) {
|
|
575
|
+
anchorText = wordMatch[1] ?? '';
|
|
576
|
+
textAfter = textAfter.slice(wordMatch[0].length);
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
if (!anchorText && textBefore.endsWith(' ') && textAfter.startsWith(' ')) {
|
|
580
|
+
textAfter = textAfter.slice(1);
|
|
581
|
+
}
|
|
582
|
+
// fullText is not read anywhere below; `void` it so the compiler does not flag an unused local.
|
|
583
|
+
void fullText;
|
|
468
584
|
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
585
|
+
if (textBefore) {
|
|
586
|
+
replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textBefore}</w:t></w:r>`;
|
|
587
|
+
}
|
|
588
|
+
emitRangeStarts();
|
|
589
|
+
if (anchorText) {
|
|
590
|
+
replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${anchorText}</w:t></w:r>`;
|
|
591
|
+
}
|
|
592
|
+
emitRangeEnds();
|
|
593
|
+
if (textAfter) {
|
|
594
|
+
replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textAfter}</w:t></w:r>`;
|
|
595
|
+
}
|
|
596
|
+
documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(startRunClose);
|
|
597
|
+
injectedIds.add(comment.id);
|
|
598
|
+
continue;
|
|
472
599
|
}
|
|
473
600
|
|
|
474
|
-
//
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
601
|
+
// Multi-run path: markers sit in different <w:r> blocks because pandoc
|
|
602
|
+
// applied mid-anchor styling. Split the start run at the start marker,
|
|
603
|
+
// keep all middle runs verbatim (they carry the styled anchor portions),
|
|
604
|
+
// split the end run at the end marker.
|
|
605
|
+
const startInfo = dissectRun(startRunFull, startMarker);
|
|
606
|
+
const endInfo = dissectRun(endRunFull, endMarker);
|
|
607
|
+
if (!startInfo || !endInfo) continue;
|
|
480
608
|
|
|
481
|
-
|
|
482
|
-
const replies = commentsWithIds.filter(c => c.isReply && c.parentIdx === comment?.commentIdx);
|
|
609
|
+
const middle = documentXml.slice(startRunClose, endRunOpen);
|
|
483
610
|
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
for (const reply of replies) {
|
|
487
|
-
replacement += `<w:commentRangeStart w:id="${reply.id}"/>`;
|
|
611
|
+
if (startInfo.textBefore) {
|
|
612
|
+
replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textBefore}</w:t></w:r>`;
|
|
488
613
|
}
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
replacement += `<w:r>${rPr}${tElement}${anchorText}</w:t></w:r>`;
|
|
614
|
+
emitRangeStarts();
|
|
615
|
+
if (startInfo.textAfter) {
|
|
616
|
+
replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textAfter}</w:t></w:r>`;
|
|
493
617
|
}
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
replacement += `<w:r><w:commentReference w:id="${comment.id}"/></w:r>`;
|
|
498
|
-
|
|
499
|
-
// End reply ranges and references (same position as parent, NO rStyle wrapper)
|
|
500
|
-
for (const reply of replies) {
|
|
501
|
-
replacement += `<w:commentRangeEnd w:id="${reply.id}"/>`;
|
|
502
|
-
replacement += `<w:r><w:commentReference w:id="${reply.id}"/></w:r>`;
|
|
503
|
-
injectedIds.add(reply.id);
|
|
618
|
+
replacement += middle;
|
|
619
|
+
if (endInfo.textBefore) {
|
|
620
|
+
replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textBefore}</w:t></w:r>`;
|
|
504
621
|
}
|
|
505
|
-
|
|
506
|
-
if (textAfter) {
|
|
507
|
-
replacement += `<w:r>${rPr}${tElement}${textAfter}</w:t></w:r>`;
|
|
622
|
+
emitRangeEnds();
|
|
623
|
+
if (endInfo.textAfter) {
|
|
624
|
+
replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textAfter}</w:t></w:r>`;
|
|
508
625
|
}
|
|
509
626
|
|
|
510
|
-
documentXml = documentXml.slice(0,
|
|
627
|
+
documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(endRunClose);
|
|
511
628
|
injectedIds.add(comment.id);
|
|
512
629
|
}
|
|
513
630
|
|
package/package.json
CHANGED
package/dist/package.json
DELETED
|
@@ -1,137 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "docrev",
|
|
3
|
-
"version": "0.9.4",
|
|
4
|
-
"description": "Academic paper revision workflow: Word ↔ Markdown round-trips, DOI validation, reviewer comments",
|
|
5
|
-
"type": "module",
|
|
6
|
-
"types": "dist/lib/types.d.ts",
|
|
7
|
-
"exports": {
|
|
8
|
-
".": {
|
|
9
|
-
"types": "./dist/lib/annotations.d.ts",
|
|
10
|
-
"import": "./dist/lib/annotations.js"
|
|
11
|
-
},
|
|
12
|
-
"./annotations": {
|
|
13
|
-
"types": "./dist/lib/annotations.d.ts",
|
|
14
|
-
"import": "./dist/lib/annotations.js"
|
|
15
|
-
},
|
|
16
|
-
"./build": {
|
|
17
|
-
"types": "./dist/lib/build.d.ts",
|
|
18
|
-
"import": "./dist/lib/build.js"
|
|
19
|
-
},
|
|
20
|
-
"./citations": {
|
|
21
|
-
"types": "./dist/lib/citations.d.ts",
|
|
22
|
-
"import": "./dist/lib/citations.js"
|
|
23
|
-
},
|
|
24
|
-
"./crossref": {
|
|
25
|
-
"types": "./dist/lib/crossref.d.ts",
|
|
26
|
-
"import": "./dist/lib/crossref.js"
|
|
27
|
-
},
|
|
28
|
-
"./doi": {
|
|
29
|
-
"types": "./dist/lib/doi.d.ts",
|
|
30
|
-
"import": "./dist/lib/doi.js"
|
|
31
|
-
},
|
|
32
|
-
"./equations": {
|
|
33
|
-
"types": "./dist/lib/equations.d.ts",
|
|
34
|
-
"import": "./dist/lib/equations.js"
|
|
35
|
-
},
|
|
36
|
-
"./git": {
|
|
37
|
-
"types": "./dist/lib/git.d.ts",
|
|
38
|
-
"import": "./dist/lib/git.js"
|
|
39
|
-
},
|
|
40
|
-
"./journals": {
|
|
41
|
-
"types": "./dist/lib/journals.d.ts",
|
|
42
|
-
"import": "./dist/lib/journals.js"
|
|
43
|
-
},
|
|
44
|
-
"./merge": {
|
|
45
|
-
"types": "./dist/lib/merge.d.ts",
|
|
46
|
-
"import": "./dist/lib/merge.js"
|
|
47
|
-
},
|
|
48
|
-
"./sections": {
|
|
49
|
-
"types": "./dist/lib/sections.d.ts",
|
|
50
|
-
"import": "./dist/lib/sections.js"
|
|
51
|
-
},
|
|
52
|
-
"./word": {
|
|
53
|
-
"types": "./dist/lib/word.d.ts",
|
|
54
|
-
"import": "./dist/lib/word.js"
|
|
55
|
-
},
|
|
56
|
-
"./variables": {
|
|
57
|
-
"types": "./dist/lib/variables.d.ts",
|
|
58
|
-
"import": "./dist/lib/variables.js"
|
|
59
|
-
},
|
|
60
|
-
"./grammar": {
|
|
61
|
-
"types": "./dist/lib/grammar.d.ts",
|
|
62
|
-
"import": "./dist/lib/grammar.js"
|
|
63
|
-
},
|
|
64
|
-
"./trackchanges": {
|
|
65
|
-
"types": "./dist/lib/trackchanges.d.ts",
|
|
66
|
-
"import": "./dist/lib/trackchanges.js"
|
|
67
|
-
},
|
|
68
|
-
"./spelling": {
|
|
69
|
-
"types": "./dist/lib/spelling.d.ts",
|
|
70
|
-
"import": "./dist/lib/spelling.js"
|
|
71
|
-
},
|
|
72
|
-
"./wordcomments": {
|
|
73
|
-
"types": "./dist/lib/wordcomments.d.ts",
|
|
74
|
-
"import": "./dist/lib/wordcomments.js"
|
|
75
|
-
}
|
|
76
|
-
},
|
|
77
|
-
"engines": {
|
|
78
|
-
"node": ">=18.0.0"
|
|
79
|
-
},
|
|
80
|
-
"bin": {
|
|
81
|
-
"rev": "bin/rev.js"
|
|
82
|
-
},
|
|
83
|
-
"scripts": {
|
|
84
|
-
"build": "tsc && node scripts/postbuild.js",
|
|
85
|
-
"build:watch": "tsc --watch",
|
|
86
|
-
"dev": "tsx bin/rev.ts",
|
|
87
|
-
"test": "tsx --test test/*.test.js",
|
|
88
|
-
"test:ts": "tsx --test test/*.test.ts",
|
|
89
|
-
"test:watch": "node --test --watch test/*.test.js",
|
|
90
|
-
"test:coverage": "c8 --reporter=text --reporter=lcov node --test test/*.test.js",
|
|
91
|
-
"typecheck": "tsc --noEmit",
|
|
92
|
-
"prepublishOnly": "npm run build"
|
|
93
|
-
},
|
|
94
|
-
"repository": {
|
|
95
|
-
"type": "git",
|
|
96
|
-
"url": "git+https://github.com/gcol33/docrev.git"
|
|
97
|
-
},
|
|
98
|
-
"bugs": {
|
|
99
|
-
"url": "https://github.com/gcol33/docrev/issues"
|
|
100
|
-
},
|
|
101
|
-
"homepage": "https://github.com/gcol33/docrev#readme",
|
|
102
|
-
"keywords": [
|
|
103
|
-
"markdown",
|
|
104
|
-
"word",
|
|
105
|
-
"docx",
|
|
106
|
-
"track-changes",
|
|
107
|
-
"comments",
|
|
108
|
-
"academic",
|
|
109
|
-
"writing",
|
|
110
|
-
"pandoc",
|
|
111
|
-
"criticmarkup"
|
|
112
|
-
],
|
|
113
|
-
"author": "Gilles Colling",
|
|
114
|
-
"license": "MIT",
|
|
115
|
-
"dependencies": {
|
|
116
|
-
"adm-zip": "^0.5.16",
|
|
117
|
-
"chalk": "^5.3.0",
|
|
118
|
-
"commander": "^12.0.0",
|
|
119
|
-
"dictionary-en": "^4.0.0",
|
|
120
|
-
"dictionary-en-gb": "^3.0.0",
|
|
121
|
-
"diff": "^8.0.2",
|
|
122
|
-
"mathml-to-latex": "^1.5.0",
|
|
123
|
-
"nspell": "^2.1.5",
|
|
124
|
-
"pdf-lib": "^1.17.1",
|
|
125
|
-
"pdfjs-dist": "^5.4.530",
|
|
126
|
-
"tsx": "^4.21.0",
|
|
127
|
-
"xml2js": "^0.6.2",
|
|
128
|
-
"yaml": "^2.8.2"
|
|
129
|
-
},
|
|
130
|
-
"devDependencies": {
|
|
131
|
-
"@types/adm-zip": "^0.5.7",
|
|
132
|
-
"@types/node": "^25.2.0",
|
|
133
|
-
"@types/xml2js": "^0.4.14",
|
|
134
|
-
"c8": "^10.1.2",
|
|
135
|
-
"typescript": "^5.9.3"
|
|
136
|
-
}
|
|
137
|
-
}
|