docrev 0.9.4 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/commands/comments.d.ts.map +1 -1
- package/dist/lib/commands/comments.js +19 -27
- package/dist/lib/commands/comments.js.map +1 -1
- package/dist/lib/commands/context.d.ts +1 -0
- package/dist/lib/commands/context.d.ts.map +1 -1
- package/dist/lib/commands/context.js +1 -2
- package/dist/lib/commands/context.js.map +1 -1
- package/dist/lib/commands/file-ops.d.ts +11 -0
- package/dist/lib/commands/file-ops.d.ts.map +1 -0
- package/dist/lib/commands/file-ops.js +301 -0
- package/dist/lib/commands/file-ops.js.map +1 -0
- package/dist/lib/commands/index.d.ts +9 -1
- package/dist/lib/commands/index.d.ts.map +1 -1
- package/dist/lib/commands/index.js +17 -1
- package/dist/lib/commands/index.js.map +1 -1
- package/dist/lib/commands/merge-resolve.d.ts +12 -0
- package/dist/lib/commands/merge-resolve.d.ts.map +1 -0
- package/dist/lib/commands/merge-resolve.js +318 -0
- package/dist/lib/commands/merge-resolve.js.map +1 -0
- package/dist/lib/commands/preview.d.ts +11 -0
- package/dist/lib/commands/preview.d.ts.map +1 -0
- package/dist/lib/commands/preview.js +138 -0
- package/dist/lib/commands/preview.js.map +1 -0
- package/dist/lib/commands/project-info.d.ts +11 -0
- package/dist/lib/commands/project-info.d.ts.map +1 -0
- package/dist/lib/commands/project-info.js +187 -0
- package/dist/lib/commands/project-info.js.map +1 -0
- package/dist/lib/commands/quality.d.ts +11 -0
- package/dist/lib/commands/quality.d.ts.map +1 -0
- package/dist/lib/commands/quality.js +384 -0
- package/dist/lib/commands/quality.js.map +1 -0
- package/dist/lib/commands/sections.d.ts +3 -2
- package/dist/lib/commands/sections.d.ts.map +1 -1
- package/dist/lib/commands/sections.js +4 -723
- package/dist/lib/commands/sections.js.map +1 -1
- package/dist/lib/commands/sync.d.ts +11 -0
- package/dist/lib/commands/sync.d.ts.map +1 -0
- package/dist/lib/commands/sync.js +441 -0
- package/dist/lib/commands/sync.js.map +1 -0
- package/dist/lib/commands/text-ops.d.ts +11 -0
- package/dist/lib/commands/text-ops.d.ts.map +1 -0
- package/dist/lib/commands/text-ops.js +357 -0
- package/dist/lib/commands/text-ops.js.map +1 -0
- package/dist/lib/commands/utilities.d.ts +2 -4
- package/dist/lib/commands/utilities.d.ts.map +1 -1
- package/dist/lib/commands/utilities.js +3 -1605
- package/dist/lib/commands/utilities.js.map +1 -1
- package/dist/lib/commands/word-tools.d.ts +11 -0
- package/dist/lib/commands/word-tools.d.ts.map +1 -0
- package/dist/lib/commands/word-tools.js +272 -0
- package/dist/lib/commands/word-tools.js.map +1 -0
- package/dist/lib/comment-realign.d.ts.map +1 -1
- package/dist/lib/comment-realign.js +0 -7
- package/dist/lib/comment-realign.js.map +1 -1
- package/dist/lib/dependencies.d.ts.map +1 -1
- package/dist/lib/dependencies.js +11 -23
- package/dist/lib/dependencies.js.map +1 -1
- package/dist/lib/diff-engine.d.ts +25 -0
- package/dist/lib/diff-engine.d.ts.map +1 -0
- package/dist/lib/diff-engine.js +354 -0
- package/dist/lib/diff-engine.js.map +1 -0
- package/dist/lib/git.d.ts.map +1 -1
- package/dist/lib/git.js +18 -28
- package/dist/lib/git.js.map +1 -1
- package/dist/lib/import.d.ts +37 -117
- package/dist/lib/import.d.ts.map +1 -1
- package/dist/lib/import.js +10 -1039
- package/dist/lib/import.js.map +1 -1
- package/dist/lib/merge.d.ts.map +1 -1
- package/dist/lib/merge.js +29 -117
- package/dist/lib/merge.js.map +1 -1
- package/dist/lib/pdf-comments.d.ts.map +1 -1
- package/dist/lib/pdf-comments.js +1 -13
- package/dist/lib/pdf-comments.js.map +1 -1
- package/dist/lib/pptx-themes.d.ts.map +1 -1
- package/dist/lib/pptx-themes.js +0 -403
- package/dist/lib/pptx-themes.js.map +1 -1
- package/dist/lib/protect-restore.d.ts.map +1 -1
- package/dist/lib/protect-restore.js +34 -36
- package/dist/lib/protect-restore.js.map +1 -1
- package/dist/lib/restore-references.d.ts +35 -0
- package/dist/lib/restore-references.d.ts.map +1 -0
- package/dist/lib/restore-references.js +188 -0
- package/dist/lib/restore-references.js.map +1 -0
- package/dist/lib/slides.d.ts.map +1 -1
- package/dist/lib/slides.js +0 -35
- package/dist/lib/slides.js.map +1 -1
- package/dist/lib/trackchanges.d.ts.map +1 -1
- package/dist/lib/trackchanges.js +1 -11
- package/dist/lib/trackchanges.js.map +1 -1
- package/dist/lib/tui.d.ts +36 -45
- package/dist/lib/tui.d.ts.map +1 -1
- package/dist/lib/tui.js +92 -108
- package/dist/lib/tui.js.map +1 -1
- package/dist/lib/undo.d.ts +3 -4
- package/dist/lib/undo.d.ts.map +1 -1
- package/dist/lib/undo.js +0 -7
- package/dist/lib/undo.js.map +1 -1
- package/dist/lib/utils.d.ts +12 -0
- package/dist/lib/utils.d.ts.map +1 -1
- package/dist/lib/utils.js +26 -0
- package/dist/lib/utils.js.map +1 -1
- package/dist/lib/word-extraction.d.ts +77 -0
- package/dist/lib/word-extraction.d.ts.map +1 -0
- package/dist/lib/word-extraction.js +515 -0
- package/dist/lib/word-extraction.js.map +1 -0
- package/dist/lib/wordcomments.d.ts.map +1 -1
- package/dist/lib/wordcomments.js +1 -8
- package/dist/lib/wordcomments.js.map +1 -1
- package/dist/package.json +137 -0
- package/lib/commands/comments.ts +20 -25
- package/lib/commands/context.ts +1 -2
- package/lib/commands/file-ops.ts +372 -0
- package/lib/commands/index.ts +24 -0
- package/lib/commands/merge-resolve.ts +378 -0
- package/lib/commands/preview.ts +178 -0
- package/lib/commands/project-info.ts +244 -0
- package/lib/commands/quality.ts +517 -0
- package/lib/commands/sections.ts +3 -857
- package/lib/commands/sync.ts +536 -0
- package/lib/commands/text-ops.ts +449 -0
- package/lib/commands/utilities.ts +62 -2066
- package/lib/commands/word-tools.ts +340 -0
- package/lib/comment-realign.ts +0 -8
- package/lib/dependencies.ts +12 -20
- package/lib/diff-engine.ts +465 -0
- package/lib/git.ts +24 -31
- package/lib/import.ts +78 -1348
- package/lib/merge.ts +42 -132
- package/lib/pdf-comments.ts +2 -14
- package/lib/pptx-themes.ts +0 -413
- package/lib/protect-restore.ts +48 -44
- package/lib/restore-references.ts +240 -0
- package/lib/slides.ts +0 -37
- package/lib/trackchanges.ts +1 -12
- package/lib/{tui.js → tui.ts} +139 -126
- package/lib/undo.ts +3 -12
- package/lib/utils.ts +28 -0
- package/lib/word-extraction.ts +666 -0
- package/lib/wordcomments.ts +1 -9
- package/package.json +1 -1
|
@@ -0,0 +1,465 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Diff engine - diffing and annotation processing for Word→Markdown import
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { diffWords, Change } from 'diff';
|
|
6
|
+
import {
|
|
7
|
+
extractMarkdownPrefix,
|
|
8
|
+
protectAnchors,
|
|
9
|
+
restoreAnchors,
|
|
10
|
+
protectCrossrefs,
|
|
11
|
+
restoreCrossrefs,
|
|
12
|
+
protectMath,
|
|
13
|
+
restoreMath,
|
|
14
|
+
replaceRenderedMath,
|
|
15
|
+
protectCitations,
|
|
16
|
+
restoreCitations,
|
|
17
|
+
replaceRenderedCitations,
|
|
18
|
+
protectImages,
|
|
19
|
+
restoreImages,
|
|
20
|
+
matchWordImagesToOriginal,
|
|
21
|
+
protectTables,
|
|
22
|
+
restoreTables,
|
|
23
|
+
} from './protect-restore.js';
|
|
24
|
+
import { normalizeWhitespace } from './utils.js';
|
|
25
|
+
import type { WordTable } from './word-extraction.js';
|
|
26
|
+
|
|
27
|
+
// ============================================
|
|
28
|
+
// Type Definitions
|
|
29
|
+
// ============================================
|
|
30
|
+
|
|
31
|
+
/**
 * Optional inputs for {@link generateSmartDiff}.
 */
export interface GenerateSmartDiffOptions {
  /**
   * Image registry forwarded as-is to the image protection and matching
   * helpers. Defaults to `null` when omitted.
   */
  imageRegistry?: any;

  /**
   * Tables extracted from the Word document; they are injected into the
   * Word text before diffing. Defaults to an empty list when omitted.
   */
  wordTables?: WordTable[];
}
|
|
35
|
+
|
|
36
|
+
// ============================================
|
|
37
|
+
// Functions
|
|
38
|
+
// ============================================
|
|
39
|
+
|
|
40
|
+
/**
 * Fix citation and math annotations by preserving original markdown syntax.
 *
 * Runs a fixed sequence of regex rewrites over CriticMarkup-annotated text:
 * deletions/substitutions that wrap `$…$` / `$$…$$` math or `[@key]`
 * citations are collapsed back to the original markdown, insertions that look
 * like rendered citations (e.g. `(Smith 2020)`) are dropped, and finally
 * whitespace and empty annotations are tidied up.
 *
 * @param text - Annotated text to clean up
 * @param originalMd - Original markdown source. Not consulted by the current
 *   implementation; kept so existing callers keep working.
 * @returns The cleaned-up annotated text
 */
export function fixCitationAnnotations(text: string, originalMd: string): string {
  // Fix math annotations - a deleted math span is kept verbatim
  text = text.replace(/\{--(\$[^$]+\$)--\}/g, '$1');
  text = text.replace(/\{--(\$\$[^$]+\$\$)--\}/g, '$1');

  // A substituted math span keeps the original (left-hand) side
  text = text.replace(/\{~~(\$[^$]+\$)~>[^~]+~~\}/g, '$1');
  text = text.replace(/\{~~(\$\$[^$]+\$\$)~>[^~]+~~\}/g, '$1');

  // Fix substitutions where left side is exactly a markdown citation
  text = text.replace(/\{~~(\[@[^\]]+\])~>[^~]+~~\}/g, '$1');

  // Fix substitutions where left side STARTS with a markdown citation:
  // pull the citation out and keep only the trailing text as a change.
  text = text.replace(
    /\{~~(\[@[^\]]+\])\s*([^~]*)~>([^~]*)~~\}/g,
    (_match, cite, oldText, newText) => {
      const oldTrimmed = oldText.trim();
      const newTrimmed = newText.trim();

      if (oldTrimmed === '' && newTrimmed === '') {
        return cite;
      }

      // Identical trailing text is emitted as plain text (untrimmed new side),
      // otherwise as a substitution of the trailing text only.
      return oldTrimmed !== newTrimmed
        ? `${cite} {~~${oldTrimmed}~>${newTrimmed}~~}`
        : `${cite} ${newText}`;
    }
  );

  // Fix deletions of markdown citations
  text = text.replace(/\{--(\[@[^\]]+\])--\}/g, '$1');

  // Fix insertions of rendered citations
  text = text.replace(/\{\+\+\([A-Z][^)]*\d{4}[^)]*\)\+\+\}/g, '');

  // Clean up broken multi-part substitutions
  text = text.replace(/\{~~(@[A-Za-z]+\d{4})~>[^~]+~~\}/g, '[$1]');

  // Fix citations split across substitution boundaries
  text = text.replace(/\{~~\[@~>[^~]*~~\}([A-Za-z]+\d{4})\]/g, '[@$1]');

  // Clean up any remaining partial citations
  text = text.replace(/\{~~;\s*@([A-Za-z]+\d{4})\]~>[^~]*~~\}/g, '; [@$1]');

  // Remove rendered citation insertions (with Unicode support)
  text = text.replace(/\{\+\+\(\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?(?:[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?)*\)\+\+\}/gu, '');
  text = text.replace(/\{\+\+\(\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?(?:[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?)*\)\.\s*\+\+\}/gu, '');

  // Trailing citation fragments
  text = text.replace(/\{\+\+\d{4}[a-z]?(?:[;,]\s*(?:\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+)?\d{4}[a-z]?)*\)\.\s*\+\+\}/gu, '');
  text = text.replace(/\{\+\+\d{4}[a-z]?(?:[;,]\s*(?:\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+)?\d{4}[a-z]?)*\)\s*\+\+\}/gu, '');

  // Just year with closing paren
  text = text.replace(/\{\+\+\d{4}[a-z]?\)\.\s*\+\+\}/g, '');
  text = text.replace(/\{\+\+\d{4}[a-z]?\)\s*\+\+\}/g, '');

  // Leading citation fragments
  // NOTE(review): this also matches an insertion consisting of a single
  // capitalized word (e.g. `{++Hello++}`) - confirm that is acceptable.
  text = text.replace(/\{\+\+\(?\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s*\+\+\}/gu, '');

  // Semicolon-separated fragments
  text = text.replace(/\{\+\+[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?\+\+\}/gu, '');

  // Year ranges with authors
  text = text.replace(/\{\+\+\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?(?:[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?)*\)\s*\+\+\}/gu, '');
  text = text.replace(/\{\+\+\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?(?:[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?)*\)\.\s*\+\+\}/gu, '');

  // Clean up double spaces and orphaned punctuation left by the removals above
  text = text.replace(/ +/g, ' ');
  text = text.replace(/\s+\./g, '.');
  text = text.replace(/\s+,/g, ',');

  // Final cleanup - remove empty annotations
  text = text.replace(/\{~~\s*~>\s*~~\}/g, '');
  text = text.replace(/\{\+\+\s*\+\+\}/g, '');
  text = text.replace(/\{--\s*--\}/g, '');

  return text;
}
|
|
118
|
+
|
|
119
|
+
/**
 * Strip markdown syntax to get plain text.
 *
 * Applies a sequence of regex replacements that remove or unwrap front
 * matter, headings, emphasis, images, links, code, blockquotes, rules, list
 * markers and table punctuation, then collapses runs of blank lines and
 * trims the result.
 *
 * Ordering matters for two pairs of rules:
 * - images are removed BEFORE links are unwrapped, otherwise the link rule
 *   rewrites the `[alt](url)` tail of `![alt](url)` and leaves `!alt` behind;
 * - fenced code blocks are removed BEFORE inline code is unwrapped, otherwise
 *   the inline rule consumes part of the fence and the block is never removed.
 *
 * @param md - Markdown source
 * @returns Plain-text approximation of the markdown
 */
function stripMarkdownSyntax(md: string): string {
  return md
    // Front matter delimited by `---` lines
    .replace(/^---[\s\S]*?---\n*/m, '')
    // ATX heading markers
    .replace(/^#{1,6}\s+/gm, '')
    // Bold, then italic
    .replace(/(\*\*|__)(.*?)\1/g, '$2')
    .replace(/(\*|_)(.*?)\1/g, '$2')
    // Images are dropped entirely (must precede the link rule)
    .replace(/!\[([^\]]*)\]\([^)]+\)/g, '')
    // Links keep their text
    .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
    // Fenced code blocks are dropped (must precede the inline-code rule)
    .replace(/```[\s\S]*?```/g, '')
    // Inline code keeps its content
    .replace(/`([^`]+)`/g, '$1')
    // Blockquote markers
    .replace(/^>\s*/gm, '')
    // Horizontal rules
    .replace(/^[-*_]{3,}\s*$/gm, '')
    // Bullet and numbered list markers
    .replace(/^[\s]*[-*+]\s+/gm, '')
    .replace(/^[\s]*\d+\.\s+/gm, '')
    // Table pipes and alignment rows
    .replace(/\|/g, ' ')
    .replace(/^[-:]+$/gm, '')
    // Collapse 3+ newlines into a single blank line
    .replace(/\n{3,}/g, '\n\n')
    .trim();
}
|
|
141
|
+
|
|
142
|
+
/**
 * Inject Word tables (extracted from XML) into pandoc text output.
 *
 * For each table, the first cell of its first row and the last cell of its
 * last row are located in the text; the blank-line-delimited region that
 * spans them is replaced by the table's markdown followed by a blank line.
 * Tables whose anchor cells cannot be found are skipped.
 *
 * Blank lines in `table.markdown` (including a trailing newline) are ignored
 * when picking the first/last rows, so a table that ends with `\n` is still
 * anchored on its last real row.
 *
 * @param pandocText - Text produced by pandoc for the Word document
 * @param wordTables - Tables to inject; each entry's `markdown` is used
 * @returns The text with matched table regions replaced
 */
function injectWordTables(pandocText: string, wordTables: WordTable[]): string {
  if (!wordTables || wordTables.length === 0) {
    return pandocText;
  }

  // Split a pipe-table row into its non-empty, trimmed cells.
  const cellsOf = (row: string): string[] =>
    row
      .split('|')
      .map((c) => c.trim())
      .filter((c) => c.length > 0);

  let result = pandocText;

  for (const table of wordTables) {
    const rows = table.markdown.split('\n').filter((row) => row.trim().length > 0);
    const headerRow = rows[0];
    const lastRow = rows[rows.length - 1];
    if (headerRow === undefined || lastRow === undefined) continue;

    const firstCell = cellsOf(headerRow)[0];
    if (firstCell === undefined) continue;

    const startIdx = result.indexOf(firstCell);
    if (startIdx === -1) continue;

    const lastCells = cellsOf(lastRow);
    const lastCell = lastCells[lastCells.length - 1];
    // Without this guard `indexOf(undefined)` would search for "undefined".
    if (lastCell === undefined) continue;

    const endIdx = result.indexOf(lastCell, startIdx);
    if (endIdx === -1) continue;

    // Expand to the enclosing blank-line-delimited region.
    const precedingBreak = result.lastIndexOf('\n\n', startIdx);
    const regionStart = precedingBreak === -1 ? 0 : precedingBreak + 2;

    const followingBreak = result.indexOf('\n\n', endIdx + lastCell.length);
    const regionEnd = followingBreak === -1 ? result.length : followingBreak;

    result = result.slice(0, regionStart) + table.markdown + '\n\n' + result.slice(regionEnd);
  }

  return result;
}
|
|
188
|
+
|
|
189
|
+
/**
 * Generate annotated markdown by diffing original MD against Word text.
 *
 * Both inputs are whitespace-normalized and compared word by word. Added
 * segments are wrapped as `{++…++}`, removed segments as `{--…--}`, and
 * unchanged segments are emitted verbatim.
 *
 * @param originalMd - Original markdown source
 * @param wordText - Text extracted from the Word document
 * @param author - Reviewer name; not referenced by this function's body
 * @returns The annotated text
 */
export function generateAnnotatedDiff(originalMd: string, wordText: string, author: string = 'Reviewer'): string {
  const before = normalizeWhitespace(originalMd);
  const after = normalizeWhitespace(wordText);

  return diffWords(before, after)
    .map((segment) => {
      if (segment.added) return `{++${segment.value}++}`;
      if (segment.removed) return `{--${segment.value}--}`;
      return segment.value;
    })
    .join('');
}
|
|
212
|
+
|
|
213
|
+
/**
 * Smart paragraph-level diff that preserves markdown structure.
 *
 * Pipeline:
 * 1. Inject extracted Word tables into the Word text.
 * 2. Replace tables, images, anchors, cross-references, math and citations in
 *    the original markdown with placeholders; replace tables/images and
 *    rendered math/citations in the Word text likewise.
 * 3. Split both texts into paragraphs on blank lines and, for each original
 *    paragraph, look for the best match among the next three Word paragraphs
 *    using word overlap (score must exceed 0.3).
 * 4. Emit matched paragraphs unchanged or with word-level `{++…++}` /
 *    `{--…--}` annotations, unmatched original paragraphs as deletions
 *    (headings are kept as-is), and trailing Word paragraphs as insertions.
 * 5. Restore all placeholders.
 *
 * @param originalMd - Original markdown source
 * @param wordText - Text extracted from the Word document
 * @param author - Reviewer name; not referenced by this function's body
 * @param options - Optional Word tables and image registry
 * @returns The annotated markdown
 */
export function generateSmartDiff(
  originalMd: string,
  wordText: string,
  author: string = 'Reviewer',
  options: GenerateSmartDiffOptions = {}
): string {
  const { wordTables = [], imageRegistry = null } = options;

  // Inject Word tables into pandoc output
  const wordTextWithTables = injectWordTables(wordText, wordTables);

  // Protect markdown tables
  const { text: mdWithTablesProtected, tables } = protectTables(originalMd);

  // Also protect tables in Word text
  const { text: wordWithTablesProtected, tables: wordTableBlocks } = protectTables(wordTextWithTables);

  // Protect images
  const { text: mdWithImagesProtected, images: origImages } = protectImages(mdWithTablesProtected, imageRegistry);

  const { text: wordWithImagesProtected, images: wordImages } = protectImages(wordWithTablesProtected, imageRegistry);

  // Match Word images to original images
  const imageMapping = matchWordImagesToOriginal(origImages, wordImages, imageRegistry);

  // Replace Word image placeholders with matching original placeholders
  let wordWithMappedImages = wordWithImagesProtected;
  for (const [wordPlaceholder, origPlaceholder] of imageMapping) {
    wordWithMappedImages = wordWithMappedImages.split(wordPlaceholder).join(origPlaceholder);
  }

  // Protect figure/table anchors
  const { text: mdWithAnchorsProtected, anchors: figAnchors } = protectAnchors(mdWithImagesProtected);

  // Protect cross-references
  const { text: mdWithXrefsProtected, crossrefs } = protectCrossrefs(mdWithAnchorsProtected);

  // Protect math
  const { text: mdWithMathProtected, mathBlocks } = protectMath(mdWithXrefsProtected);

  // Protect citations
  const { text: mdProtected, citations } = protectCitations(mdWithMathProtected);

  // Replace rendered elements in Word text
  let wordProtected = wordWithMappedImages;
  wordProtected = replaceRenderedMath(wordProtected, mathBlocks);
  wordProtected = replaceRenderedCitations(wordProtected, citations.length);

  // Split into paragraphs
  const originalParas = mdProtected.split(/\n\n+/);
  const wordParas = wordProtected.split(/\n\n+/);

  const result: string[] = [];

  // Index of the first Word paragraph not yet consumed by a match
  let wordIdx = 0;

  for (let i = 0; i < originalParas.length; i++) {
    const orig = originalParas[i] || '';
    const { prefix: mdPrefix, content: origContent } = extractMarkdownPrefix(orig.split('\n')[0]);

    // The original paragraph's word set does not depend on the candidate
    // Word paragraph, so build it once per original paragraph.
    const origWords = new Set(origContent.toLowerCase().split(/\s+/));

    // Find best matching word paragraph within a window of three candidates
    let bestMatch = -1;
    let bestScore = 0;

    for (let j = wordIdx; j < Math.min(wordIdx + 3, wordParas.length); j++) {
      const wordPara = wordParas[j] || '';
      const wordWords = wordPara.toLowerCase().split(/\s+/);
      const common = wordWords.filter((w) => origWords.has(w)).length;
      const score = common / Math.max(origWords.size, wordWords.length);

      if (score > bestScore && score > 0.3) {
        bestScore = score;
        bestMatch = j;
      }
    }

    // Fallback for prefixed lines (e.g. headings): accept the next Word
    // paragraph if it contains the first 20 characters of the content.
    if (bestMatch === -1) {
      if (mdPrefix && wordIdx < wordParas.length) {
        const wordPara = wordParas[wordIdx];
        if (wordPara.toLowerCase().includes(origContent.toLowerCase().slice(0, 20))) {
          bestMatch = wordIdx;
        }
      }
    }

    if (bestMatch >= 0) {
      const word = wordParas[bestMatch];

      const origStripped = stripMarkdownSyntax(orig);
      const wordNormalized = normalizeWhitespace(word);

      if (origStripped === wordNormalized) {
        // Text is identical once markdown syntax is ignored: keep the original
        result.push(orig);
      } else {
        const changes = diffWords(origStripped, wordNormalized);
        let annotated = mdPrefix;

        for (const part of changes) {
          if (part.added) {
            annotated += `{++${part.value}++}`;
          } else if (part.removed) {
            annotated += `{--${part.value}--}`;
          } else {
            annotated += part.value;
          }
        }

        result.push(annotated);
      }

      // NOTE(review): Word paragraphs between the previous wordIdx and
      // bestMatch are skipped here and never emitted - confirm that dropping
      // them (rather than marking them as insertions) is intended.
      wordIdx = bestMatch + 1;
    } else {
      // Paragraph deleted entirely; headings are kept rather than deleted
      if (mdPrefix && mdPrefix.match(/^#{1,6}\s+/)) {
        result.push(orig);
      } else {
        result.push(`{--${orig}--}`);
      }
    }
  }

  // Any remaining word paragraphs are additions
  for (let j = wordIdx; j < wordParas.length; j++) {
    const word = wordParas[j];
    if (word.trim()) {
      result.push(`{++${word}++}`);
    }
  }

  // Restore protected content
  let finalResult = result.join('\n\n');
  finalResult = restoreCitations(finalResult, citations);
  finalResult = restoreMath(finalResult, mathBlocks);
  finalResult = restoreCrossrefs(finalResult, crossrefs);
  finalResult = restoreAnchors(finalResult, figAnchors);
  finalResult = restoreImages(finalResult, origImages);
  finalResult = restoreImages(finalResult, wordImages);
  finalResult = restoreTables(finalResult, tables);
  finalResult = restoreTables(finalResult, wordTableBlocks);

  return finalResult;
}
|
|
360
|
+
|
|
361
|
+
/**
 * Clean up redundant adjacent annotations.
 *
 * First merges adjacent delete/insert pairs into substitutions, repairs a few
 * malformed annotation shapes and drops empty annotations. Then collapses
 * runs of spaces line by line, leaving alone lines that belong to a
 * dash-separated table, column-aligned rows directly above such a separator,
 * and lines starting with `|`.
 *
 * @param text - Annotated text
 * @returns The tidied text
 */
export function cleanupAnnotations(text: string): string {
  const separatorRow = /^[-]+(\s+[-]+)+\s*$/;
  const columnGap = /\S+\s{2,}\S+/;
  const emphasizedLine = /^\*[^*]+\*\s*$/;

  const merged = text
    // Adjacent delete+insert becomes a substitution
    .replace(/\{--(.+?)--\}\s*\{\+\+(.+?)\+\+\}/g, '{~~$1~>$2~~}')
    // Same for insert+delete
    .replace(/\{\+\+(.+?)\+\+\}\s*\{--(.+?)--\}/g, '{~~$2~>$1~~}')
    // Malformed patterns
    .replace(/\{--([^}]+?)~>([^}]+?)~~\}/g, '{~~$1~>$2~~}')
    // Malformed substitutions that got split
    .replace(/\{~~([^~]+)\s*--\}/g, '{--$1--}')
    .replace(/\{\+\+([^+]+)~~\}/g, '{++$1++}')
    // Empty annotations
    .replace(/\{--\s*--\}/g, '')
    .replace(/\{\+\+\s*\+\+\}/g, '');

  const rows = merged.split('\n');

  // After a blank line inside a table, peek at up to 19 following lines to
  // decide whether the table goes on (separator, column-aligned row, or a
  // fully emphasized line); indented lines and blanks are skipped over.
  const tableContinuesAfter = (blankIdx: number): boolean => {
    const stop = Math.min(rows.length, blankIdx + 20);
    for (let k = blankIdx + 1; k < stop; k++) {
      const raw = rows[k];
      const candidate = raw.trim();
      if (candidate === '') continue;
      if (separatorRow.test(candidate)) return true;
      if (columnGap.test(candidate)) return true;
      if (emphasizedLine.test(candidate)) return true;
      if (raw.startsWith(' ')) continue;
      return false;
    }
    return false;
  };

  // True when the next non-blank line after `idx` is a table separator.
  const separatorFollows = (idx: number): boolean => {
    let k = idx + 1;
    while (k < rows.length && rows[k].trim() === '') {
      k++;
    }
    return k < rows.length && separatorRow.test(rows[k].trim());
  };

  let insideTable = false;
  const output: string[] = [];

  rows.forEach((row, idx) => {
    const trimmed = row.trim();

    if (separatorRow.test(trimmed)) {
      insideTable = true;
      output.push(row);
      return;
    }

    if (insideTable) {
      // Lines inside a table are never reformatted; a blank line ends the
      // table unless table content resumes shortly after it.
      if (trimmed === '' && !tableContinuesAfter(idx)) {
        insideTable = false;
      }
      output.push(row);
      return;
    }

    const isHeaderAboveSeparator = columnGap.test(row) && separatorFollows(idx);
    if (isHeaderAboveSeparator || trimmed.startsWith('|')) {
      output.push(row);
      return;
    }

    output.push(row.replace(/ +/g, ' '));
  });

  return output.join('\n');
}
|
package/lib/git.ts
CHANGED
|
@@ -103,14 +103,18 @@ export function getChangedFiles(fromRef: string, toRef: string = 'HEAD'): Change
|
|
|
103
103
|
}
|
|
104
104
|
|
|
105
105
|
/**
|
|
106
|
-
*
|
|
107
|
-
* @param filePath - Path to file
|
|
108
|
-
* @param limit - Maximum number of commits to return
|
|
106
|
+
* Run git log with a given format and optional file path, parse pipe-delimited output
|
|
109
107
|
*/
|
|
110
|
-
|
|
108
|
+
function runGitLog(
|
|
109
|
+
format: string,
|
|
110
|
+
limit: number,
|
|
111
|
+
fields: (keyof CommitInfo)[],
|
|
112
|
+
filePath?: string,
|
|
113
|
+
): CommitInfo[] {
|
|
111
114
|
try {
|
|
115
|
+
const fileArg = filePath ? ` -- "${filePath}"` : '';
|
|
112
116
|
const output = execSync(
|
|
113
|
-
`git log --format="
|
|
117
|
+
`git log --format="${format}" -n ${limit}${fileArg}`,
|
|
114
118
|
{ stdio: 'pipe' }
|
|
115
119
|
).toString().trim();
|
|
116
120
|
|
|
@@ -118,18 +122,26 @@ export function getFileHistory(filePath: string, limit: number = 10): CommitInfo
|
|
|
118
122
|
|
|
119
123
|
return output.split('\n').map(line => {
|
|
120
124
|
const parts = line.split('|');
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
};
|
|
125
|
+
const entry: CommitInfo = { hash: '', date: '', author: '', message: '' };
|
|
126
|
+
for (let i = 0; i < fields.length; i++) {
|
|
127
|
+
entry[fields[i]] = parts[i] ?? '';
|
|
128
|
+
}
|
|
129
|
+
return entry;
|
|
127
130
|
});
|
|
128
131
|
} catch {
|
|
129
132
|
return [];
|
|
130
133
|
}
|
|
131
134
|
}
|
|
132
135
|
|
|
136
|
+
/**
 * Get commit history for a file.
 *
 * Delegates to `runGitLog` with a pipe-delimited format of abbreviated hash,
 * committer date and subject, mapped onto the `hash`, `date` and `message`
 * fields in that order.
 *
 * @param filePath - Path to file
 * @param limit - Maximum number of commits to return
 * @returns The commits returned by `runGitLog` for this file
 */
export function getFileHistory(filePath: string, limit: number = 10): CommitInfo[] {
  const logFormat = '%h|%ci|%s';
  const columns: (keyof CommitInfo)[] = ['hash', 'date', 'message'];
  return runGitLog(logFormat, limit, columns, filePath);
}
|
|
144
|
+
|
|
133
145
|
/**
|
|
134
146
|
* Compare file content between two refs
|
|
135
147
|
* @param filePath - Path to file
|
|
@@ -194,26 +206,7 @@ export function getWordCountDiff(
|
|
|
194
206
|
* @param limit - Maximum number of commits to return
|
|
195
207
|
*/
|
|
196
208
|
export function getRecentCommits(limit: number = 10): CommitInfo[] {
|
|
197
|
-
|
|
198
|
-
const output = execSync(
|
|
199
|
-
`git log --format="%h|%ci|%an|%s" -n ${limit}`,
|
|
200
|
-
{ stdio: 'pipe' }
|
|
201
|
-
).toString().trim();
|
|
202
|
-
|
|
203
|
-
if (!output) return [];
|
|
204
|
-
|
|
205
|
-
return output.split('\n').map(line => {
|
|
206
|
-
const parts = line.split('|');
|
|
207
|
-
return {
|
|
208
|
-
hash: parts[0] ?? '',
|
|
209
|
-
date: parts[1] ?? '',
|
|
210
|
-
author: parts[2] ?? '',
|
|
211
|
-
message: parts[3] ?? ''
|
|
212
|
-
};
|
|
213
|
-
});
|
|
214
|
-
} catch {
|
|
215
|
-
return [];
|
|
216
|
-
}
|
|
209
|
+
return runGitLog('%h|%ci|%an|%s', limit, ['hash', 'date', 'author', 'message']);
|
|
217
210
|
}
|
|
218
211
|
|
|
219
212
|
/**
|