docrev 0.9.5 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/commands/file-ops.d.ts +11 -0
- package/dist/lib/commands/file-ops.d.ts.map +1 -0
- package/dist/lib/commands/file-ops.js +301 -0
- package/dist/lib/commands/file-ops.js.map +1 -0
- package/dist/lib/commands/index.d.ts +9 -1
- package/dist/lib/commands/index.d.ts.map +1 -1
- package/dist/lib/commands/index.js +17 -1
- package/dist/lib/commands/index.js.map +1 -1
- package/dist/lib/commands/merge-resolve.d.ts +12 -0
- package/dist/lib/commands/merge-resolve.d.ts.map +1 -0
- package/dist/lib/commands/merge-resolve.js +318 -0
- package/dist/lib/commands/merge-resolve.js.map +1 -0
- package/dist/lib/commands/preview.d.ts +11 -0
- package/dist/lib/commands/preview.d.ts.map +1 -0
- package/dist/lib/commands/preview.js +138 -0
- package/dist/lib/commands/preview.js.map +1 -0
- package/dist/lib/commands/project-info.d.ts +11 -0
- package/dist/lib/commands/project-info.d.ts.map +1 -0
- package/dist/lib/commands/project-info.js +187 -0
- package/dist/lib/commands/project-info.js.map +1 -0
- package/dist/lib/commands/quality.d.ts +11 -0
- package/dist/lib/commands/quality.d.ts.map +1 -0
- package/dist/lib/commands/quality.js +384 -0
- package/dist/lib/commands/quality.js.map +1 -0
- package/dist/lib/commands/sections.d.ts +3 -2
- package/dist/lib/commands/sections.d.ts.map +1 -1
- package/dist/lib/commands/sections.js +4 -736
- package/dist/lib/commands/sections.js.map +1 -1
- package/dist/lib/commands/sync.d.ts +11 -0
- package/dist/lib/commands/sync.d.ts.map +1 -0
- package/dist/lib/commands/sync.js +441 -0
- package/dist/lib/commands/sync.js.map +1 -0
- package/dist/lib/commands/text-ops.d.ts +11 -0
- package/dist/lib/commands/text-ops.d.ts.map +1 -0
- package/dist/lib/commands/text-ops.js +357 -0
- package/dist/lib/commands/text-ops.js.map +1 -0
- package/dist/lib/commands/utilities.d.ts +2 -4
- package/dist/lib/commands/utilities.d.ts.map +1 -1
- package/dist/lib/commands/utilities.js +3 -1572
- package/dist/lib/commands/utilities.js.map +1 -1
- package/dist/lib/commands/word-tools.d.ts +11 -0
- package/dist/lib/commands/word-tools.d.ts.map +1 -0
- package/dist/lib/commands/word-tools.js +272 -0
- package/dist/lib/commands/word-tools.js.map +1 -0
- package/dist/lib/diff-engine.d.ts +25 -0
- package/dist/lib/diff-engine.d.ts.map +1 -0
- package/dist/lib/diff-engine.js +354 -0
- package/dist/lib/diff-engine.js.map +1 -0
- package/dist/lib/import.d.ts +37 -117
- package/dist/lib/import.d.ts.map +1 -1
- package/dist/lib/import.js +10 -1030
- package/dist/lib/import.js.map +1 -1
- package/dist/lib/restore-references.d.ts +35 -0
- package/dist/lib/restore-references.d.ts.map +1 -0
- package/dist/lib/restore-references.js +188 -0
- package/dist/lib/restore-references.js.map +1 -0
- package/dist/lib/word-extraction.d.ts +77 -0
- package/dist/lib/word-extraction.d.ts.map +1 -0
- package/dist/lib/word-extraction.js +515 -0
- package/dist/lib/word-extraction.js.map +1 -0
- package/lib/commands/file-ops.ts +372 -0
- package/lib/commands/index.ts +24 -0
- package/lib/commands/merge-resolve.ts +378 -0
- package/lib/commands/preview.ts +178 -0
- package/lib/commands/project-info.ts +244 -0
- package/lib/commands/quality.ts +517 -0
- package/lib/commands/sections.ts +3 -870
- package/lib/commands/sync.ts +536 -0
- package/lib/commands/text-ops.ts +449 -0
- package/lib/commands/utilities.ts +62 -2043
- package/lib/commands/word-tools.ts +340 -0
- package/lib/diff-engine.ts +465 -0
- package/lib/import.ts +78 -1338
- package/lib/restore-references.ts +240 -0
- package/lib/word-extraction.ts +666 -0
- package/package.json +1 -1
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Post-extraction reference restoration and comment parsing
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { readImageRegistry } from './image-registry.js';
|
|
6
|
+
|
|
7
|
+
// ============================================
|
|
8
|
+
// Type Definitions
|
|
9
|
+
// ============================================
|
|
10
|
+
|
|
11
|
+
/**
 * Outcome of restoring figure/table cross-references in Word-extracted text.
 */
export interface RestoreCrossrefResult {
  /** Text after every replacement pass has been applied. */
  text: string;
  /** Log lines describing what was restored, skipped, or given a placeholder. */
  messages: string[];
  /** Number of references and images that were rewritten. */
  restored: number;
  /** Keys of the image anchors emitted so far, returned so later passes can share them. */
  restoredLabels: Set<string>;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Outcome of restoring image anchors from the image registry.
 */
export interface RestoreImagesResult {
  /** Text after the registry-based replacements (the input text when no registry is available). */
  text: string;
  /** Log lines describing each restored or skipped item. */
  messages: string[];
  /** Number of items that were restored. */
  restored: number;
}
|
|
23
|
+
|
|
24
|
+
// ============================================
|
|
25
|
+
// Functions
|
|
26
|
+
// ============================================
|
|
27
|
+
|
|
28
|
+
/**
 * Collect every visible `[Author: comment]` marker found in Word-extracted text.
 *
 * @param text - Text to scan.
 * @returns One entry per marker, in document order, carrying the trimmed
 *   author, the trimmed comment body, and the index of the opening `[` in `text`.
 */
export function parseVisibleComments(text: string): Array<{ author: string; text: string; position: number }> {
  const comments: Array<{ author: string; text: string; position: number }> = [];

  // `matchAll` hands back a fresh iterator on every call, so there is no
  // shared `lastIndex` state and no assignment-in-condition loop.
  for (const found of text.matchAll(/\[([^\]:]+):\s*([^\]]+)\]/g)) {
    comments.push({
      author: (found[1] ?? '').trim(),
      text: (found[2] ?? '').trim(),
      position: found.index ?? 0,
    });
  }

  return comments;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Convert visible `[Author: comment]` markers to CriticMarkup comments
 * (`{>>Author: comment<<}`).
 *
 * Author and comment body are trimmed, matching what `parseVisibleComments`
 * reports for the same marker, so padded input such as `[ Bob :  ok  ]` does
 * not leak stray whitespace into the output.
 *
 * @param text - Text that may contain visible comment markers.
 * @returns The text with every marker rewritten; text without markers is
 *   returned unchanged.
 */
export function convertVisibleComments(text: string): string {
  return text.replace(
    /\[([^\]:]+):\s*([^\]]+)\]/g,
    (_marker: string, author: string, body: string) => `{>>${author.trim()}: ${body.trim()}<<}`
  );
}
|
|
53
|
+
|
|
54
|
+
/**
 * Restore pandoc-crossref figure/table references from Word-rendered format.
 *
 * Four passes run in order over `text`:
 *  1. `[Figure]{.mark} [N]{.mark}` pairs become `@fig:<label>` / `@tbl:<label>`,
 *     falling back to a placeholder label when the registry has no labelled entry.
 *  2. Plain `Figure N` / `Table N` mentions (not followed by `:`) become
 *     references, but only when the registry supplies a label; otherwise they
 *     are left untouched.
 *  3. A plain-text `Figure N ...` line directly after an image is dropped.
 *  4. Images whose alt text starts with `Figure N:` lose that prefix and, when
 *     the registry knows the number, gain a `{#fig:<label>}` anchor. An image
 *     whose label was already restored keeps its markup but gets no second anchor.
 *
 * @param text - Markdown extracted from Word.
 * @param projectDir - Directory handed to `readImageRegistry`.
 * @param restoredLabels - Keys (`fig:<label>` / `tbl:<label>`) of anchors that
 *   were already emitted. The set is mutated and returned so another pass can
 *   share it; a new set is created when the argument is omitted or `null`.
 * @returns The rewritten text, the number of replacements made, log messages,
 *   and the set of restored label keys.
 */
export function restoreCrossrefFromWord(
  text: string,
  projectDir: string,
  restoredLabels: Set<string> | null = null
): RestoreCrossrefResult {
  const messages: string[] = [];
  const seen = restoredLabels ?? new Set<string>();
  const registry = readImageRegistry(projectDir);
  let restored = 0;

  // Every accepted table spelling ("Table", "Tbl", "Tbl.") starts with "t" and
  // every figure spelling starts with "f"; testing for "tab" would misfile
  // "Tbl" as a figure.
  const prefixFor = (type: string): 'fig' | 'tbl' =>
    type.toLowerCase().startsWith('t') ? 'tbl' : 'fig';

  // Label registered for a rendered number key such as "fig:3"; undefined when
  // there is no registry, no entry, or the entry carries no label.
  const labelFor = (numberKey: string): string | undefined =>
    registry?.byNumber?.get(numberKey)?.label || undefined;

  // Pattern 1: [Figure]{.mark} [N]{.mark}
  let result = text.replace(
    /\[(Figure|Table|Fig\.?)\]\{\.mark\}\s*\[(\d+|S\d+)\]\{\.mark\}/gi,
    (_match: string, type: string, num: string) => {
      const prefix = prefixFor(type);
      const label = labelFor(`${prefix}:${num}`);
      restored++;
      if (label) {
        return `@${prefix}:${label}`;
      }
      messages.push(`Restored ${type} ${num} (no label found, using placeholder)`);
      // NOTE(review): the placeholder label is `fig<N>` for tables as well — confirm that is intended.
      return `@${prefix}:fig${num}`;
    }
  );

  // Pattern 2: Plain "Figure N" or "Fig. N"; left as-is unless the registry has a label.
  result = result.replace(
    /(?<!!)\b(Figure|Fig\.?|Table|Tbl\.?)\s+(\d+|S\d+)\b(?!\s*:)/gi,
    (match: string, type: string, num: string) => {
      const prefix = prefixFor(type);
      const label = labelFor(`${prefix}:${num}`);
      if (!label) {
        return match;
      }
      restored++;
      return `@${prefix}:${label}`;
    }
  );

  // Pattern 3: Remove duplicate plain-text captions that follow an image.
  result = result.replace(
    /(\!\[[^\]]+\]\([^)]+\)(?:\{[^}]*\})?)\s*\n+\s*(?:Figure|Fig\.?|Table|Tbl\.?)\s+\d+[:\.]?\s*[^\n]+/gi,
    '$1'
  );

  // Pattern 4: Clean up image captions that start with "Figure N: "
  result = result.replace(
    /!\[(Figure|Fig\.?|Table|Tbl\.?)\s+(\d+|S\d+)[:\.]?\s*([^\]]*)\]\(([^)]+)\)(?:\{[^}]*\})?/gi,
    (_match: string, type: string, num: string, caption: string, imgPath: string) => {
      const prefix = prefixFor(type);
      const label = labelFor(`${prefix}:${num}`);
      // The image itself is always kept; only the "Figure N:" prefix and any
      // previous attribute block are dropped.
      const image = `![${caption.trim()}](${imgPath})`;
      if (!label) {
        return image;
      }

      // Dedupe on the label, not the rendered number, so the key lines up with
      // other passes that share this set and key it by `<type>:<label>`.
      const labelKey = `${prefix}:${label}`;
      if (seen.has(labelKey)) {
        messages.push(`Skipped duplicate ${labelKey} (already restored)`);
        return image;
      }
      seen.add(labelKey);
      restored++;
      messages.push(`Restored image ${labelKey} from Figure ${num}`);
      return `${image}{#${labelKey}}`;
    }
  );

  return { text: result, restored, messages, restoredLabels: seen };
}
|
|
128
|
+
|
|
129
|
+
/**
 * Restore figure/table anchors in Word-extracted text using the image registry.
 *
 * Passes, in order:
 *  1. `@fig:<label>: caption` / `@tbl:<label>: caption` lines whose label is in
 *     the registry.
 *  2. The same captions wrapped in a one-cell table row (`| @fig:... |`).
 *  3. Removal of the empty table scaffolding (`| |` + `|:--:|`).
 *  4. Standalone `Figure N: caption` lines, resolved through the number index.
 *  5. `![caption](media/...)` images, resolved through the caption index using
 *     the first 50 characters of the caption, lower-cased and trimmed.
 *
 * Each registry label is anchored once: the first hit is replaced by
 * `{#<type>:<label>}`, later hits for the same label are replaced by an empty
 * string and logged as duplicates.
 *
 * NOTE(review): every replacement emits the anchor alone — the matched caption
 * and image markup are not re-emitted. Confirm that is intended; the registry
 * fields that would supply a path are not referenced in this function.
 *
 * @param text - Markdown extracted from Word.
 * @param projectDir - Directory handed to `readImageRegistry`.
 * @param restoredLabels - Keys (`<type>:<label>`) already anchored elsewhere;
 *   mutated in place. A new set is created when omitted or `null`.
 * @returns The rewritten text, the number of items restored, and log messages.
 *   When the registry is missing or lists no figures, `text` is returned
 *   unchanged with the single message `No image registry found`.
 */
export function restoreImagesFromRegistry(
  text: string,
  projectDir: string,
  restoredLabels: Set<string> | null = null
): RestoreImagesResult {
  const registry = readImageRegistry(projectDir);
  if (!registry || !registry.figures || registry.figures.length === 0) {
    return { text, restored: 0, messages: ['No image registry found'] };
  }

  const messages: string[] = [];
  const seen = restoredLabels ?? new Set<string>();
  let restored = 0;

  // Marks `key` as restored and logs the outcome. Returns false when the key
  // was already present, i.e. the current occurrence is a duplicate.
  const claim = (key: string, duplicateMessage: string, restoredMessage: string): boolean => {
    if (seen.has(key)) {
      messages.push(duplicateMessage);
      return false;
    }
    seen.add(key);
    restored++;
    messages.push(restoredMessage);
    return true;
  };

  // Fix @fig:label: caption patterns
  let result = text.replace(
    /@(fig|tbl):([a-zA-Z0-9_-]+):\s*([^\n]+)/gi,
    (match: string, type: string, label: string) => {
      const key = `${type}:${label}`;
      if (!registry.byLabel.get(key)) {
        return match;
      }
      const isFirst = claim(
        key,
        `Skipped duplicate ${key} (already restored)`,
        `Restored ${type}:${label} from registry`
      );
      return isFirst ? `{#${type}:${label}}` : ``;
    }
  );

  // Fix table-wrapped captions
  result = result.replace(
    /\|\s*@(fig|tbl):([a-zA-Z0-9_-]+):\s*([^|]+)\s*\|/gi,
    (match: string, type: string, label: string) => {
      const key = `${type}:${label}`;
      if (!registry.byLabel.get(key)) {
        return match;
      }
      const isFirst = claim(
        key,
        `Skipped duplicate ${key} from table wrapper`,
        `Restored ${type}:${label} from table wrapper`
      );
      return isFirst ? `{#${type}:${label}}` : ``;
    }
  );

  // Clean up empty table structures
  result = result.replace(/\|\s*\|\s*\n\|:--:\|\s*\n/g, '');

  // Fix "Figure N:" standalone lines
  result = result.replace(
    /^(Figure|Fig\.?)\s+(\d+|S\d+)[.:]\s*([^\n]+)/gim,
    (match: string, _prefix: string, num: string) => {
      const entry = registry.byNumber.get(`fig:${num}`);
      if (!entry) {
        return match;
      }
      const isFirst = claim(
        `fig:${entry.label}`,
        `Skipped duplicate Figure ${num} (already restored)`,
        `Restored Figure ${num} by number lookup`
      );
      return isFirst ? `{#fig:${entry.label}}` : ``;
    }
  );

  // Fix generic media paths by matching caption text
  result = result.replace(/!\[([^\]]*)\]\(media\/[^)]+\)/g, (match: string, caption: string) => {
    if (!caption || caption.trim() === '') {
      return match;
    }

    const captionKey = caption.slice(0, 50).toLowerCase().trim();
    const entry = registry.byCaption.get(captionKey);
    if (!entry) {
      return match;
    }

    const labelKey = entry.label ? `${entry.type}:${entry.label}` : null;
    if (labelKey && seen.has(labelKey)) {
      messages.push(`Skipped duplicate by caption match: ${captionKey.slice(0, 30)}...`);
      return ``;
    }
    if (labelKey) {
      seen.add(labelKey);
    }
    restored++;
    messages.push(`Restored image by caption match: ${captionKey.slice(0, 30)}...`);

    // Duplicates returned above, so a labelled entry always gets its anchor.
    // Re-checking the set here would always find the key that was just added
    // and suppress the anchor.
    return labelKey ? `{#${labelKey}}` : '';
  });

  return { text: result, restored, messages };
}
|