docrev 0.8.5 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitattributes +1 -0
- package/README.md +25 -1
- package/dist/lib/annotations.d.ts.map +1 -1
- package/dist/lib/annotations.js +6 -0
- package/dist/lib/annotations.js.map +1 -1
- package/dist/lib/build.d.ts +6 -1
- package/dist/lib/build.d.ts.map +1 -1
- package/dist/lib/build.js +67 -1
- package/dist/lib/build.js.map +1 -1
- package/dist/lib/commands/build.d.ts.map +1 -1
- package/dist/lib/commands/build.js +26 -7
- package/dist/lib/commands/build.js.map +1 -1
- package/dist/lib/commands/response.d.ts.map +1 -1
- package/dist/lib/commands/response.js +50 -2
- package/dist/lib/commands/response.js.map +1 -1
- package/dist/lib/commands/sections.d.ts.map +1 -1
- package/dist/lib/commands/sections.js +28 -9
- package/dist/lib/commands/sections.js.map +1 -1
- package/dist/lib/csl.d.ts +38 -0
- package/dist/lib/csl.d.ts.map +1 -0
- package/dist/lib/csl.js +170 -0
- package/dist/lib/csl.js.map +1 -0
- package/dist/lib/import.d.ts.map +1 -1
- package/dist/lib/import.js +20 -7
- package/dist/lib/import.js.map +1 -1
- package/dist/lib/journals.d.ts.map +1 -1
- package/dist/lib/journals.js +24 -0
- package/dist/lib/journals.js.map +1 -1
- package/dist/lib/plugins.d.ts +11 -0
- package/dist/lib/plugins.d.ts.map +1 -1
- package/dist/lib/plugins.js +21 -1
- package/dist/lib/plugins.js.map +1 -1
- package/dist/lib/pptx-template.d.ts +17 -22
- package/dist/lib/pptx-template.d.ts.map +1 -1
- package/dist/lib/pptx-template.js +296 -552
- package/dist/lib/pptx-template.js.map +1 -1
- package/dist/lib/schema.d.ts.map +1 -1
- package/dist/lib/schema.js +4 -0
- package/dist/lib/schema.js.map +1 -1
- package/dist/lib/types.d.ts +19 -1
- package/dist/lib/types.d.ts.map +1 -1
- package/dist/lib/word.d.ts +24 -11
- package/dist/lib/word.d.ts.map +1 -1
- package/dist/lib/word.js +233 -32
- package/dist/lib/word.js.map +1 -1
- package/lib/annotations.ts +8 -0
- package/lib/build.ts +75 -2
- package/lib/commands/build.ts +25 -7
- package/lib/commands/response.ts +55 -2
- package/lib/commands/sections.ts +31 -9
- package/lib/csl.ts +191 -0
- package/lib/import.ts +21 -7
- package/lib/journals.ts +25 -1
- package/lib/plugins.ts +35 -1
- package/lib/pptx-template.ts +346 -502
- package/lib/schema.ts +4 -0
- package/lib/types.ts +20 -1
- package/lib/word.ts +253 -38
- package/package.json +1 -2
- package/lib/apply-buildup-colors.py +0 -88
package/lib/schema.ts
CHANGED
|
@@ -87,6 +87,10 @@ export const revYamlSchema: Schema = {
|
|
|
87
87
|
],
|
|
88
88
|
},
|
|
89
89
|
},
|
|
90
|
+
journal: {
|
|
91
|
+
type: 'string',
|
|
92
|
+
description: 'Journal profile name for formatting defaults and validation',
|
|
93
|
+
},
|
|
90
94
|
sections: {
|
|
91
95
|
type: 'array',
|
|
92
96
|
description: 'Ordered list of section files to include',
|
package/lib/types.ts
CHANGED
|
@@ -325,13 +325,32 @@ export interface JournalRequirements {
|
|
|
325
325
|
figures?: { max?: number };
|
|
326
326
|
tables?: { max?: number };
|
|
327
327
|
sections?: string[];
|
|
328
|
-
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
/**
 * Optional formatting defaults attached to a journal profile.
 *
 * Every field is optional, so a profile may specify any subset.
 * NOTE(review): the field names mirror pandoc / pandoc-crossref option names;
 * how each one is forwarded to the build is not visible in this file — confirm
 * against the build code before relying on that.
 */
export interface JournalFormatting {
  /** Citation style setting. NOTE(review): name vs. path vs. URL is not visible here — confirm. */
  csl?: string;
  /** Settings that apply to PDF output. */
  pdf?: {
    documentclass?: string;
    fontsize?: string;
    geometry?: string;
    linestretch?: number;
    template?: string;
    numbersections?: boolean;
  };
  /** Settings that apply to Word (.docx) output. */
  docx?: {
    /** Presumably a reference document used for styling — TODO confirm. */
    reference?: string;
  };
  /** Cross-reference label prefixes; each accepts a single string or a list of strings. */
  crossref?: {
    figPrefix?: string | string[];
    tblPrefix?: string | string[];
  };
}
|
|
330
348
|
|
|
331
349
|
/**
 * A journal profile: identifying information, submission requirements, and
 * (optionally) formatting defaults.
 */
export interface JournalProfile {
  name: string;
  url: string;
  /** Limits and expectations for a manuscript (see JournalRequirements). */
  requirements: JournalRequirements;
  /** Formatting defaults; optional, so a profile without it only carries requirements. */
  formatting?: JournalFormatting;
}
|
|
336
355
|
|
|
337
356
|
export interface ValidationResult {
|
package/lib/word.ts
CHANGED
|
@@ -8,7 +8,7 @@ import * as path from 'path';
|
|
|
8
8
|
import AdmZip from 'adm-zip';
|
|
9
9
|
import { parseString } from 'xml2js';
|
|
10
10
|
import { promisify } from 'util';
|
|
11
|
-
import type { WordComment, CommentAnchor,
|
|
11
|
+
import type { WordComment, CommentAnchor, WordMetadata, TrackChangesResult } from './types.js';
|
|
12
12
|
|
|
13
13
|
const parseXml = promisify(parseString);
|
|
14
14
|
|
|
@@ -166,9 +166,9 @@ export async function extractCommentAnchors(docxPath: string): Promise<Map<strin
|
|
|
166
166
|
}
|
|
167
167
|
|
|
168
168
|
/**
|
|
169
|
-
* Extract plain text from Word document
|
|
169
|
+
* Extract plain text from Word document (strips track change markup)
|
|
170
170
|
* @param docxPath - Path to .docx file
|
|
171
|
-
* @returns Extracted plain text
|
|
171
|
+
* @returns Extracted plain text (accepted changes applied)
|
|
172
172
|
* @throws {TypeError} If docxPath is not a string
|
|
173
173
|
* @throws {Error} If file not found
|
|
174
174
|
*/
|
|
@@ -176,41 +176,13 @@ export async function extractTextFromWord(docxPath: string): Promise<string> {
|
|
|
176
176
|
if (typeof docxPath !== 'string') {
|
|
177
177
|
throw new TypeError(`docxPath must be a string, got ${typeof docxPath}`);
|
|
178
178
|
}
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
return
|
|
186
|
-
}
|
|
187
|
-
|
|
188
|
-
/**
|
|
189
|
-
* Extract rich content from Word with basic formatting
|
|
190
|
-
* @param docxPath - Path to .docx file
|
|
191
|
-
* @returns Text and HTML content
|
|
192
|
-
* @throws {TypeError} If docxPath is not a string
|
|
193
|
-
* @throws {Error} If file not found
|
|
194
|
-
*/
|
|
195
|
-
export async function extractFromWord(docxPath: string): Promise<WordContent> {
|
|
196
|
-
if (typeof docxPath !== 'string') {
|
|
197
|
-
throw new TypeError(`docxPath must be a string, got ${typeof docxPath}`);
|
|
198
|
-
}
|
|
199
|
-
if (!fs.existsSync(docxPath)) {
|
|
200
|
-
throw new Error(`File not found: ${docxPath}`);
|
|
201
|
-
}
|
|
202
|
-
|
|
203
|
-
const mammoth = await import('mammoth');
|
|
204
|
-
|
|
205
|
-
const [textResult, htmlResult] = await Promise.all([
|
|
206
|
-
mammoth.extractRawText({ path: docxPath }),
|
|
207
|
-
mammoth.convertToHtml({ path: docxPath }),
|
|
208
|
-
]);
|
|
209
|
-
|
|
210
|
-
return {
|
|
211
|
-
text: textResult.value,
|
|
212
|
-
html: htmlResult.value,
|
|
213
|
-
};
|
|
179
|
+
const result = await extractPlainTextWithTrackChanges(docxPath);
|
|
180
|
+
// Strip CriticMarkup: accept insertions, remove deletions, apply substitutions
|
|
181
|
+
let text = result.text;
|
|
182
|
+
text = text.replace(/\{~~[^~]*~>([^~]*)~~\}/g, '$1'); // substitutions → new
|
|
183
|
+
text = text.replace(/\{\+\+([^+]*)\+\+\}/g, '$1'); // insertions → keep
|
|
184
|
+
text = text.replace(/\{--[^}]*--\}/g, ''); // deletions → remove
|
|
185
|
+
return text;
|
|
214
186
|
}
|
|
215
187
|
|
|
216
188
|
/**
|
|
@@ -350,6 +322,249 @@ export async function extractTrackChanges(docxPath: string): Promise<TrackChange
|
|
|
350
322
|
};
|
|
351
323
|
}
|
|
352
324
|
|
|
325
|
+
/**
 * Read one delimited marker that begins exactly at index `i`.
 *
 * @param text - String being scanned
 * @param i - Index at which `open` must start
 * @param open - Opening delimiter, e.g. `{++`
 * @param close - Closing delimiter, e.g. `++}`
 * @returns The text between the delimiters plus `end`, the index just past the
 *   closing delimiter; `null` when `open` is not at `i` or `close` never follows.
 */
function extractMarker(text: string, i: number, open: string, close: string): { content: string; end: number } | null {
  const bodyStart = i + open.length;

  // Only look for the closer when the opener really sits at position i.
  const closeAt = text.startsWith(open, i) ? text.indexOf(close, bodyStart) : -1;
  if (closeAt === -1) {
    return null;
  }

  const content = text.slice(bodyStart, closeAt);
  return { content, end: closeAt + close.length };
}
|
|
337
|
+
|
|
338
|
+
/**
 * Gather a run of back-to-back markers of one type into a single result.
 * For example `{++a++}{++b++}{++c++}` yields content "abc" with `end` pointing
 * just past the third marker.
 *
 * @param text - String being scanned
 * @param i - Index at which the first marker must start
 * @param open - Opening delimiter
 * @param close - Closing delimiter
 * @returns Concatenated contents and the index after the last marker consumed,
 *   or `null` when no marker starts at `i`.
 */
function collectConsecutive(text: string, i: number, open: string, close: string): { content: string; end: number } | null {
  let marker = extractMarker(text, i, open, close);
  if (marker === null) {
    return null;
  }

  const pieces: string[] = [];
  let cursor = i;

  do {
    pieces.push(marker.content);
    cursor = marker.end;
    // Stop at end of input; otherwise try for another marker immediately after.
    marker = cursor < text.length ? extractMarker(text, cursor, open, close) : null;
  } while (marker !== null);

  return { content: pieces.join(''), end: cursor };
}
|
|
358
|
+
|
|
359
|
+
/**
 * Normalise neighbouring CriticMarkup markers in a single left-to-right pass:
 *   1. Back-to-back markers of one type are fused: {++a++}{++b++} → {++ab++}
 *   2. A deletion run and an insertion run that touch (optionally separated
 *      only by spaces), in either order, become one substitution:
 *      {--old--}{++new++} → {~~old~>new~~}
 *
 * Implemented as a scanner rather than a regex, so there is no backtracking
 * across marker boundaries.
 *
 * @param text - Text containing {++…++} / {--…--} markers
 * @returns Text with adjacent markers merged
 */
function mergeAdjacentMarkers(text: string): string {
  type Run = { content: string; end: number };

  // Index of the first non-space character at or after `from`.
  const skipSpaces = (from: number): number => {
    let pos = from;
    while (pos < text.length && text[pos] === ' ') pos++;
    return pos;
  };

  // Substitution always lists the removed text first, whatever order the markers appeared in.
  // Trailing spaces are moved outside the marker (as a single space) when either side had one.
  const asSubstitution = (removed: Run, added: Run): string => {
    const keepSpace = removed.content.endsWith(' ') || added.content.endsWith(' ');
    const tail = keepSpace ? ' ' : '';
    return `{~~${removed.content.trimEnd()}~>${added.content.trimEnd()}~~}${tail}`;
  };

  let merged = '';
  let pos = 0;

  while (pos < text.length) {
    const startsDeletion = text.startsWith('{--', pos);
    const startsInsertion = !startsDeletion && text.startsWith('{++', pos);

    let lead: Run | null = null;
    if (startsDeletion) {
      lead = collectConsecutive(text, pos, '{--', '--}');
    } else if (startsInsertion) {
      lead = collectConsecutive(text, pos, '{++', '++}');
    }

    // Ordinary character, or an opener that is never closed: copy one char and move on.
    if (lead === null) {
      merged += text.charAt(pos);
      pos++;
      continue;
    }

    // Look past any spaces for a run of the opposite type.
    const partnerStart = skipSpaces(lead.end);
    const partner = startsDeletion
      ? collectConsecutive(text, partnerStart, '{++', '++}')
      : collectConsecutive(text, partnerStart, '{--', '--}');

    if (partner !== null) {
      merged += startsDeletion ? asSubstitution(lead, partner) : asSubstitution(partner, lead);
      pos = partner.end;
    } else {
      // No partner: emit the fused run on its own; skipped spaces are left for the next iterations.
      merged += startsDeletion ? `{--${lead.content}--}` : `{++${lead.content}++}`;
      pos = lead.end;
    }
  }

  return merged;
}
|
|
423
|
+
|
|
424
|
+
/**
 * Extract plain text from Word XML with track changes preserved as CriticMarkup.
 * This is a pandoc-free fallback that reads word/document.xml directly.
 *
 * Converts:
 *   <w:ins> content </w:ins>  → {++text++}
 *   <w:del> content </w:del>  → {--text--}
 * Adjacent markers are then merged by mergeAdjacentMarkers.
 *
 * Whitespace-only insertions/deletions are emitted as plain text and are not
 * counted. Self-closing w:ins / w:del elements (Word writes these for a tracked
 * paragraph mark; they carry no text) are ignored.
 *
 * Also detects headings (w:pStyle Heading1-6) and outputs markdown # syntax.
 *
 * @param docxPath - Path to Word document
 * @returns Plain text with CriticMarkup, whether any non-blank tracked change
 *   was found, and the number of non-blank insertions and deletions
 * @throws {Error} If the file does not exist or the archive has no word/document.xml
 */
export async function extractPlainTextWithTrackChanges(docxPath: string): Promise<{
  text: string;
  hasTrackChanges: boolean;
  stats: { insertions: number; deletions: number };
}> {
  if (!fs.existsSync(docxPath)) {
    throw new Error(`File not found: ${docxPath}`);
  }

  const zip = new AdmZip(docxPath);
  const docEntry = zip.getEntry('word/document.xml');

  if (!docEntry) {
    throw new Error('Invalid docx: no document.xml');
  }

  let xml = docEntry.getData().toString('utf8');
  let insertions = 0;
  let deletions = 0;

  // Use unique markers (null bytes) that won't appear in normal text
  const INS_S = '\x00IS\x00';
  const INS_E = '\x00IE\x00';
  const DEL_S = '\x00DS\x00';
  const DEL_E = '\x00DE\x00';

  // The five entities predefined by XML, keyed by name.
  const XML_ENTITIES: Record<string, string> = {
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    apos: "'",
  };

  // Collapse the body of one revision element into a single replacement run.
  // `counted` is true only when the revision contains non-blank text.
  const wrapRevision = (
    content: string,
    textPattern: RegExp,
    startMarker: string,
    endMarker: string,
  ): { xml: string; counted: boolean } => {
    let text = '';
    textPattern.lastIndex = 0; // pattern is global and reused across calls
    let m: RegExpExecArray | null;
    while ((m = textPattern.exec(content)) !== null) {
      text += m[1] || m[2] || '';
    }
    if (text.trim()) {
      return { xml: `<w:r><w:t>${startMarker}${text}${endMarker}</w:t></w:r>`, counted: true };
    }
    // Whitespace-only: preserve as plain text for spacing
    return { xml: text.length > 0 ? `<w:r><w:t>${text}</w:t></w:r>` : '', counted: false };
  };

  // Step 1: Replace <w:ins> with marker-wrapped text injected as <w:t>.
  // The lookahead rejects self-closing <w:ins .../>: treating one as an opening
  // tag would swallow everything up to the next </w:ins>, including unchanged
  // text and paragraph boundaries.
  const insTextPattern = /<w:t[^>]*>([^<]*)<\/w:t>/g;
  xml = xml.replace(/<w:ins\b(?![^>]*\/>)[^>]*>([\s\S]*?)<\/w:ins>/g, (_match, content: string) => {
    const wrapped = wrapRevision(content, insTextPattern, INS_S, INS_E);
    if (wrapped.counted) insertions++;
    return wrapped.xml;
  });

  // Step 2: Replace <w:del> similarly (uses w:delText inside); same self-closing guard.
  const delTextPattern = /<w:delText[^>]*>([^<]*)<\/w:delText>|<w:t[^>]*>([^<]*)<\/w:t>/g;
  xml = xml.replace(/<w:del\b(?![^>]*\/>)[^>]*>([\s\S]*?)<\/w:del>/g, (_match, content: string) => {
    const wrapped = wrapRevision(content, delTextPattern, DEL_S, DEL_E);
    if (wrapped.counted) deletions++;
    return wrapped.xml;
  });

  // Step 3: Extract text paragraph by paragraph
  const paragraphs: string[] = [];
  const paraPattern = /<w:p\b[^>]*>([\s\S]*?)<\/w:p>/g;
  let pm: RegExpExecArray | null;

  while ((pm = paraPattern.exec(xml)) !== null) {
    const paraXml = pm[1] ?? '';

    // Detect heading level from paragraph style
    let headingLevel = 0;
    const styleMatch = paraXml.match(/<w:pStyle\s+w:val="Heading(\d)"/i);
    if (styleMatch && styleMatch[1]) {
      headingLevel = parseInt(styleMatch[1], 10);
    }

    // Extract all <w:t> text in order
    const texts: string[] = [];
    const tPat = /<w:t[^>]*>([^<]*)<\/w:t>/g;
    let tm: RegExpExecArray | null;
    while ((tm = tPat.exec(paraXml)) !== null) {
      texts.push(tm[1] || '');
    }

    let paraText = texts.join('');

    // Decode XML entities in one pass so that "&amp;lt;" becomes the literal
    // text "&lt;" rather than being decoded twice into "<".
    paraText = paraText.replace(
      /&(amp|lt|gt|quot|apos);/g,
      (entity, name: string) => XML_ENTITIES[name] ?? entity,
    );

    // Convert markers to CriticMarkup
    paraText = paraText
      .split(INS_S).join('{++')
      .split(INS_E).join('++}')
      .split(DEL_S).join('{--')
      .split(DEL_E).join('--}');

    // Merge adjacent del+ins (or ins+del) into substitutions.
    // Uses a scanner instead of regex to avoid backtracking across marker boundaries.
    paraText = mergeAdjacentMarkers(paraText);

    // Collapse runs of multiple spaces into single space
    paraText = paraText.replace(/ {2,}/g, ' ');

    if (paraText.trim()) {
      if (headingLevel > 0 && headingLevel <= 6) {
        paragraphs.push('#'.repeat(headingLevel) + ' ' + paraText.trim());
      } else {
        paragraphs.push(paraText);
      }
    }
  }

  return {
    text: paragraphs.join('\n\n'),
    hasTrackChanges: insertions > 0 || deletions > 0,
    stats: { insertions, deletions },
  };
}
|
|
567
|
+
|
|
353
568
|
interface ExtractWithTrackChangesOptions {
|
|
354
569
|
mediaDir?: string;
|
|
355
570
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "docrev",
|
|
3
|
-
"version": "0.8.5",
|
|
3
|
+
"version": "0.9.3",
|
|
4
4
|
"description": "Academic paper revision workflow: Word ↔ Markdown round-trips, DOI validation, reviewer comments",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"types": "types/index.d.ts",
|
|
@@ -119,7 +119,6 @@
|
|
|
119
119
|
"dictionary-en": "^4.0.0",
|
|
120
120
|
"dictionary-en-gb": "^3.0.0",
|
|
121
121
|
"diff": "^8.0.2",
|
|
122
|
-
"mammoth": "^1.6.0",
|
|
123
122
|
"mathml-to-latex": "^1.5.0",
|
|
124
123
|
"nspell": "^2.1.5",
|
|
125
124
|
"pdf-lib": "^1.17.1",
|
|
@@ -1,88 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Apply buildup greying to PPTX slides.
|
|
3
|
-
Greys out all bullet items except the last one in each content placeholder.
|
|
4
|
-
"""
|
|
5
|
-
import zipfile
|
|
6
|
-
import sys
|
|
7
|
-
import re
|
|
8
|
-
import os
|
|
9
|
-
|
|
10
|
-
pptx_path = sys.argv[1]
|
|
11
|
-
temp_path = pptx_path + '.tmp'
|
|
12
|
-
|
|
13
|
-
GREY_COLOR = '888888'
|
|
14
|
-
|
|
15
|
-
def apply_grey_to_content_placeholder(text, grey_color=None):
    """Grey every paragraph except the last in a slide's content placeholder.

    Only the first shape matching the content placeholder (``<p:ph idx="1" .../>``)
    is processed. Within it, every ``<a:r>`` run of every paragraph but the last
    gets a solid fill of ``grey_color``.

    Args:
        text: Slide XML as a string.
        grey_color: Hex RGB colour without ``#``. Defaults to the module-level
            ``GREY_COLOR``.

    Returns:
        The slide XML with the fill applied, or ``text`` unchanged when there is
        no content placeholder or it holds fewer than two paragraphs.
    """
    if grey_color is None:
        grey_color = GREY_COLOR
    fill = f'<a:solidFill><a:srgbClr val="{grey_color}"/></a:solidFill>'

    # Find content placeholder (idx="1") shape
    content_match = re.search(
        r'(<p:sp>.*?<p:ph\s+idx="1"[^>]*/>.*?<p:txBody>)(.*?)(</p:txBody></p:sp>)',
        text,
        re.DOTALL,
    )
    if not content_match:
        return text

    before, body_content, after = content_match.groups()

    # Find all paragraphs in the body
    paras = list(re.finditer(r'(<a:p>.*?</a:p>)', body_content, re.DOTALL))
    if len(paras) <= 1:
        return text  # Nothing to grey if 0 or 1 paragraph

    def add_grey_to_run(run_match):
        """Return one <a:r> run with its colour forced to grey."""
        run = run_match.group(0)

        if '<a:solidFill>' in run:
            # Replace existing color
            return re.sub(r'<a:srgbClr val="[^"]*"/>',
                          lambda m: f'<a:srgbClr val="{grey_color}"/>', run)

        # Self-closing rPr, with or without attributes (<a:rPr />, <a:rPr/>,
        # <a:rPr lang="en-US"/>): expand it and keep its attributes. Without
        # this, an attributed self-closing rPr reached the final fallback and a
        # second <a:rPr> was injected next to it.
        expanded, count = re.subn(
            r'<a:rPr\b([^>]*?)\s*/>',
            lambda m: f'<a:rPr{m.group(1)}>{fill}</a:rPr>',
            run,
            count=1,
        )
        if count:
            return expanded

        if '<a:rPr>' in run:
            # Add solidFill after opening rPr tag
            return re.sub(r'(<a:rPr[^>]*>)', lambda m: m.group(1) + fill, run)
        if '</a:rPr>' in run:
            # rPr with attributes and children: insert before closing rPr
            return run.replace('</a:rPr>', fill + '</a:rPr>')

        # No rPr at all, add it after <a:r>
        return run.replace('<a:r>', f'<a:r><a:rPr>{fill}</a:rPr>')

    # Grey out all but the last paragraph, copying the text between them verbatim
    pieces = []
    cursor = 0
    for match in paras[:-1]:
        pieces.append(body_content[cursor:match.start()])
        pieces.append(re.sub(r'<a:r>.*?</a:r>', add_grey_to_run, match.group(0), flags=re.DOTALL))
        cursor = match.end()
    pieces.append(body_content[cursor:])
    new_body = ''.join(pieces)

    # Reconstruct the full text
    return text[:content_match.start()] + before + new_body + after + text[content_match.end():]
|
|
73
|
-
|
|
74
|
-
# Copy the archive entry by entry into a temp file, greying slide XML on the way,
# then swap the temp file into place.
try:
    with zipfile.ZipFile(pptx_path, 'r') as zin:
        with zipfile.ZipFile(temp_path, 'w') as zout:
            for item in zin.infolist():
                content = zin.read(item.filename)

                # Process slide XML files
                if item.filename.startswith('ppt/slides/slide') and item.filename.endswith('.xml'):
                    text = content.decode('utf-8')
                    text = apply_grey_to_content_placeholder(text)
                    content = text.encode('utf-8')

                zout.writestr(item, content)

    os.replace(temp_path, pptx_path)
except BaseException:
    # Do not leave a half-written .tmp archive next to the original on failure.
    if os.path.exists(temp_path):
        os.remove(temp_path)
    raise

print('Buildup colors applied')
|