docrev 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,345 @@
1
+ /**
2
+ * Section handling - map between section .md files and combined documents
3
+ */
4
+
5
+ import * as fs from 'fs';
6
+ import * as path from 'path';
7
+ import * as yaml from 'js-yaml';
8
+
9
+ /**
10
+ * @typedef {Object} SectionConfig
11
+ * @property {string} header - Primary header text to match
12
+ * @property {string[]} [aliases] - Alternative header texts
13
+ * @property {number} [order] - Sort order for building
14
+ */
15
+
16
+ /**
17
+ * @typedef {Object<string, SectionConfig|string>} SectionsConfig
18
+ */
19
+
/**
 * Canonical ordering of section names for a typical academic paper.
 * generateConfig uses the index of the first name contained in a file's
 * base name as that section's default sort order.
 */
const DEFAULT_ORDER = [
  // Front matter and framing
  'abstract', 'introduction', 'background', 'literature', 'theory',
  // Approach
  'methods', 'materials', 'data',
  // Findings and interpretation
  'results', 'analysis', 'discussion', 'conclusion',
  // Back matter
  'references', 'appendix', 'supplementary',
];
40
+
/**
 * Extract the first level-1 heading (`# Title`) from a markdown file.
 *
 * Files with a UTF-8 byte-order mark or Windows (CRLF) line endings are
 * handled the same as plain LF files.
 *
 * @param {string} filePath - Path to the markdown file
 * @returns {string|null} Trimmed heading text, or null when the file does not
 *   exist or contains no level-1 heading
 */
export function extractHeader(filePath) {
  if (!fs.existsSync(filePath)) return null;

  // Drop a leading BOM so a heading on the very first line still starts with '#'.
  const content = fs.readFileSync(filePath, 'utf-8').replace(/^\uFEFF/, '');

  // Split on LF or CRLF: '.' never matches '\r', so a '\r' left at the end of
  // a line would prevent the heading pattern from reaching '$'.
  for (const line of content.split(/\r?\n/)) {
    const match = line.match(/^#\s+(.+)$/);
    if (match) {
      return match[1].trim();
    }
  }

  return null;
}
61
+
/**
 * Generate a sections.yaml structure from the .md files in a directory.
 *
 * Each remaining file becomes one section keyed by file name. Its header is
 * the file's first level-1 heading, or a title-cased version of the base name
 * when no heading is found. Its order is the index of the first DEFAULT_ORDER
 * name contained in the base name, or 999 when none matches.
 *
 * @param {string} directory - Directory to scan (not recursive)
 * @param {string[]} [excludePatterns] - File names are skipped when they
 *   contain any of these substrings (case-insensitive)
 * @returns {object} Config with `version`, `description` and `sections`
 *   (sections are inserted in ascending `order`)
 */
export function generateConfig(directory, excludePatterns = ['paper.md', 'README.md', 'CLAUDE.md']) {
  const loweredPatterns = excludePatterns.map((p) => p.toLowerCase());

  const files = fs.readdirSync(directory).filter((name) => {
    if (!name.endsWith('.md')) return false;

    const loweredName = name.toLowerCase();
    if (loweredPatterns.some((p) => loweredName.includes(p))) return false;

    // A directory that happens to be named `*.md` cannot be read as a section
    // file; reading it would fail with EISDIR. Entries that cannot be stat'ed
    // (e.g. dangling symlinks) are kept, as before.
    const stats = fs.statSync(path.join(directory, name), { throwIfNoEntry: false });
    return !stats?.isDirectory();
  });

  const entries = files.map((file) => {
    const header = extractHeader(path.join(directory, file));
    const baseName = path.basename(file, '.md').toLowerCase();

    // Determine order based on common section names
    const rank = DEFAULT_ORDER.findIndex((s) => baseName.includes(s));

    return [
      file,
      {
        header: header || titleCase(baseName),
        aliases: [],
        order: rank === -1 ? 999 : rank,
      },
    ];
  });

  // Insert keys in ascending order so the dumped YAML lists sections in build order.
  entries.sort((a, b) => a[1].order - b[1].order);

  return {
    version: 1,
    description: 'Section configuration for rev import/split',
    sections: Object.fromEntries(entries),
  };
}
107
+
/**
 * Convert a string to title case.
 *
 * Words are separated by runs of hyphens, underscores or whitespace. The first
 * character of each word is upper-cased and the rest is left untouched.
 * Separators at the start or end of the input do not produce empty words, so
 * the result never has leading, trailing or doubled spaces.
 *
 * @param {string} str
 * @returns {string}
 */
function titleCase(str) {
  return str
    .split(/[-_\s]+/)
    .filter((word) => word.length > 0)
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
    .join(' ');
}
119
+
/**
 * Load a sections config from a YAML file and normalize its `sections` map.
 *
 * Every section value is converted to `{ header, aliases, order }`:
 * - a plain string becomes the header (no aliases, no order);
 * - an object keeps its `header`, `aliases` and `order`;
 * - a missing header (including a key with no value at all) falls back to a
 *   title-cased version of the file's base name, the same fallback
 *   generateConfig applies.
 *
 * @param {string} configPath - Path to the YAML file
 * @returns {object} The parsed config with a normalized `sections` object
 *   (empty when the file defines no sections)
 * @throws {Error} When the file cannot be read or is not valid YAML
 */
export function loadConfig(configPath) {
  const content = fs.readFileSync(configPath, 'utf-8');

  // NOTE(review): package.json pins js-yaml ^4, whose load() should use the
  // safe default schema — confirm before loading untrusted files.
  const parsed = yaml.load(content);

  // An empty or scalar-only document does not parse to an object; treat it as
  // an empty config instead of failing on `config.sections`.
  const config = parsed !== null && typeof parsed === 'object' ? parsed : {};

  // Normalize: convert string values to full config objects
  const normalized = { ...config };
  normalized.sections = {};

  for (const [file, value] of Object.entries(config.sections || {})) {
    if (typeof value === 'string') {
      normalized.sections[file] = {
        header: value,
        aliases: [],
      };
      continue;
    }

    // `file.md:` with no value parses to null.
    const entry = value ?? {};
    normalized.sections[file] = {
      header:
        entry.header == null
          ? titleCase(path.basename(file, '.md').toLowerCase())
          : String(entry.header),
      aliases: entry.aliases || [],
      order: entry.order,
    };
  }

  return normalized;
}
150
+
/**
 * Serialize a sections config to YAML and write it to disk as UTF-8.
 * @param {string} configPath - Destination file path
 * @param {object} config - Config object to serialize
 */
export function saveConfig(configPath, config) {
  const dumpOptions = {
    indent: 2,
    lineWidth: 100,
    quotingType: '"',
    forceQuotes: false,
  };
  const serialized = yaml.dump(config, dumpOptions);

  fs.writeFileSync(configPath, serialized, 'utf-8');
}
165
+
/**
 * Match a heading to a section file.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. It runs in
 * two passes over the sections, in their insertion order:
 *  1. exact match against each section's header or any of its aliases;
 *  2. fuzzy match: at least 70% of the header's words appear in the heading.
 *
 * Exact matches are tried against every section before any fuzzy match, so a
 * short header such as "Data" cannot capture the heading "Data Analysis" when
 * another section is headed exactly "Data Analysis".
 *
 * @param {string} heading - Heading text from Word
 * @param {object} sections - Sections config (file name → SectionConfig or
 *   plain header string)
 * @returns {{file: string, config: SectionConfig}|null} First matching section
 *   (with the config value exactly as stored in `sections`), or null
 */
export function matchHeading(heading, sections) {
  const FUZZY_THRESHOLD = 0.7;
  const normalize = (text) => text.toLowerCase().trim();
  const normalizedHeading = normalize(heading);

  // Normalize every candidate once. Entries may be plain header strings or
  // objects; an entry without a usable header can still match through aliases.
  const candidates = Object.entries(sections).map(([file, config]) => {
    const rawHeader = typeof config === 'string' ? config : config?.header;
    const rawAliases = Array.isArray(config?.aliases) ? config.aliases : [];
    return {
      file,
      config,
      header: typeof rawHeader === 'string' ? normalize(rawHeader) : null,
      aliases: rawAliases.filter((alias) => typeof alias === 'string').map(normalize),
    };
  });

  // Pass 1: exact header or alias match
  for (const { file, config, header, aliases } of candidates) {
    if (header === normalizedHeading || aliases.includes(normalizedHeading)) {
      return { file, config };
    }
  }

  // Pass 2: fuzzy match — the heading contains most of the header's words
  const headingWords = new Set(normalizedHeading.split(/\s+/));
  for (const { file, config, header } of candidates) {
    if (header === null) continue;

    const headerWords = header.split(/\s+/);
    const matchCount = headerWords.filter((w) => headingWords.has(w)).length;
    if (matchCount >= headerWords.length * FUZZY_THRESHOLD) {
      return { file, config };
    }
  }

  return null;
}
201
+
/**
 * Extract sections from Word document text.
 *
 * The text is split into paragraphs on blank lines (LF or CRLF). A paragraph
 * is treated as a section heading when it is non-empty, shorter than 100
 * characters, contains no period, and matchHeading resolves it to a section.
 * All following paragraphs up to the next such heading become that section's
 * content. Paragraphs before the first recognized heading are discarded.
 *
 * @param {string} text - Extracted text from Word
 * @param {object} sections - Sections config
 * @returns {Array<{file: string, header: string, content: string, matched: boolean}>}
 *   One entry per recognized heading, in document order; `header` is the
 *   heading text as it appears in the document and `matched` is always true
 */
export function extractSectionsFromText(text, sections) {
  const result = [];
  let currentSection = null;
  let currentContent = [];

  // Emit the section collected so far, if any.
  const flush = () => {
    if (!currentSection) return;
    result.push({
      file: currentSection.file,
      header: currentSection.header,
      content: currentContent.join('\n\n'),
      matched: true,
    });
  };

  // Accept CRLF as well as LF blank lines; otherwise text with Windows line
  // endings would be seen as one single paragraph.
  const paragraphs = text.split(/(?:\r?\n){2,}/);

  for (const para of paragraphs) {
    const trimmed = para.trim();

    // Heuristics: short (< 100 chars), no periods, matches a known section
    const isLikelyHeading = trimmed.length > 0 && trimmed.length < 100 && !trimmed.includes('.');
    const matchedSection = isLikelyHeading ? matchHeading(trimmed, sections) : null;

    if (!matchedSection) {
      currentContent.push(para);
      continue;
    }

    // A new heading closes the previous section and starts a fresh one.
    flush();
    currentSection = {
      file: matchedSection.file,
      header: trimmed,
    };
    currentContent = [];
  }

  // Save last section
  flush();

  return result;
}
261
+
/**
 * Parse annotated paper.md and split it back into section files.
 *
 * Two strategies are used:
 *  - When the document contains `<!-- @section:filename.md -->` markers, the
 *    trimmed text after each marker (up to the next marker or the end of the
 *    document) becomes the content of that file. `sections` is not consulted.
 *  - Otherwise, level-1 headings (`# Title`) are resolved through
 *    matchHeading. A heading that resolves to a file starts that file's
 *    content (heading line included); unresolved headings and text before the
 *    first resolved heading follow the original rules: unresolved headings
 *    stay in the current section, leading text is discarded.
 *
 * When several chunks resolve to the same file, they are joined with a blank
 * line instead of the later chunk overwriting the earlier one.
 *
 * @param {string} paperContent - Content of annotated paper.md
 * @param {object} sections - Sections config
 * @returns {Map<string, string>} - Map of filename → content
 */
export function splitAnnotatedPaper(paperContent, sections) {
  const result = new Map();

  // Store a chunk for `file`, appending when the file was already seen.
  const addChunk = (file, chunk) => {
    const previous = result.get(file);
    result.set(file, previous === undefined ? chunk : `${previous}\n\n${chunk}`);
  };

  // Look for section markers: <!-- @section:filename.md -->
  const markerPattern = /<!--\s*@section:(\S+\.md)\s*-->/g;
  const markers = [...paperContent.matchAll(markerPattern)];

  if (markers.length > 0) {
    markers.forEach((marker, i) => {
      const start = marker.index + marker[0].length;
      // The slice stops where the next marker begins, so it can never end
      // with a marker itself.
      const end = markers[i + 1]?.index ?? paperContent.length;
      addChunk(marker[1], paperContent.slice(start, end).trim());
    });
    return result;
  }

  // Fall back to header detection
  let currentFile = null;
  let currentContent = [];

  const flush = () => {
    if (currentFile) addChunk(currentFile, currentContent.join('\n').trim());
  };

  for (const line of paperContent.split('\n')) {
    // The trailing `\s*` absorbs the '\r' of CRLF line endings, which the
    // '.' in the capture group cannot match.
    const headerMatch = line.match(/^#\s+(.+?)\s*$/);
    const match = headerMatch ? matchHeading(headerMatch[1].trim(), sections) : null;

    if (match) {
      // Known section: close the previous one and start collecting anew.
      flush();
      currentFile = match.file;
      currentContent = [line];
    } else {
      // Body line or unknown heading - keep accumulating to the current section
      currentContent.push(line);
    }
  }

  // Save last section
  flush();

  return result;
}
329
+
/**
 * Get the list of section files from a config, sorted by ascending `order`.
 *
 * Sections without an `order` (including plain-string or empty entries) sort
 * as 999. Sections with equal order keep their relative position in the config.
 *
 * @param {object} config - Config whose `sections` maps file name → section
 * @returns {string[]} File names in build order (empty when there are no sections)
 */
export function getOrderedSections(config) {
  // Optional chaining keeps null entries (a YAML key with no value) from throwing.
  const rank = (section) => section?.order ?? 999;

  return Object.entries(config?.sections ?? {})
    .sort(([, a], [, b]) => rank(a) - rank(b))
    .map(([file]) => file);
}
@@ -0,0 +1,305 @@
1
+ /**
2
+ * Built-in templates for project scaffolding
3
+ *
4
+ * Used by `rev new` command to create new paper projects
5
+ */
6
+
/**
 * Built-in project templates, keyed by template id.
 *
 * Each template has:
 *  - `name`: human-readable title
 *  - `description`: one-line summary
 *  - `files`: map of relative file path → initial file content
 *  - `directories`: directory names that accompany the files
 *    (presumably created empty by the scaffolding command — confirm against `rev new`)
 *
 * The embedded `rev.yaml` documents rely on indentation for nesting: author
 * fields belong to their list item, and the `crossref`, `pdf` and `docx`
 * settings are children of those keys. Template literals must therefore keep
 * their content at the exact columns shown here.
 */
export const TEMPLATES = {
  /**
   * Standard academic paper: introduction, methods, results, discussion
   */
  paper: {
    name: 'Academic Paper',
    description: 'Standard paper with introduction, methods, results, discussion',
    files: {
      'rev.yaml': `# Paper configuration
title: "Your Paper Title"
authors:
- name: First Author
  affiliation: Institution
  email: author@example.com

# Section files in order
sections:
- introduction.md
- methods.md
- results.md
- discussion.md

# Bibliography (optional)
bibliography: references.bib
csl: null # uses default CSL

# Cross-reference settings
crossref:
  figureTitle: Figure
  tableTitle: Table
  figPrefix: [Fig., Figs.]
  tblPrefix: [Table, Tables]
  linkReferences: true

# PDF output settings
pdf:
  documentclass: article
  fontsize: 12pt
  geometry: margin=1in
  linestretch: 1.5
  numbersections: false

# Word output settings
docx:
  reference: null # path to reference.docx template
  keepComments: true
`,
      'introduction.md': `# Introduction

Your introduction goes here. Use dynamic figure references like @fig:example.

`,
      'methods.md': `# Methods

## Study Design

Describe your methodology here.

## Data Analysis

Reference tables with @tbl:summary.

`,
      'results.md': `# Results

Present your findings. See @fig:results for the main analysis.

![Example figure caption](figures/placeholder.png){#fig:results}

`,
      'discussion.md': `# Discussion

Interpret your results here.

## Conclusions

Summarize key findings.

`,
      'references.bib': `@article{example2024,
author = {Author, A. and Coauthor, B.},
title = {An Example Paper Title},
journal = {Journal Name},
year = {2024},
volume = {1},
pages = {1--10}
}
`,
      '.gitignore': `# Build outputs
*.pdf
*.docx
*.tex
paper.md
.paper-*.md

# System
.DS_Store
`,
    },
    directories: ['figures'],
  },

  /**
   * Minimal project: one content file and a basic config
   */
  minimal: {
    name: 'Minimal',
    description: 'Single document with basic config',
    files: {
      'rev.yaml': `title: "Document Title"
authors: []
sections:
- content.md
`,
      'content.md': `# Your Document

Write your content here.

`,
    },
    directories: [],
  },

  /**
   * Thesis chapter: abstract through appendix, report-class PDF settings
   */
  thesis: {
    name: 'Thesis Chapter',
    description: 'Thesis-style with abstract, sections, appendix',
    files: {
      'rev.yaml': `title: "Chapter Title"
authors:
- name: Your Name
  affiliation: University

sections:
- abstract.md
- introduction.md
- literature.md
- methods.md
- results.md
- discussion.md
- conclusion.md
- appendix.md

bibliography: references.bib

pdf:
  documentclass: report
  fontsize: 11pt
  geometry: "margin=1in"
  linestretch: 2
  numbersections: true
`,
      'abstract.md': `# Abstract

Brief summary of the chapter (150-300 words).

`,
      'introduction.md': `# Introduction

Background and research questions.

`,
      'literature.md': `# Literature Review

Review of relevant prior work.

`,
      'methods.md': `# Materials and Methods

Detailed methodology.

`,
      'results.md': `# Results

Findings and analysis.

`,
      'discussion.md': `# Discussion

Interpretation of results.

`,
      'conclusion.md': `# Conclusion

Summary and implications.

`,
      'appendix.md': `# Appendix

## Supplementary Materials

Additional details here.

`,
      'references.bib': ``,
      '.gitignore': `*.pdf
*.docx
*.tex
paper.md
.paper-*.md
.DS_Store
`,
    },
    directories: ['figures', 'tables'],
  },

  /**
   * Review article: introduction, three themed sections, synthesis, conclusion
   */
  review: {
    name: 'Review Article',
    description: 'Literature review or synthesis paper',
    files: {
      'rev.yaml': `title: "Review Title"
authors:
- name: Author Name
  affiliation: Institution

sections:
- introduction.md
- section1.md
- section2.md
- section3.md
- synthesis.md
- conclusion.md

bibliography: references.bib

crossref:
  figureTitle: Figure
  tableTitle: Table
  figPrefix: [Fig., Figs.]
  tblPrefix: [Table, Tables]
`,
      'introduction.md': `# Introduction

Scope and objectives of the review.

`,
      'section1.md': `# Theme One

First major theme or topic.

`,
      'section2.md': `# Theme Two

Second major theme.

`,
      'section3.md': `# Theme Three

Third major theme.

`,
      'synthesis.md': `# Synthesis

Integration of themes and emerging patterns.

`,
      'conclusion.md': `# Conclusion and Future Directions

Key takeaways and research gaps.

`,
      'references.bib': ``,
      '.gitignore': `*.pdf
*.docx
*.tex
paper.md
.paper-*.md
.DS_Store
`,
    },
    directories: ['figures'],
  },
};
285
+
/**
 * Get a template by its id (case-insensitive).
 *
 * Only templates defined directly on TEMPLATES are returned; names that merely
 * exist on the object's prototype chain (e.g. "constructor", "toString") are
 * treated as unknown.
 *
 * @param {string} name - Template id such as "paper" or "thesis"
 * @returns {object|null} The template, or null when the name is unknown or not a string
 */
export function getTemplate(name) {
  if (typeof name !== 'string') return null;

  const id = name.toLowerCase();
  return Object.prototype.hasOwnProperty.call(TEMPLATES, id) ? TEMPLATES[id] : null;
}
294
+
/**
 * Summarize every built-in template.
 * @returns {Array<{id: string, name: string, description: string}>} One entry
 *   per template, in the order they are defined in TEMPLATES
 */
export function listTemplates() {
  const summaries = [];

  for (const id of Object.keys(TEMPLATES)) {
    const { name, description } = TEMPLATES[id];
    summaries.push({ id, name, description });
  }

  return summaries;
}
package/package.json ADDED
@@ -0,0 +1,43 @@
1
+ {
2
+ "name": "docrev",
3
+ "version": "0.2.0",
4
+ "description": "Academic paper revision workflow: Word ↔ Markdown round-trips, DOI validation, reviewer comments",
5
+ "type": "module",
6
+ "bin": {
7
+ "rev": "bin/rev.js"
8
+ },
9
+ "scripts": {
10
+ "build": "echo 'No build needed'",
11
+ "test": "node bin/rev.js --help"
12
+ },
13
+ "repository": {
14
+ "type": "git",
15
+ "url": "git+https://github.com/gcol33/rev.git"
16
+ },
17
+ "bugs": {
18
+ "url": "https://github.com/gcol33/rev/issues"
19
+ },
20
+ "homepage": "https://github.com/gcol33/rev#readme",
21
+ "keywords": [
22
+ "markdown",
23
+ "word",
24
+ "docx",
25
+ "track-changes",
26
+ "comments",
27
+ "academic",
28
+ "writing",
29
+ "pandoc",
30
+ "criticmarkup"
31
+ ],
32
+ "author": "Gilles Colling",
33
+ "license": "MIT",
34
+ "dependencies": {
35
+ "adm-zip": "^0.5.16",
36
+ "chalk": "^5.3.0",
37
+ "commander": "^12.0.0",
38
+ "diff": "^8.0.2",
39
+ "js-yaml": "^4.1.1",
40
+ "mammoth": "^1.6.0",
41
+ "xml2js": "^0.6.2"
42
+ }
43
+ }