@demig0d2/skills 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +111 -0
- package/bin/cli.js +313 -0
- package/package.json +44 -0
- package/skills/book-writer/SKILL.md +1396 -0
- package/skills/book-writer/references/kdp_specs.md +139 -0
- package/skills/book-writer/scripts/kdp_check.py +255 -0
- package/skills/book-writer/scripts/toc_extract.py +151 -0
- package/skills/book-writer/scripts/word_count.py +196 -0
- package/skills/chapter-auditor/SKILL.md +231 -0
- package/skills/chapter-auditor/scripts/score_report.py +237 -0
- package/skills/concept-expander/SKILL.md +170 -0
- package/skills/concept-expander/scripts/validate_concept.py +255 -0
- package/skills/continuity-tracker/SKILL.md +251 -0
- package/skills/continuity-tracker/references/log_schema.md +149 -0
- package/skills/continuity-tracker/scripts/conflict_check.py +179 -0
- package/skills/continuity-tracker/scripts/log_manager.py +258 -0
- package/skills/humanizer/SKILL.md +632 -0
- package/skills/humanizer/references/patterns_quick_ref.md +71 -0
- package/skills/humanizer/scripts/dna_scan.py +168 -0
- package/skills/humanizer/scripts/scan_ai_patterns.py +279 -0
- package/skills/overhaul/SKILL.md +697 -0
- package/skills/overhaul/references/upgrade_checklist.md +81 -0
- package/skills/overhaul/scripts/changelog_gen.py +183 -0
- package/skills/overhaul/scripts/skill_parser.py +265 -0
- package/skills/overhaul/scripts/version_bump.py +128 -0
- package/skills/research-aggregator/SKILL.md +194 -0
- package/skills/research-aggregator/references/thinkers_reference.md +104 -0
- package/skills/research-aggregator/scripts/bank_formatter.py +206 -0
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# KDP Publishing Specifications Reference
|
|
2
|
+
|
|
3
|
+
Complete KDP interior formatting requirements for print-on-demand.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Trim Sizes (DXA units — 1440 DXA = 1 inch)
|
|
8
|
+
|
|
9
|
+
| Trim | Width DXA | Height DXA | Common Use |
|
|
10
|
+
|----------|-----------|------------|------------|
|
|
11
|
+
| 5×8 | 7,200 | 11,520 | Compact non-fiction, poetry |
|
|
12
|
+
| 5.5×8.5 | 7,920 | 12,240 | Standard non-fiction |
|
|
13
|
+
| 6×9 | 8,640 | 12,960 | Most common for trade paperback |
|
|
14
|
+
| 6.14×9.21| 8,842 | 13,262 | Slightly taller trade |
|
|
15
|
+
| 7×10 | 10,080 | 14,400 | Workbooks, textbooks |
|
|
16
|
+
| 8×10 | 11,520 | 14,400 | Illustrated books |
|
|
17
|
+
| 8.5×11 | 12,240 | 15,840 | Workbooks, journals |
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## Minimum Margins by Page Count
|
|
22
|
+
|
|
23
|
+
KDP requires larger inside margins as page count increases (to account for binding).
|
|
24
|
+
|
|
25
|
+
| Page Count | Inside (Gutter) | Outside | Top | Bottom |
|
|
26
|
+
|------------|-----------------|---------|--------|--------|
|
|
27
|
+
| 24–150 | 0.375" | 0.25" | 0.25" | 0.25" |
|
|
28
|
+
| 151–300 | 0.5" | 0.25" | 0.25" | 0.25" |
|
|
29
|
+
| 301–500 | 0.625" | 0.25" | 0.25" | 0.25" |
|
|
30
|
+
| 501–700 | 0.75" | 0.25" | 0.25" | 0.25" |
|
|
31
|
+
| 701–828 | 0.875" | 0.25" | 0.25" | 0.25" |
|
|
32
|
+
|
|
33
|
+
**Vivid's standard margins (6×9, 200–300 pages):**
|
|
34
|
+
|
|
35
|
+
| Side | Inches | DXA |
|
|
36
|
+
|---------|--------|-------|
|
|
37
|
+
| Inside | 0.75" | 1,080 |
|
|
38
|
+
| Outside | 0.6" | 864 |
|
|
39
|
+
| Top | 0.75" | 1,080 |
|
|
40
|
+
| Bottom | 0.6" | 864 |
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## Typography Standards
|
|
45
|
+
|
|
46
|
+
| Element | Font | Size | Weight | Spacing |
|
|
47
|
+
|------------------|----------|------|--------|---------|
|
|
48
|
+
| Body text | Georgia | 11pt | Normal | 1.15 |
|
|
49
|
+
| Chapter title | Georgia | 18pt | Bold | Before: 0.5" After: 0.33" |
|
|
50
|
+
| Section heading | Georgia | 14pt | Bold | Before: 0.33" After: 0.17"|
|
|
51
|
+
| Part title | Georgia | 20pt | Bold | Centered, full page |
|
|
52
|
+
| Epigraph | Georgia | 10pt | Italic | Centered, 0.5" margins |
|
|
53
|
+
| Footer/Header | Georgia | 9pt | Normal | — |
|
|
54
|
+
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
## Body Text Layout
|
|
58
|
+
|
|
59
|
+
- **Alignment**: Justified
|
|
60
|
+
- **First paragraph** after heading: No indent
|
|
61
|
+
- **Subsequent paragraphs**: First-line indent 0.25" (360 DXA)
|
|
62
|
+
- **Line spacing**: 1.15 (line: 276, lineRule: auto in docx-js)
|
|
63
|
+
- **Paragraph spacing**: 0 before, 0 after (paragraph breaks are signaled by the first-line indent, not by extra vertical space)
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## Headers and Footers
|
|
68
|
+
|
|
69
|
+
- **Even pages (verso/left)**: Book title, left-aligned
|
|
70
|
+
- **Odd pages (recto/right)**: Chapter title, right-aligned
|
|
71
|
+
- **Chapter first page**: No header (suppress)
|
|
72
|
+
- **Part divider pages**: No header, no footer
|
|
73
|
+
- **Page numbers**: Bottom outside (left on even, right on odd)
|
|
74
|
+
- **Start page numbering**: From Chapter 1 (front matter uses Roman numerals or no numbers)
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Front Matter Order (standard KDP)
|
|
79
|
+
|
|
80
|
+
1. Half-title page (title only)
|
|
81
|
+
2. Also by the author (optional)
|
|
82
|
+
3. Title page (title + subtitle + author)
|
|
83
|
+
4. Copyright page
|
|
84
|
+
5. Dedication
|
|
85
|
+
6. Table of Contents
|
|
86
|
+
7. Foreword / Preface (if any)
|
|
87
|
+
8. Introduction
|
|
88
|
+
|
|
89
|
+
## Back Matter Order
|
|
90
|
+
|
|
91
|
+
1. Epilogue / Conclusion
|
|
92
|
+
2. Acknowledgments
|
|
93
|
+
3. About the Author
|
|
94
|
+
4. Also by the Author (optional)
|
|
95
|
+
5. Index (non-fiction)
|
|
96
|
+
6. Bibliography (academic)
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Image Requirements
|
|
101
|
+
|
|
102
|
+
| Type | Minimum DPI | Color Mode | Format |
|
|
103
|
+
|-------|-------------|------------|--------|
|
|
104
|
+
| B&W | 300 DPI | Grayscale | PNG/TIFF |
|
|
105
|
+
| Color | 300 DPI | RGB | PNG/TIFF |
|
|
106
|
+
| Cover | 300 DPI | RGB | JPG/TIFF |
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
## File Requirements
|
|
111
|
+
|
|
112
|
+
- **Format**: DOCX (recommended) or PDF
|
|
113
|
+
- **Max file size**: 650 MB
|
|
114
|
+
- **Fonts**: Must be embedded or standard system fonts
|
|
115
|
+
- **No password protection**
|
|
116
|
+
- **No form fields**
|
|
117
|
+
|
|
118
|
+
---
|
|
119
|
+
|
|
120
|
+
## Page Count Estimator
|
|
121
|
+
|
|
122
|
+
At 11pt Georgia, 1.15 spacing, 6×9 with standard margins:
|
|
123
|
+
- ~250–280 words per page
|
|
124
|
+
- 20,000 words ≈ 75–80 pages
|
|
125
|
+
- 40,000 words ≈ 145–160 pages
|
|
126
|
+
- 60,000 words ≈ 215–240 pages
|
|
127
|
+
- 80,000 words ≈ 290–320 pages
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## Common KDP Rejection Reasons
|
|
132
|
+
|
|
133
|
+
1. Margins too small (especially inside/gutter)
|
|
134
|
+
2. Images below 300 DPI
|
|
135
|
+
3. Fonts not embedded
|
|
136
|
+
4. Blank pages at unexpected locations
|
|
137
|
+
5. File size over 650 MB
|
|
138
|
+
6. Page size doesn't match trim size selected at upload
|
|
139
|
+
7. Header/footer content in the margin bleed area
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
kdp_check.py — KDP pre-flight checker for manuscript DOCX files
|
|
4
|
+
|
|
5
|
+
Validates a DOCX manuscript against KDP's technical requirements
|
|
6
|
+
before upload. Checks margins, page size, fonts, images, and structure.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python kdp_check.py <manuscript.docx>
|
|
10
|
+
python kdp_check.py <manuscript.docx> --trim 6x9
|
|
11
|
+
python kdp_check.py <manuscript.docx> --json
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import sys
|
|
15
|
+
import json
|
|
16
|
+
import zipfile
|
|
17
|
+
import re
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
|
|
20
|
+
# ─── KDP Requirements ─────────────────────────────────────────────────────────

# Trim sizes in EMU (914400 EMU = 1 inch)
TRIM_SIZES = {
    "5x8": {"w": 4572000, "h": 7315200},
    "6x9": {"w": 5486400, "h": 8229600},
    "8.5x11": {"w": 7772400, "h": 10058400},
}

# Minimum margins in EMU (KDP requires at least 0.5" outside, 0.625" inside)
MIN_MARGINS = {
    "outside": 457200,  # 0.5 inch
    "inside": 571500,  # 0.625 inch
    "top": 457200,
    "bottom": 457200,
}

# KDP safe fonts (always render correctly)
SAFE_FONTS = {
    "georgia", "times new roman", "garamond", "palatino linotype",
    "arial", "helvetica", "calibri", "cambria", "courier new",
    "book antiqua", "century", "trebuchet ms",
}

KDP_MIN_IMAGE_DPI = 300
KDP_MAX_FILE_SIZE_MB = 650


def check_file_size(filepath: str) -> dict:
    """Verify the manuscript file is within KDP's upload size limit.

    Returns a check-result dict with the same keys as the other
    check_* helpers: check / value / pass / requirement / note.
    """
    megabytes = Path(filepath).stat().st_size / (1024 * 1024)
    within_limit = megabytes <= KDP_MAX_FILE_SIZE_MB
    return {
        "check": "File size",
        "value": f"{megabytes:.1f} MB",
        "pass": within_limit,
        "requirement": f"≤ {KDP_MAX_FILE_SIZE_MB} MB",
        "note": "" if within_limit else "File too large for KDP upload",
    }
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def extract_docx_xml(filepath: str) -> dict:
    """Pull the XML parts the checks inspect out of the DOCX (a ZIP container).

    Returns a mapping of archive member name -> decoded XML text for just
    the parts the check_* helpers need. Exits with status 1 when the file
    is not a readable ZIP archive.
    """
    wanted = (
        "word/document.xml",
        "word/settings.xml",
        "word/styles.xml",
        "[Content_Types].xml",
    )
    extracted = {}
    try:
        with zipfile.ZipFile(filepath, "r") as archive:
            for member in archive.namelist():
                if member in wanted:
                    # errors="replace": tolerate stray bytes rather than abort
                    extracted[member] = archive.read(member).decode("utf-8", errors="replace")
    except zipfile.BadZipFile:
        print(f"Error: Not a valid DOCX file: {filepath}", file=sys.stderr)
        sys.exit(1)
    return extracted
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def check_page_size(xml_files: dict, trim_key: str = None) -> dict:
    """Validate the manuscript's page size against KDP trim sizes.

    Reads the <w:pgSz> element from word/document.xml. When *trim_key*
    names an entry in TRIM_SIZES the page must match that trim (within
    100 DXA ≈ 0.07"); otherwise any known trim size is accepted.
    Returns a check-result dict (check / pass / value / requirement / note).
    """
    doc_xml = xml_files.get("word/document.xml", "")
    # w:pgSz carries width/height in DXA; attribute order is not guaranteed,
    # so try w-then-h first, then h-then-w.
    match = re.search(r'<w:pgSz[^>]+w:w="(\d+)"[^>]+w:h="(\d+)"', doc_xml)
    if not match:
        match = re.search(r'<w:pgSz[^>]+w:h="(\d+)"[^>]+w:w="(\d+)"', doc_xml)
        if match:
            w_dxa, h_dxa = int(match.group(2)), int(match.group(1))
        else:
            return {
                "check": "Page size",
                "pass": None,
                "value": "Not detected",
                "requirement": "Must match chosen trim size",
                "note": "Could not read page dimensions from document",
            }
    else:
        w_dxa, h_dxa = int(match.group(1)), int(match.group(2))

    # DXA to inches (1440 DXA = 1 inch)
    w_in = w_dxa / 1440
    h_in = h_dxa / 1440

    if trim_key and trim_key in TRIM_SIZES:
        expected = TRIM_SIZES[trim_key]
        # Convert EMU to DXA (914400 EMU = 1 inch = 1440 DXA, so EMU/635 = DXA)
        exp_w_dxa = expected["w"] // 635
        exp_h_dxa = expected["h"] // 635
        passed = abs(w_dxa - exp_w_dxa) < 100 and abs(h_dxa - exp_h_dxa) < 100
        req = f"{trim_key} ({exp_w_dxa/1440:.2f}\" × {exp_h_dxa/1440:.2f}\")"
    else:
        # BUG FIX: the original compared only widths here, so any page that
        # matched a known trim WIDTH passed regardless of height (e.g.
        # 6"×20"). Require both dimensions to match the same trim size.
        passed = any(
            abs(w_dxa - ts["w"] // 635) < 100 and abs(h_dxa - ts["h"] // 635) < 100
            for ts in TRIM_SIZES.values()
        )
        req = "One of: 5×8, 6×9, 8.5×11"

    return {
        "check": "Page size",
        "pass": passed,
        "value": f"{w_in:.2f}\" × {h_in:.2f}\"",
        "requirement": req,
        "note": "" if passed else "Page size doesn't match expected trim",
    }
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def check_margins(xml_files: dict) -> dict:
    """Validate page margins against KDP minimums.

    Reads <w:pgMar> from word/document.xml and enforces 0.5" (720 DXA)
    on top/bottom/outside edges and 0.6" (864 DXA) on the inside
    (gutter) edge. Returns a check-result dict.

    NOTE(review): KDP's real gutter minimum grows with page count
    (0.375"–0.875"); 0.6" covers books up to roughly 300 pages.
    Assumes the left margin is the inside/gutter edge (true for recto
    pages without mirrored margins) — TODO confirm for mirrored layouts.
    """
    doc_xml = xml_files.get("word/document.xml", "")
    match = re.search(
        r'<w:pgMar[^>]+w:top="(\d+)"[^>]+w:right="(\d+)"[^>]+w:bottom="(\d+)"[^>]+w:left="(\d+)"',
        doc_xml,
    )
    # CONSISTENCY FIX: the original reported 0.625" inside in one branch and
    # 0.6" in the other while actually enforcing 864 DXA (0.6"). Both
    # branches now state the enforced requirement.
    requirement = "Min 0.5\" outside, 0.6\" inside"
    if not match:
        return {
            "check": "Margins",
            "pass": None,
            "value": "Not detected",
            "requirement": requirement,
            "note": "Could not read margins from document",
        }

    top, right, bottom, left = (int(match.group(i)) for i in range(1, 5))

    min_outside = 720  # DXA = 0.5"
    min_inside = 864   # DXA = 0.6"

    issues = []
    if top < min_outside:
        issues.append(f"Top margin {top/1440:.2f}\" < 0.5\"")
    if bottom < min_outside:
        issues.append(f"Bottom margin {bottom/1440:.2f}\" < 0.5\"")
    if left < min_inside:
        issues.append(f"Left margin {left/1440:.2f}\" < 0.6\"")
    if right < min_outside:
        issues.append(f"Right margin {right/1440:.2f}\" < 0.5\"")

    return {
        "check": "Margins",
        "pass": len(issues) == 0,
        "value": f"T:{top/1440:.2f}\" R:{right/1440:.2f}\" B:{bottom/1440:.2f}\" L:{left/1440:.2f}\"",
        "requirement": requirement,
        "note": "; ".join(issues),
    }
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def check_fonts(xml_files: dict) -> dict:
    """Check that the fonts referenced by the style sheet are KDP-safe.

    BUG FIX: the original scanned w:name attributes in word/styles.xml,
    which hold *style* names ("Normal", "heading 1", ...), not fonts — so
    nearly every valid DOCX was falsely flagged. Font faces are declared
    on <w:rFonts> attributes (w:ascii / w:hAnsi / w:cs / w:eastAsia);
    read those instead. Theme references (w:asciiTheme="minorHAnsi", ...)
    use different attribute names and are deliberately not matched here.
    """
    styles_xml = xml_files.get("word/styles.xml", "")
    font_names = set()
    for rfonts_tag in re.findall(r"<w:rFonts\b[^>]*>", styles_xml):
        for face in re.findall(r'w:(?:ascii|hAnsi|cs|eastAsia)="([^"]+)"', rfonts_tag):
            font_names.add(face.lower())
    unsafe = sorted(f for f in font_names if f not in SAFE_FONTS)

    return {
        "check": "Fonts",
        "pass": len(unsafe) == 0,
        "value": f"{len(font_names)} font(s) detected",
        "requirement": "Use standard embeddable fonts",
        "note": f"Possibly non-standard: {', '.join(unsafe[:3])}" if unsafe else "",
    }
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def check_content_types(xml_files: dict) -> dict:
    """Sanity-check that the package content types describe a Word document."""
    content_types = xml_files.get("[Content_Types].xml", "").lower()
    # A well-formed DOCX declares both the package relationships content
    # type and at least one WordprocessingML part.
    looks_valid = "relationships" in content_types and "wordprocessingml" in content_types
    return {
        "check": "Document structure",
        "pass": looks_valid,
        "value": "Valid DOCX structure" if looks_valid else "Invalid structure",
        "requirement": "Valid OOXML structure",
        "note": "" if looks_valid else "Document may be corrupted",
    }
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def print_report(checks: list, filepath: str):
    """Render the pre-flight results as a fixed-width console table."""
    passed = sum(c["pass"] is True for c in checks)
    failed = sum(c["pass"] is False for c in checks)
    unknown = sum(c["pass"] is None for c in checks)

    # Tri-state status labels keyed by the check's "pass" value.
    status_labels = {True: "✓ PASS", False: "✗ FAIL", None: "? UNKNOWN"}

    print(f"\n{'═' * 60}")
    print(f" KDP PRE-FLIGHT CHECK")
    print(f" File: {filepath}")
    print(f"{'═' * 60}")
    print(f" {'CHECK':<24} {'STATUS':<16} {'VALUE':<22} NOTE")
    print(f" {'─' * 56}")

    for check in checks:
        status = status_labels[check["pass"]]
        note = check.get("note", "")[:30]
        print(f" {check['check']:<24} {status:<16} {check['value']:<22} {note}")

    print(f"\n {'─' * 56}")
    print(f" Result: {passed} passed {failed} failed {unknown} unknown")
    if failed == 0 and unknown == 0:
        print(f" ✓ Ready for KDP upload.")
    elif failed > 0:
        print(f" ✗ Fix {failed} issue(s) before uploading.")
    print(f"\n{'═' * 60}\n")
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
# ─── Main ─────────────────────────────────────────────────────────────────────

if __name__ == "__main__":
    # CLI entry point: kdp_check.py <file.docx> [--trim 5x8|6x9|8.5x11] [--json]
    if len(sys.argv) < 2:
        print("Usage: python kdp_check.py <file.docx> [--trim 5x8|6x9|8.5x11] [--json]")
        sys.exit(1)

    filepath = sys.argv[1]
    trim_key = None  # optional key into TRIM_SIZES; None = accept any known trim
    mode = "--report"  # default output mode; switched to "--json" by flag below

    # Minimal hand-rolled flag parsing (no argparse): scan the arguments after
    # the file path. "--trim" consumes the following argument as its value; an
    # unknown trim key is passed through and simply won't match TRIM_SIZES.
    for i, arg in enumerate(sys.argv[2:], 2):
        if arg == "--trim" and i + 1 < len(sys.argv):
            trim_key = sys.argv[i + 1]
        elif arg == "--json":
            mode = "--json"

    if not Path(filepath).exists():
        print(f"Error: File not found: {filepath}", file=sys.stderr)
        sys.exit(1)

    # extract_docx_xml exits with status 1 on a non-ZIP file.
    xml_files = extract_docx_xml(filepath)

    # Run every pre-flight check; each returns a uniform result dict
    # (check / pass / value / requirement / note).
    checks = [
        check_file_size(filepath),
        check_page_size(xml_files, trim_key),
        check_margins(xml_files),
        check_fonts(xml_files),
        check_content_types(xml_files),
    ]

    if mode == "--json":
        print(json.dumps({"file": filepath, "checks": checks}, indent=2))
    else:
        print_report(checks, filepath)
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
toc_extract.py — Extract table of contents from a manuscript
|
|
4
|
+
|
|
5
|
+
Detects chapter headings, part dividers, and section structure.
|
|
6
|
+
Outputs formatted TOC or JSON.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python toc_extract.py <manuscript_file>
|
|
10
|
+
python toc_extract.py <manuscript_file> --json
|
|
11
|
+
python toc_extract.py <manuscript_file> --markdown
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import sys
|
|
15
|
+
import re
|
|
16
|
+
import json
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
# Structural markers. extract_toc matches these against heading text with any
# leading markdown '#' characters stripped, so "## Introduction" and
# "Introduction" classify identically.
PART_PATTERN = re.compile(r"^(Part\s+(I{1,4}|[0-9]+)|PART\s+\w+)[:\s]?", re.IGNORECASE)
CHAPTER_PATTERN = re.compile(r"^(Chapter\s+\d+|CHAPTER\s+\d+|#{1,2}\s+Chapter)", re.IGNORECASE)
INTRO_PATTERN = re.compile(r"^(Introduction|Foreword|Preface|Prologue|How to Use)", re.IGNORECASE)
OUTRO_PATTERN = re.compile(r"^(Conclusion|Epilogue|Afterword|About the Author|Acknowledgments)", re.IGNORECASE)
HEADING_PATTERN = re.compile(r"^#{1,3}\s+(.+)")


def extract_toc(filepath: str) -> dict:
    """Scan a plain-text/markdown manuscript and build a TOC structure.

    Returns {"front_matter": [...], "parts": [...], "back_matter": [...],
    "ungrouped_chapters": [...]}. Chapters found after a part divider are
    attached to that part; chapters before any part go to
    "ungrouped_chapters". Exits with status 1 if the file is missing.

    BUG FIX: front/back-matter and part patterns are now matched against
    the heading text with leading '#' stripped. Previously a markdown
    heading such as "## Introduction" failed INTRO_PATTERN (anchored at
    ^) and fell through to the generic-heading branch, where it was
    miscounted as a chapter.
    """
    path = Path(filepath)
    if not path.exists():
        print(f"Error: File not found: {filepath}", file=sys.stderr)
        sys.exit(1)

    lines = path.read_text(encoding="utf-8").splitlines()

    toc = {
        "front_matter": [],
        "parts": [],
        "back_matter": [],
        "ungrouped_chapters": [],
    }

    current_part = None  # most recent part divider; chapters nest under it
    chapter_num = 0      # running chapter counter across the whole book

    for line_num, line in enumerate(lines, 1):
        stripped = line.strip()
        if not stripped:
            continue

        # Classify by the heading text itself, ignoring markdown '#' marks.
        title = stripped.lstrip("#").strip()

        if INTRO_PATTERN.match(title):
            toc["front_matter"].append({
                "title": title,
                "line": line_num,
                "type": "front",
            })

        elif OUTRO_PATTERN.match(title):
            toc["back_matter"].append({
                "title": title,
                "line": line_num,
                "type": "back",
            })

        elif PART_PATTERN.match(title):
            current_part = {
                "title": title,
                "line": line_num,
                "chapters": [],
            }
            toc["parts"].append(current_part)

        elif CHAPTER_PATTERN.match(title) or HEADING_PATTERN.match(stripped):
            chapter_num += 1
            chapter = {
                "number": chapter_num,
                "title": title,
                "line": line_num,
            }
            if current_part:
                current_part["chapters"].append(chapter)
            else:
                toc["ungrouped_chapters"].append(chapter)

    return toc
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def print_toc(toc: dict):
    """Pretty-print the extracted TOC to stdout as a framed outline."""
    banner = "═" * 60
    print(f"\n{banner}")
    print(f" TABLE OF CONTENTS")
    print(f"{banner}\n")

    front = toc["front_matter"]
    if front:
        for entry in front:
            print(f" {entry['title']}")
        print()

    parts = toc["parts"]
    if parts:
        for part in parts:
            print(f" {part['title'].upper()}")
            for chapter in part["chapters"]:
                print(f" Chapter {chapter['number']}: {chapter['title']}")
            print()
    elif toc["ungrouped_chapters"]:
        for chapter in toc["ungrouped_chapters"]:
            print(f" Chapter {chapter['number']}: {chapter['title']}")
        print()

    back = toc["back_matter"]
    if back:
        for entry in back:
            print(f" {entry['title']}")
        print()

    # Chapters live either under parts or in the ungrouped bucket, never both.
    chapter_total = sum(len(p["chapters"]) for p in parts) + len(toc["ungrouped_chapters"])
    print(f" Total chapters: {chapter_total}")
    print(f"{banner}\n")
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def print_markdown_toc(toc: dict):
    """Emit the TOC as a markdown bullet list suitable for pasting into the book."""
    out = ["## Table of Contents\n"]

    out.extend(f"- {entry['title']}" for entry in toc["front_matter"])

    if toc["parts"]:
        for part in toc["parts"]:
            out.append(f"\n### {part['title']}")
            out.extend(
                f"- Chapter {ch['number']}: {ch['title']}" for ch in part["chapters"]
            )
    else:
        out.extend(
            f"- Chapter {ch['number']}: {ch['title']}"
            for ch in toc["ungrouped_chapters"]
        )

    if toc["back_matter"]:
        out.append("")  # blank separator line before back matter
        out.extend(f"- {entry['title']}" for entry in toc["back_matter"])

    # One write; joining with '\n' reproduces the per-line print output exactly.
    print("\n".join(out))
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
if __name__ == "__main__":
    # CLI entry point: toc_extract.py <file> [--json|--markdown]
    if len(sys.argv) < 2:
        print("Usage: python toc_extract.py <file> [--json|--markdown]")
        sys.exit(1)

    filepath = sys.argv[1]
    # Output mode is the optional second argument; any unrecognized value
    # falls through to the default console report.
    mode = sys.argv[2] if len(sys.argv) > 2 else "--report"

    # extract_toc exits with status 1 if the file does not exist.
    toc = extract_toc(filepath)

    if mode == "--json":
        print(json.dumps(toc, indent=2))
    elif mode == "--markdown":
        print_markdown_toc(toc)
    else:
        print_toc(toc)
|