dimcode-darwin-x64 0.1.2-beta.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. package/bin/dimcode +0 -0
  2. package/package.json +1 -1
  3. package/bin/runtime/sandbox/dim-sandbox-runner +0 -0
  4. package/bin/runtime/sandbox/manifest.json +0 -15
  5. package/bin/skills-assets/deep-investigate/SKILL.md +0 -101
  6. package/bin/skills-assets/deep-investigate/references/prompts.md +0 -75
  7. package/bin/skills-assets/deep-investigate/references/templates.md +0 -73
  8. package/bin/skills-assets/deep-investigate/references/thinking-tools.md +0 -36
  9. package/bin/skills-assets/docs-sprint/SKILL.md +0 -73
  10. package/bin/skills-assets/docs-sprint/agents/openai.yaml +0 -4
  11. package/bin/skills-assets/docs-sprint/references/contract-discipline.md +0 -30
  12. package/bin/skills-assets/docs-sprint/references/delivery-plan.md +0 -162
  13. package/bin/skills-assets/docs-sprint/references/documentation-system.md +0 -109
  14. package/bin/skills-assets/docs-sprint/references/ui-layout.md +0 -73
  15. package/bin/skills-assets/docs-sprint/references/worktree-guide.md +0 -45
  16. package/bin/skills-assets/docx/SKILL.md +0 -273
  17. package/bin/skills-assets/docx/assets/styles/academic_styles.xml +0 -250
  18. package/bin/skills-assets/docx/assets/styles/corporate_styles.xml +0 -284
  19. package/bin/skills-assets/docx/assets/styles/default_styles.xml +0 -449
  20. package/bin/skills-assets/docx/assets/xsd/aesthetic-rules.xsd +0 -470
  21. package/bin/skills-assets/docx/assets/xsd/business-rules.xsd +0 -130
  22. package/bin/skills-assets/docx/assets/xsd/common-types.xsd +0 -159
  23. package/bin/skills-assets/docx/assets/xsd/wml-subset.xsd +0 -589
  24. package/bin/skills-assets/docx/references/cjk_typography.md +0 -357
  25. package/bin/skills-assets/docx/references/cjk_university_template_guide.md +0 -184
  26. package/bin/skills-assets/docx/references/comments_guide.md +0 -191
  27. package/bin/skills-assets/docx/references/design_good_bad_examples.md +0 -829
  28. package/bin/skills-assets/docx/references/design_principles.md +0 -819
  29. package/bin/skills-assets/docx/references/openxml_element_order.md +0 -308
  30. package/bin/skills-assets/docx/references/openxml_encyclopedia_part1.md +0 -4061
  31. package/bin/skills-assets/docx/references/openxml_encyclopedia_part2.md +0 -2820
  32. package/bin/skills-assets/docx/references/openxml_encyclopedia_part3.md +0 -3381
  33. package/bin/skills-assets/docx/references/openxml_namespaces.md +0 -82
  34. package/bin/skills-assets/docx/references/openxml_units.md +0 -72
  35. package/bin/skills-assets/docx/references/scenario_a_create.md +0 -284
  36. package/bin/skills-assets/docx/references/scenario_b_edit_content.md +0 -295
  37. package/bin/skills-assets/docx/references/scenario_c_apply_template.md +0 -456
  38. package/bin/skills-assets/docx/references/track_changes_guide.md +0 -200
  39. package/bin/skills-assets/docx/references/troubleshooting.md +0 -506
  40. package/bin/skills-assets/docx/references/typography_guide.md +0 -294
  41. package/bin/skills-assets/docx/references/xsd_validation_guide.md +0 -158
  42. package/bin/skills-assets/docx/scripts/doc_to_docx.sh +0 -40
  43. package/bin/skills-assets/docx/scripts/docx_preview.sh +0 -37
  44. package/bin/skills-assets/docx/scripts/dotnet/Docx.Cli/Docx.Cli.csproj +0 -19
  45. package/bin/skills-assets/docx/scripts/dotnet/Docx.Cli/Program.cs +0 -18
  46. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Commands/AnalyzeCommand.cs +0 -147
  47. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Commands/ApplyTemplateCommand.cs +0 -322
  48. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Commands/CreateCommand.cs +0 -324
  49. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Commands/DiffCommand.cs +0 -155
  50. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Commands/EditContentCommand.cs +0 -487
  51. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Commands/FixOrderCommand.cs +0 -108
  52. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Commands/MergeRunsCommand.cs +0 -122
  53. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Commands/ValidateCommand.cs +0 -107
  54. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Docx.Core.csproj +0 -15
  55. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/OpenXml/CommentSynchronizer.cs +0 -169
  56. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/OpenXml/ElementOrder.cs +0 -80
  57. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/OpenXml/NamespaceConstants.cs +0 -42
  58. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/OpenXml/RunMerger.cs +0 -81
  59. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/OpenXml/StyleAnalyzer.cs +0 -81
  60. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/OpenXml/TrackChangesHelper.cs +0 -99
  61. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/OpenXml/UnitConverter.cs +0 -23
  62. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/AestheticRecipeSamples.cs +0 -1832
  63. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/AestheticRecipeSamples_Batch1.cs +0 -910
  64. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/AestheticRecipeSamples_Batch2.cs +0 -999
  65. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/AestheticRecipeSamples_Batch3.cs +0 -1048
  66. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/AestheticRecipeSamples_Batch4.cs +0 -1038
  67. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/CharacterFormattingSamples.cs +0 -1020
  68. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/DocumentCreationSamples.cs +0 -1121
  69. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/FieldAndTocSamples.cs +0 -624
  70. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/FootnoteAndCommentSamples.cs +0 -675
  71. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/HeaderFooterSamples.cs +0 -838
  72. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/ImageSamples.cs +0 -917
  73. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/ListAndNumberingSamples.cs +0 -826
  74. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/ParagraphFormattingSamples.cs +0 -1199
  75. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/StyleSystemSamples.cs +0 -1487
  76. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/TableSamples.cs +0 -1163
  77. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Samples/TrackChangesSamples.cs +0 -595
  78. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Typography/CjkHelper.cs +0 -39
  79. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Typography/FontDefaults.cs +0 -24
  80. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Typography/PageSizes.cs +0 -20
  81. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Validation/BusinessRuleValidator.cs +0 -224
  82. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Validation/GateCheckValidator.cs +0 -148
  83. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Validation/ValidationResult.cs +0 -23
  84. package/bin/skills-assets/docx/scripts/dotnet/Docx.Core/Validation/XsdValidator.cs +0 -69
  85. package/bin/skills-assets/docx/scripts/dotnet/Docx.slnx +0 -4
  86. package/bin/skills-assets/docx/scripts/env_check.sh +0 -196
  87. package/bin/skills-assets/docx/scripts/setup.ps1 +0 -274
  88. package/bin/skills-assets/docx/scripts/setup.sh +0 -504
  89. package/bin/skills-assets/pdf/README.md +0 -222
  90. package/bin/skills-assets/pdf/SKILL.md +0 -191
  91. package/bin/skills-assets/pdf/design/design.md +0 -381
  92. package/bin/skills-assets/pdf/scripts/cover.py +0 -1579
  93. package/bin/skills-assets/pdf/scripts/fill_inspect.py +0 -200
  94. package/bin/skills-assets/pdf/scripts/fill_write.py +0 -242
  95. package/bin/skills-assets/pdf/scripts/make.sh +0 -491
  96. package/bin/skills-assets/pdf/scripts/merge.py +0 -112
  97. package/bin/skills-assets/pdf/scripts/palette.py +0 -521
  98. package/bin/skills-assets/pdf/scripts/reformat_parse.py +0 -374
  99. package/bin/skills-assets/pdf/scripts/render_body.py +0 -1052
  100. package/bin/skills-assets/pdf/scripts/render_cover.js +0 -111
  101. package/bin/skills-assets/pptx-generator/SKILL.md +0 -248
  102. package/bin/skills-assets/pptx-generator/references/design-system.md +0 -392
  103. package/bin/skills-assets/pptx-generator/references/editing.md +0 -162
  104. package/bin/skills-assets/pptx-generator/references/pitfalls.md +0 -112
  105. package/bin/skills-assets/pptx-generator/references/pptxgenjs.md +0 -420
  106. package/bin/skills-assets/pptx-generator/references/slide-types.md +0 -413
  107. package/bin/skills-assets/skill-creator/SKILL.md +0 -368
  108. package/bin/skills-assets/skill-creator/agents/openai.yaml +0 -5
  109. package/bin/skills-assets/skill-creator/assets/skill-creator-small.svg +0 -3
  110. package/bin/skills-assets/skill-creator/assets/skill-creator.png +0 -0
  111. package/bin/skills-assets/skill-creator/license.txt +0 -202
  112. package/bin/skills-assets/skill-creator/references/openai_yaml.md +0 -49
  113. package/bin/skills-assets/skill-creator/scripts/generate_openai_yaml.py +0 -226
  114. package/bin/skills-assets/skill-creator/scripts/init_skill.py +0 -397
  115. package/bin/skills-assets/skill-creator/scripts/quick_validate.py +0 -101
  116. package/bin/skills-assets/skill-installer/LICENSE.txt +0 -202
  117. package/bin/skills-assets/skill-installer/SKILL.md +0 -58
  118. package/bin/skills-assets/skill-installer/agents/openai.yaml +0 -5
  119. package/bin/skills-assets/skill-installer/assets/skill-installer-small.svg +0 -3
  120. package/bin/skills-assets/skill-installer/assets/skill-installer.png +0 -0
  121. package/bin/skills-assets/skill-installer/scripts/github_utils.py +0 -21
  122. package/bin/skills-assets/skill-installer/scripts/install-skill-from-github.py +0 -308
  123. package/bin/skills-assets/skill-installer/scripts/list-skills.py +0 -107
  124. package/bin/skills-assets/xlsx/SKILL.md +0 -137
  125. package/bin/skills-assets/xlsx/references/create.md +0 -691
  126. package/bin/skills-assets/xlsx/references/edit.md +0 -684
  127. package/bin/skills-assets/xlsx/references/fix.md +0 -37
  128. package/bin/skills-assets/xlsx/references/format.md +0 -768
  129. package/bin/skills-assets/xlsx/references/ooxml-cheatsheet.md +0 -231
  130. package/bin/skills-assets/xlsx/references/read-analyze.md +0 -97
  131. package/bin/skills-assets/xlsx/references/validate.md +0 -772
  132. package/bin/skills-assets/xlsx/scripts/formula_check.py +0 -422
  133. package/bin/skills-assets/xlsx/scripts/libreoffice_recalc.py +0 -248
  134. package/bin/skills-assets/xlsx/scripts/shared_strings_builder.py +0 -163
  135. package/bin/skills-assets/xlsx/scripts/style_audit.py +0 -575
  136. package/bin/skills-assets/xlsx/scripts/xlsx_add_column.py +0 -395
  137. package/bin/skills-assets/xlsx/scripts/xlsx_insert_row.py +0 -274
  138. package/bin/skills-assets/xlsx/scripts/xlsx_pack.py +0 -87
  139. package/bin/skills-assets/xlsx/scripts/xlsx_reader.py +0 -362
  140. package/bin/skills-assets/xlsx/scripts/xlsx_shift_rows.py +0 -396
  141. package/bin/skills-assets/xlsx/scripts/xlsx_unpack.py +0 -130
  142. package/bin/skills-assets/xlsx/templates/minimal_xlsx/[Content_Types].xml +0 -9
  143. package/bin/skills-assets/xlsx/templates/minimal_xlsx/_rels/.rels +0 -6
  144. package/bin/skills-assets/xlsx/templates/minimal_xlsx/xl/_rels/workbook.xml.rels +0 -19
  145. package/bin/skills-assets/xlsx/templates/minimal_xlsx/xl/sharedStrings.xml +0 -33
  146. package/bin/skills-assets/xlsx/templates/minimal_xlsx/xl/styles.xml +0 -160
  147. package/bin/skills-assets/xlsx/templates/minimal_xlsx/xl/workbook.xml +0 -30
  148. package/bin/skills-assets/xlsx/templates/minimal_xlsx/xl/worksheets/sheet1.xml +0 -70
@@ -1,374 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- reformat_parse.py — Convert an existing document into content.json,
4
- then hand off to the CREATE pipeline (render_body.py).
5
-
6
- Supported input formats:
7
- .md / .txt — Markdown / plain text
8
- .pdf — Extract text from existing PDF (layout preserved as best-effort)
9
- .json — Pass-through if already content.json format
10
-
11
- Usage:
12
- python3 reformat_parse.py --input doc.md --out content.json
13
- python3 reformat_parse.py --input old.pdf --out content.json
14
- python3 reformat_parse.py --input data.json --out content.json
15
-
16
- Then pipe into the CREATE pipeline:
17
- python3 render_body.py --tokens tokens.json --content content.json --out body.pdf
18
-
19
- Or use make.sh reformat which does both steps:
20
- bash make.sh reformat --input doc.md --type report --title "My Report" --out output.pdf
21
-
22
- Exit codes: 0 success, 1 bad args / unsupported format, 2 dep missing, 3 parse error
23
- """
24
-
25
- import argparse
26
- import json
27
- import os
28
- import re
29
- import sys
30
- import importlib.util
31
- from pathlib import Path
32
-
33
-
34
-
35
-
36
def ensure_deps(packages=("pypdf",)):
    """
    Make sure the third-party packages this script needs are importable.

    Every name in *packages* that cannot be located is installed with pip
    into the interpreter that is running this script.

    Args:
        packages: Names to check. Each name is used both for the import
            lookup and as the pip requirement, so this only works for
            distributions whose import name matches (true for ``pypdf``).

    Raises:
        SystemExit: with code 2 (the "dep missing" exit code from the
            module docstring) when a missing package cannot be installed.
    """
    missing = [name for name in packages if importlib.util.find_spec(name) is None]
    if not missing:
        return

    import subprocess

    pip_install = [sys.executable, "-m", "pip", "install"]
    # --break-system-packages is needed on "externally managed" interpreters
    # but is rejected as an unknown option by older pip releases, so retry
    # without it before giving up.
    attempts = (
        pip_install + ["--break-system-packages", "-q"] + missing,
        pip_install + ["-q"] + missing,
    )
    for cmd in attempts:
        try:
            subprocess.check_call(cmd)
        except (subprocess.CalledProcessError, OSError):
            continue
        # Let the import system notice the freshly installed package.
        importlib.invalidate_caches()
        return

    print(json.dumps({
        "status": "error",
        "error": f"Failed to install required packages: {', '.join(missing)}",
    }), file=sys.stderr)
    sys.exit(2)
45
-
46
-
47
- ensure_deps()
48
-
49
-
50
- # ── Markdown / plain text parser ───────────────────────────────────────────────
51
def parse_markdown(text: str) -> list:
    """
    Convert Markdown text into a list of content.json blocks.

    Each line is classified in this order:
      - blank line: ends the current paragraph
      - ATX heading ``#`` .. ``######`` (levels 4-6 are emitted as ``h3``)
      - display math: ``$$expr$$`` on one line, or a ``$$`` ... ``$$`` fence
      - fenced code block delimited by three backticks or ``~~~``
      - ``>`` blockquote: one ``callout`` block per line
      - horizontal rule (``---``, ``***``, ``___``, ``* * *``): ``spacer``
      - unordered (``-``, ``*``, ``+``) and ordered (``1.``) list items
      - pipe table: separator rows dropped, first row becomes the headers
      - anything else is accumulated into a ``body`` paragraph

    Args:
        text: The Markdown source.

    Returns:
        A list of block dicts, each with a ``type`` key plus ``text``,
        ``headers``/``rows`` (tables) or ``pt`` (spacers).
    """
    blocks = []
    lines = text.splitlines()
    para_buf = []
    # A rule is three or more of -, * or _; the same character may also be
    # spaced out ("* * *"), which must not be mistaken for a bullet item.
    hr_re = re.compile(r'^(?:[-*_]{3,}|([-*_])(?:[ \t]*\1){2,})$')

    def flush_para():
        # Emit the buffered plain-text lines as one body block and reset.
        joined = " ".join(para_buf).strip()
        para_buf.clear()
        if joined:
            blocks.append({"type": "body", "text": _md_inline(joined)})

    i = 0
    while i < len(lines):
        stripped = lines[i].strip()

        # Blank line — flush paragraph buffer
        if not stripped:
            flush_para()
            i += 1
            continue

        # ATX headings; only h1-h3 exist downstream, so deeper levels clamp to h3
        m = re.match(r'^(#{1,6})\s+(.*)', stripped)
        if m:
            flush_para()
            level = min(len(m.group(1)), 3)
            blocks.append({"type": f"h{level}", "text": _md_inline(m.group(2))})
            i += 1
            continue

        # Display math block: $$expr$$ on one line, or opening $$ ... closing $$
        if stripped.startswith("$$"):
            flush_para()
            inline_expr = stripped[2:].rstrip("$").strip()
            if inline_expr:
                blocks.append({"type": "math", "text": inline_expr})
                i += 1
            else:
                math_lines = []
                i += 1
                while i < len(lines) and lines[i].strip() != "$$":
                    math_lines.append(lines[i])
                    i += 1
                if i < len(lines):
                    i += 1  # skip closing $$
                blocks.append({"type": "math", "text": "\n".join(math_lines).strip()})
            continue

        # Fenced code block: ``` or ~~~
        if stripped.startswith("```") or stripped.startswith("~~~"):
            flush_para()
            fence = stripped[:3]
            code_lines = []
            i += 1
            while i < len(lines) and not lines[i].strip().startswith(fence):
                code_lines.append(lines[i])
                i += 1
            if i < len(lines):
                i += 1  # skip closing fence
            blocks.append({"type": "code", "text": "\n".join(code_lines)})
            continue

        # Blockquote → callout
        if stripped.startswith(">"):
            flush_para()
            quote = re.sub(r'^>\s*', '', stripped)
            blocks.append({"type": "callout", "text": _md_inline(quote)})
            i += 1
            continue

        # Horizontal rule → spacer (checked before bullets, see hr_re above)
        if hr_re.match(stripped):
            flush_para()
            blocks.append({"type": "spacer", "pt": 16})
            i += 1
            continue

        # Unordered bullet: -, *, +
        if re.match(r'^[-*+]\s+', stripped):
            flush_para()
            item = re.sub(r'^[-*+]\s+', '', stripped)
            blocks.append({"type": "bullet", "text": _md_inline(item)})
            i += 1
            continue

        # Ordered list: 1. 2. etc. → numbered
        if re.match(r'^\d+\.\s+', stripped):
            flush_para()
            item = re.sub(r'^\d+\.\s+', '', stripped)
            blocks.append({"type": "numbered", "text": _md_inline(item)})
            i += 1
            continue

        # Table: | col | col |
        if stripped.startswith("|"):
            flush_para()
            table_lines = []
            while i < len(lines) and lines[i].strip().startswith("|"):
                table_lines.append(lines[i].strip())
                i += 1
            # Drop separator rows (|---|---|), then split the rest into cells
            parsed = [
                [cell.strip() for cell in row.strip("|").split("|")]
                for row in table_lines
                if not re.match(r'^\|[-:| ]+\|$', row)
            ]
            if len(parsed) >= 2:
                blocks.append({
                    "type": "table",
                    "headers": parsed[0],
                    "rows": parsed[1:],
                })
            elif len(parsed) == 1:
                # Single row — treat as paragraph
                blocks.append({"type": "body", "text": " | ".join(parsed[0])})
            continue

        # Plain text → accumulate into paragraph
        para_buf.append(stripped)
        i += 1

    flush_para()
    return blocks
192
-
193
-
194
- def _md_inline(text: str) -> str:
195
- """Convert inline Markdown to ReportLab XML markup."""
196
- # Bold: **text** or __text__
197
- text = re.sub(r'\*\*(.+?)\*\*', r'<b>\1</b>', text)
198
- text = re.sub(r'__(.+?)__', r'<b>\1</b>', text)
199
- # Italic: *text* or _text_
200
- text = re.sub(r'\*(.+?)\*', r'<i>\1</i>', text)
201
- text = re.sub(r'_(.+?)_', r'<i>\1</i>', text)
202
- # Inline code: `code`
203
- text = re.sub(r'`(.+?)`', r'<font name="Courier">\1</font>', text)
204
- # Strip markdown links, keep text
205
- text = re.sub(r'\[(.+?)\]\(.+?\)', r'\1', text)
206
- return text
207
-
208
-
209
- # ── PDF text extractor ─────────────────────────────────────────────────────────
210
def parse_pdf(pdf_path: str) -> list:
    """
    Pull the text out of an existing PDF and turn it into content.json blocks.

    The text of every page is extracted with pypdf; pages that yield no
    text are skipped. The per-page texts are joined with a blank line and
    handed to parse_plain(), so the result is only as good as the plain
    text the PDF gives up.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The list of blocks produced by parse_plain().
    """
    from pypdf import PdfReader

    extracted = (page.extract_text() for page in PdfReader(pdf_path).pages)
    page_texts = [raw.strip() for raw in extracted if raw]

    # Most PDFs lose their formatting on extraction, so the combined text
    # is treated as plain text from here on.
    return parse_plain("\n\n".join(page_texts))
231
-
232
-
233
def parse_plain(text: str) -> list:
    """
    Heuristic plain-text parser.

    The text is split into paragraphs on blank lines, and each paragraph
    becomes one of:
      - bullet blocks, when its first line starts with "- ", "• " or "* "
        (lines without a marker are treated as wrapped continuations of
        the previous item);
      - an ``h1`` block, when it is a single line shorter than 80
        characters that is ALL-CAPS, or starts with a capital letter and
        contains no '.', '!' or '?';
      - a ``body`` block otherwise, with its lines joined by spaces.

    ALL-CAPS headings are converted to title case; mixed-case headings are
    kept verbatim so acronyms such as "API" survive.

    Args:
        text: Plain text, e.g. as extracted from a PDF.

    Returns:
        A list of block dicts with ``type`` and ``text`` keys.
    """
    blocks = []
    bullet_re = re.compile(r'^[-•*]\s+')
    # A "word" is a run of letters, optionally with one inner apostrophe, so
    # that "DON'T" becomes "Don't" rather than str.title()'s "Don'T".
    word_re = re.compile(r"[^\W\d_]+(?:['’][^\W\d_]+)?")

    for para in re.split(r'\n{2,}', text.strip()):
        para = para.strip()
        if not para:
            continue

        lines = [ln.strip() for ln in para.splitlines()]
        lines = [ln for ln in lines if ln]

        # Bullet lists — checked before headings so that an ALL-CAPS item
        # such as "- FIRST ITEM" is not mistaken for a heading.
        if lines[0].startswith(("- ", "• ", "* ")):
            items = []
            for line in lines:
                if bullet_re.match(line):
                    item = bullet_re.sub('', line)
                    if item:
                        items.append(item)
                elif items:
                    # No marker: a wrapped line belonging to the previous item.
                    items[-1] = f"{items[-1]} {line}"
                else:
                    items.append(line)
            blocks.extend({"type": "bullet", "text": item} for item in items)
            continue

        # Single short line that looks like a heading
        if len(lines) == 1 and len(para) < 80:
            if para.isupper():
                heading = word_re.sub(lambda m: m.group(0).capitalize(), para)
                blocks.append({"type": "h1", "text": heading})
                continue
            if re.match(r'^[A-Z][^.!?]*$', para):
                blocks.append({"type": "h1", "text": para})
                continue

        # Regular paragraph
        blocks.append({"type": "body", "text": " ".join(lines)})

    return blocks
267
-
268
-
269
- # ── Pass-through validator ─────────────────────────────────────────────────────
270
# Block types that validate_content_json() accepts without a warning.
VALID_TYPES = {"h1", "h2", "h3", "body", "bullet", "numbered", "callout", "table",
               "image", "code", "math", "divider", "caption", "pagebreak", "spacer"}


def validate_content_json(data: list) -> tuple[list, list]:
    """
    Check a list of content blocks that is being passed through unchanged.

    Entries that are not dicts are dropped. Dicts whose ``type`` is not in
    VALID_TYPES are kept, but reported. Nothing is raised for malformed
    input: every problem becomes a warning string.

    Args:
        data: The candidate block list (normally decoded from JSON).

    Returns:
        ``(valid_blocks, warnings)`` — the blocks that were kept, in input
        order, and one message per problem found.
    """
    # JSON may hold null, a string or an object where the list should be;
    # report that instead of crashing or iterating over characters/keys.
    if not isinstance(data, (list, tuple)):
        return [], [f"Content is not a list of blocks (got {type(data).__name__})"]

    valid, warnings = [], []
    for i, block in enumerate(data):
        if not isinstance(block, dict):
            warnings.append(f"Block {i}: not a dict, skipped")
            continue
        btype = block.get("type")
        # The isinstance guard keeps unhashable values (lists, dicts) from
        # raising TypeError in the set membership test.
        if not isinstance(btype, str) or btype not in VALID_TYPES:
            warnings.append(f"Block {i}: unknown type '{btype}', kept as-is")
        valid.append(block)
    return valid, warnings
285
-
286
-
287
- # ── Dispatcher ─────────────────────────────────────────────────────────────────
288
def parse_file(input_path: str) -> tuple[list, list]:
    """
    Parse *input_path* according to its file extension.

    ``.md`` / ``.txt`` / ``.markdown`` go through parse_markdown(),
    ``.pdf`` through parse_pdf(), and ``.json`` is validated and passed
    through. A JSON document may be either the block list itself or an
    object holding the list under a ``"content"`` key.

    Text and JSON inputs are read as UTF-8; a leading byte-order mark is
    accepted and discarded.

    Args:
        input_path: Path of the file to parse.

    Returns:
        ``(blocks, warnings)``. An unsupported extension or an unusable
        JSON document yields an empty block list and one warning.
    """
    ext = Path(input_path).suffix.lower()

    if ext in (".md", ".txt", ".markdown"):
        # utf-8-sig drops a BOM, which would otherwise hide a first-line heading.
        with open(input_path, encoding="utf-8-sig", errors="replace") as f:
            return parse_markdown(f.read()), []

    if ext == ".pdf":
        blocks = parse_pdf(input_path)
        return blocks, ["PDF text extraction is best-effort — review content.json before rendering"]

    if ext == ".json":
        # Explicit encoding: JSON is UTF-8 whatever the platform default is,
        # and json.load() rejects text that still carries a BOM.
        with open(input_path, encoding="utf-8-sig") as f:
            data = json.load(f)
        if isinstance(data, list):
            return validate_content_json(data)
        # Maybe it's a meta-wrapper {"content": [...]}
        if isinstance(data, dict) and "content" in data:
            return validate_content_json(data["content"])
        return [], ["JSON file does not contain a list of content blocks"]

    return [], [f"Unsupported file type: {ext}. Supported: .md .txt .pdf .json"]
313
-
314
-
315
- # ── CLI ────────────────────────────────────────────────────────────────────────
316
def main():
    """
    Command-line entry point: parse ``--input`` and write ``--out``.

    On success the block list is written to the output file as JSON, a
    JSON status object is printed to stdout, and a human-readable summary
    (block counts per type, warnings, next step) is printed to stderr.
    Errors are reported as a JSON object on stderr.

    Exit codes:
        0  success
        1  bad arguments, input file not found, or unsupported file type
        3  the parser raised, or no content blocks were extracted
    """
    parser = argparse.ArgumentParser(description="Parse a document into content.json")
    parser.add_argument("--input", required=True, help="Input file (.md, .txt, .pdf, .json)")
    parser.add_argument("--out", default="content.json", help="Output content.json path")
    try:
        args = parser.parse_args()
    except SystemExit as exc:
        # argparse exits with 2 on bad arguments, but this script documents
        # 1 for bad args and reserves 2 for missing dependencies.
        sys.exit(1 if exc.code else 0)

    if not os.path.exists(args.input):
        print(json.dumps({"status": "error", "error": f"File not found: {args.input}"}),
              file=sys.stderr)
        sys.exit(1)

    # Reject unsupported formats up front so they exit with 1 rather than
    # the parse-error code 3. Keep this tuple in sync with parse_file().
    ext = Path(args.input).suffix.lower()
    if ext not in (".md", ".txt", ".markdown", ".pdf", ".json"):
        print(json.dumps({
            "status": "error",
            "error": f"Unsupported file type: {ext}. Supported: .md .txt .pdf .json",
        }), file=sys.stderr)
        sys.exit(1)

    try:
        blocks, warnings = parse_file(args.input)
    except Exception as e:
        import traceback
        print(json.dumps({"status": "error", "error": str(e),
                          "trace": traceback.format_exc()}), file=sys.stderr)
        sys.exit(3)

    if not blocks:
        print(json.dumps({
            "status": "error",
            "error": "No content blocks extracted",
            "warnings": warnings,
        }), file=sys.stderr)
        sys.exit(3)

    with open(args.out, "w", encoding="utf-8") as f:
        json.dump(blocks, f, indent=2, ensure_ascii=False)

    result = {
        "status": "ok",
        "out": args.out,
        "block_count": len(blocks),
        "warnings": warnings,
    }
    print(json.dumps(result, indent=2))

    print(f"\n── Parsed {args.input} ─────────────────────────────────────",
          file=sys.stderr)
    print(f" Blocks : {len(blocks)}", file=sys.stderr)

    # Passed-through JSON blocks may carry a non-string type (e.g. null);
    # str() keeps the sort and the column formatting below from raising.
    type_counts: dict = {}
    for b in blocks:
        label = str(b.get("type", "?"))
        type_counts[label] = type_counts.get(label, 0) + 1
    for t, n in sorted(type_counts.items()):
        print(f" {t:12} × {n}", file=sys.stderr)

    if warnings:
        print(f" Warnings:", file=sys.stderr)
        for w in warnings:
            print(f" ⚠ {w}", file=sys.stderr)
    print(f"\n Next: bash make.sh run --content {args.out} --title '...' --type ...",
          file=sys.stderr)
    print("", file=sys.stderr)


if __name__ == "__main__":
    main()