regen.mde 0.2.2 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/LICENSE +16 -16
  2. package/README.md +409 -295
  3. package/bin/build-corpus-editor.js +83 -81
  4. package/bin/build-corpus.js +41 -41
  5. package/bin/postinstall.js +259 -187
  6. package/bin/regen-mdeditor-install.js +27 -27
  7. package/bin/regen-mdeditor-uninstall.js +19 -19
  8. package/bin/validate-katex.js +93 -93
  9. package/desktop/BuildCorpusEditor/BuildCorpusBridge.cs +493 -270
  10. package/desktop/BuildCorpusEditor/BuildCorpusEditor.csproj +22 -22
  11. package/desktop/BuildCorpusEditor/EditorForm.cs +853 -540
  12. package/desktop/BuildCorpusEditor/Program.cs +85 -81
  13. package/desktop/BuildCorpusEditor/app.manifest +16 -16
  14. package/dist/release/regen-mde-0.8.0-win-x64.zip +0 -0
  15. package/dist/windows-editor/BuildCorpusEditor.dll +0 -0
  16. package/dist/windows-editor/BuildCorpusEditor.exe +0 -0
  17. package/dist/windows-editor/BuildCorpusEditor.pdb +0 -0
  18. package/dist/windows-editor/BuildCorpusEditor.runtimeconfig.json +1 -1
  19. package/dist/windows-editor/wwwroot/assets/index-C_VxJk4k.js +375 -0
  20. package/dist/windows-editor/wwwroot/assets/index-Wt9zSjIw.css +1 -0
  21. package/dist/windows-editor/wwwroot/index.html +22 -22
  22. package/editor-web/index.html +21 -21
  23. package/editor-web/src/main.jsx +1044 -399
  24. package/editor-web/src/styles.css +846 -602
  25. package/editor-web/vite.config.js +13 -13
  26. package/examples/build-corpus.config.example.json +21 -21
  27. package/installer/install-regen-mde.ps1 +214 -175
  28. package/installer/regen-mde.nsi +81 -81
  29. package/package.json +10 -6
  30. package/pyproject.toml +4 -3
  31. package/requirements.txt +5 -4
  32. package/scripts/build-windows-editor.ps1 +47 -47
  33. package/scripts/package-windows-editor.ps1 +90 -90
  34. package/scripts/release-dual.mjs +105 -0
  35. package/scripts/run-corpus.ps1 +28 -28
  36. package/scripts/run-editor-implementation-plane.ps1 +226 -203
  37. package/scripts/run-required-tests.ps1 +98 -98
  38. package/scripts/run-smoke.ps1 +28 -28
  39. package/src/build_corpus/__init__.py +1 -1
  40. package/src/build_corpus/docx_exporter.py +1055 -798
  41. package/src/build_corpus/equations.py +1345 -0
  42. package/src/build_corpus/exporter.py +1488 -1195
  43. package/src/build_corpus/frontmatter.py +302 -0
  44. package/src/build_corpus/ppt_exporter.py +543 -532
  45. package/src/build_corpus/templates/__init__.py +1 -1
  46. package/src/build_corpus/validate_assets.py +46 -46
  47. package/tools/audit_corpus.py +203 -203
  48. package/tools/collect_microsoft_word_templates.py +228 -228
  49. package/tools/collect_online_docx_corpus.py +272 -272
  50. package/tools/collect_online_pptx_corpus.py +252 -252
  51. package/tools/compare_pptx_inputs_outputs.py +87 -87
  52. package/tools/roundtrip_docx_corpus.py +171 -171
  53. package/dist/release/regen.mde-0.2.2-win-x64-setup.exe +0 -0
  54. package/dist/release/regen.mde-0.2.2-win-x64.zip +0 -0
  55. package/dist/windows-editor/wwwroot/assets/index-DjJ6xmhy.js +0 -326
  56. package/dist/windows-editor/wwwroot/assets/index-_dwMNNsm.css +0 -1
@@ -0,0 +1,302 @@
1
+ """MDK YAML frontmatter — emit on forward (docx/pptx -> md), strip on reverse
2
+ (md -> docx) and persist verbatim into the Office package's ``docProps/custom.xml``.
3
+
4
+ The design contract (see ``HANDOFF-build-corpus-mdk-frontmatter``):
5
+
6
+ * Every generated ``.md`` carries an inline MDK frontmatter block by default, so
7
+ ``@regen/codeflow`` (MDK) can compile it with zero changes on the MDK side.
8
+ * The frontmatter is **stripped** on the ``md -> docx`` reverse so the visible
9
+ Word body never contains YAML.
10
+ * The stripped YAML is stored **verbatim** in one custom document property,
11
+ ``mdk_frontmatter`` (``vt:lpwstr``), inside ``docProps/custom.xml``. That is the
12
+ durable home that travels inside the Word/PowerPoint file, so the round-trip is
13
+ lossless: forward re-reads it verbatim if present, else generates defaults.
14
+
15
+ No external YAML dependency is used; the frontmatter we emit is a small, fixed
16
+ shape, so we build/parse it with plain string handling and keep the body verbatim.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import datetime as _dt
22
+ import hashlib
23
+ from pathlib import Path
24
+ from xml.sax.saxutils import escape as _xml_escape
25
+ from zipfile import ZIP_DEFLATED, ZipFile
26
+
27
+ FRONTMATTER_PROPERTY_NAME = "mdk_frontmatter"  # ``name`` attribute of the custom property that stores the YAML
28
+ CUSTOM_XML_PART = "docProps/custom.xml"  # package part the property is read from / written to
29
+ CONTENT_TYPES_PART = "[Content_Types].xml"  # patched with an Override for the custom.xml part
30
+ ROOT_RELS_PART = "_rels/.rels"  # patched with a Relationship targeting the custom.xml part
31
+
32
+ CUSTOM_PROPS_NS = "http://schemas.openxmlformats.org/officeDocument/2006/custom-properties"  # default xmlns of <Properties>
33
+ VT_NS = "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"  # bound to the ``vt:`` prefix (``vt:lpwstr``)
34
+ CUSTOM_PROPS_CONTENT_TYPE = (  # ContentType written on the custom.xml Override
35
+ "application/vnd.openxmlformats-officedocument.custom-properties+xml"
36
+ )
37
+ CUSTOM_PROPS_REL_TYPE = (  # Type written on the root Relationship to custom.xml
38
+ "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"
39
+ )
40
+ CUSTOM_PROPS_FMTID = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}"  # ``fmtid`` attribute written on the <property> element
41
+
42
+ # An evidence-handle HTML comment immediately follows the frontmatter so MDK can
43
+ # anchor the converted Markdown to the original binary it was derived from.
44
+ _EVIDENCE_OPEN = "<!-- mdk:evidence-handle"  # opening marker; also used to detect the comment when stripping
45
+ _EVIDENCE_CLOSE = "-->"  # closing marker of the evidence-handle comment
46
+
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Emit (forward: docx/pptx -> md)
50
+ # ---------------------------------------------------------------------------
51
+
52
def _yaml_block_default(source_path: Path) -> str:
    """Build the stock MDK frontmatter block for a freshly converted document.

    ``created`` is the source file's modification date (ISO ``YYYY-MM-DD``,
    double-quoted). When the file cannot be inspected (``OSError``) it is
    emitted as YAML ``null`` instead. All other fields are fixed defaults.
    """
    try:
        stamp = source_path.stat().st_mtime
        created = '"{}"'.format(_dt.date.fromtimestamp(stamp).isoformat())
    except OSError:
        created = "null"

    fields = (
        'mdk_schema_version: "1.0"',
        "doc_type: corpus_note",
        "system: other",
        "status: active",
        "owner: human",
        "source_of_truth: false",
        "created: " + created,
        "last_reviewed: null",
        "supersedes: []",
        "depends_on: []",
        "tags: []",
    )
    # Fence the fields with ``---`` lines; every line, including the closing
    # fence, ends with a newline.
    return "\n".join(("---", *fields, "---")) + "\n"
74
+
75
+
76
def _evidence_handle(source_path: Path) -> str:
    """Build the ``mdk:evidence-handle`` HTML comment for the source binary.

    The comment records the resolved source path, the SHA-256 hex digest of the
    file's bytes (left empty when the file cannot be read) and a description
    naming the file extension (``binary`` when the path has no suffix).
    """
    try:
        sha256_hex = hashlib.sha256(source_path.read_bytes()).hexdigest()
    except OSError:
        sha256_hex = ""
    resolved = str(source_path.resolve())
    extension = source_path.suffix.lstrip(".") or "binary"

    comment_lines = (
        _EVIDENCE_OPEN,
        "kind: local",
        "source_path: " + resolved,
        "hash: " + sha256_hex,
        "description: Original " + extension
        + " the corpus Markdown was converted from (build-corpus).",
        _EVIDENCE_CLOSE,
    )
    return "\n".join(comment_lines) + "\n"
93
+
94
+
95
def build_frontmatter(source_path: Path, prior_frontmatter: str | None = None) -> str:
    """Return the fenced YAML frontmatter block (ending in ``---\\n``).

    A non-empty ``prior_frontmatter`` (text recovered from a source
    ``custom.xml``) wins: surrounding newlines are trimmed and a ``---`` fence
    is added at either end only if it is missing. With no prior text, the
    default block derived from ``source_path`` is returned.
    """
    if not prior_frontmatter:
        return _yaml_block_default(source_path)

    restored = prior_frontmatter.strip("\n") + "\n"
    if not restored.startswith("---"):
        restored = "---\n" + restored

    # Only append a closing fence when the text does not already end with one.
    trimmed = restored.rstrip("\n")
    if trimmed.endswith("---"):
        return restored
    return trimmed + "\n---\n"
110
+
111
+
112
def add_mdk_frontmatter(
    markdown: str,
    source_path: Path,
    prior_frontmatter: str | None = None,
) -> str:
    """Return ``markdown`` with an MDK frontmatter header prepended.

    With a non-empty ``prior_frontmatter`` the header is just that restored
    block. Otherwise the header is the default block followed by a freshly
    generated evidence-handle comment. Leading newlines of ``markdown`` are
    dropped and exactly one blank line separates header and body.
    """
    header = build_frontmatter(source_path, prior_frontmatter)
    if not prior_frontmatter:
        # Freshly generated frontmatter also gets the provenance comment.
        header += _evidence_handle(source_path)
    content = markdown.lstrip("\n")
    return header + "\n" + content
128
+
129
+
130
+ # ---------------------------------------------------------------------------
131
+ # Strip (reverse: md -> docx)
132
+ # ---------------------------------------------------------------------------
133
+
134
def strip_mdk_frontmatter(markdown: str) -> tuple[str | None, str]:
    """Separate a leading ``---``-fenced YAML block from the Markdown body.

    Returns ``(frontmatter, body)``. ``frontmatter`` is ``None`` (and ``body`` is
    the untouched input) when the text does not open with a ``---`` fence line
    or the fence is never closed. Otherwise ``frontmatter`` holds both fences
    plus a trailing newline, and ``body`` is the remainder with a leading
    ``mdk:evidence-handle`` comment and leading newlines removed.
    """
    if not markdown.startswith("---"):
        return None, markdown

    lines = markdown.split("\n")
    # The first line must be exactly a fence (ignoring surrounding whitespace),
    # not merely start with one (e.g. ``----`` or ``--- title``).
    if lines[0].strip() != "---":
        return None, markdown

    closing = next(
        (pos for pos, text in enumerate(lines[1:], start=1) if text.strip() == "---"),
        None,
    )
    if closing is None:
        return None, markdown

    split_at = closing + 1
    frontmatter = "\n".join(lines[:split_at]) + "\n"
    remainder = _strip_leading_evidence_handle("\n".join(lines[split_at:]))
    return frontmatter, remainder.lstrip("\n")
159
+
160
+
161
def _strip_leading_evidence_handle(body: str) -> str:
    """Drop an ``mdk:evidence-handle`` comment that opens ``body``.

    Leading newlines are ignored when looking for the comment. If the body does
    not start with the comment's opening marker, or the comment is never
    closed, ``body`` is returned unchanged; otherwise everything after the
    first closing marker is returned.
    """
    candidate = body.lstrip("\n")
    if not candidate.startswith(_EVIDENCE_OPEN):
        return body
    _comment, closer, after = candidate.partition(_EVIDENCE_CLOSE)
    # ``closer`` is empty when the closing marker was not found.
    return after if closer else body
170
+
171
+
172
+ # ---------------------------------------------------------------------------
173
+ # Source custom.xml read (forward)
174
+ # ---------------------------------------------------------------------------
175
+
176
def read_frontmatter_from_zip(zf: ZipFile) -> str | None:
    """Fetch the stored ``mdk_frontmatter`` property text from an open package.

    Returns ``None`` when the package has no ``docProps/custom.xml`` part or
    that part cannot be read/decoded as UTF-8; otherwise returns whatever the
    property lookup yields for ``mdk_frontmatter``.
    """
    if CUSTOM_XML_PART in zf.namelist():
        try:
            custom_xml = zf.read(CUSTOM_XML_PART).decode("utf-8")
        except (KeyError, UnicodeDecodeError):
            return None
        return _extract_property_value(custom_xml, FRONTMATTER_PROPERTY_NAME)
    return None
185
+
186
+
187
def _extract_property_value(custom_xml: str, name: str) -> str | None:
    """Return the text of the custom property called ``name`` in ``custom.xml``.

    Args:
        custom_xml: The decoded XML text of a ``docProps/custom.xml`` part.
        name: Value of the ``name`` attribute of the ``<property>`` to look up.

    Returns:
        The text of the property's first child element that has text (for the
        properties this module writes, the ``<vt:lpwstr>`` value); ``""`` when
        the property exists but no child carries text; ``None`` when the XML is
        not well-formed or no property with that name exists.
    """
    from xml.etree import ElementTree as ET

    try:
        root = ET.fromstring(custom_xml)
    except ET.ParseError:
        return None

    for prop in root:
        if prop.attrib.get("name") != name:
            continue
        for child in prop:
            if child.text is not None:
                # The XML parser has already resolved entities such as
                # ``&amp;`` and ``&lt;``. Unescaping a second time would turn
                # a literal ``&amp;`` stored in the frontmatter into ``&`` and
                # break the verbatim round-trip, so the text is returned as-is.
                return child.text
        return ""
    return None
203
+
204
+
205
def _xml_unescape(value: str) -> str:
    """Replace the five predefined XML entities in ``value`` with their characters.

    ``&amp;`` is substituted last so that text such as ``&amp;lt;`` becomes
    ``&lt;`` rather than being unescaped twice into ``<``.
    """
    substitutions = (
        ("&lt;", "<"),
        ("&gt;", ">"),
        ("&quot;", '"'),
        ("&apos;", "'"),
        ("&amp;", "&"),
    )
    for entity, character in substitutions:
        value = value.replace(entity, character)
    return value
213
+
214
+
215
+ # ---------------------------------------------------------------------------
216
+ # Inject custom.xml (reverse) — write frontmatter into the saved Office package
217
+ # ---------------------------------------------------------------------------
218
+
219
def _build_custom_xml(frontmatter: str) -> str:
    """Render a ``docProps/custom.xml`` document holding only the frontmatter.

    The document contains a single ``<property>`` (``pid="2"``) named
    ``mdk_frontmatter`` whose ``<vt:lpwstr>`` value is the XML-escaped
    ``frontmatter`` text.
    """
    declaration = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n'
    root_open = f'<Properties xmlns="{CUSTOM_PROPS_NS}" xmlns:vt="{VT_NS}">'
    property_open = (
        f'<property fmtid="{CUSTOM_PROPS_FMTID}" pid="2" '
        f'name="{FRONTMATTER_PROPERTY_NAME}">'
    )
    value_element = "<vt:lpwstr>" + _xml_escape(frontmatter) + "</vt:lpwstr>"
    return "".join(
        (declaration, root_open, property_open, value_element, "</property></Properties>")
    )
229
+
230
+
231
def _add_content_type_override(content_types_xml: str) -> str:
    """Ensure ``[Content_Types].xml`` text declares the custom-properties part.

    If an entry with ``PartName="/docProps/custom.xml"`` is already present the
    text is returned unchanged; otherwise an ``<Override>`` is inserted right
    before the closing ``</Types>`` tag.
    """
    part_attr = f'PartName="/{CUSTOM_XML_PART}"'
    if part_attr in content_types_xml:
        return content_types_xml
    override = f'<Override {part_attr} ContentType="{CUSTOM_PROPS_CONTENT_TYPE}"/>'
    return content_types_xml.replace("</Types>", override + "</Types>")
239
+
240
+
241
def _add_root_relationship(rels_xml: str) -> str:
    """Ensure ``_rels/.rels`` text links to the custom-properties part.

    The text is returned unchanged when it already targets
    ``docProps/custom.xml`` or already uses the ``rIdMDK`` id; otherwise a
    ``<Relationship>`` is inserted right before ``</Relationships>``.
    """
    target_attr = f'Target="{CUSTOM_XML_PART}"'
    already_linked = target_attr in rels_xml or 'Id="rIdMDK"' in rels_xml
    if already_linked:
        return rels_xml
    new_rel = f'<Relationship Id="rIdMDK" Type="{CUSTOM_PROPS_REL_TYPE}" {target_attr}/>'
    return rels_xml.replace("</Relationships>", new_rel + "</Relationships>")
249
+
250
+
251
def inject_frontmatter_into_package(package_path: Path, frontmatter: str) -> None:
    """Store ``frontmatter`` as the ``mdk_frontmatter`` custom property of a package.

    The package is rewritten into a temporary copy which then replaces
    ``package_path``:

    * ``docProps/custom.xml`` is written fresh from ``frontmatter``.
    * ``[Content_Types].xml`` gains an Override for that part (or is created
      when the package has none).
    * ``_rels/.rels`` gains a Relationship to that part (or is created when the
      package has none).

    Every other entry is copied with its original ``ZipInfo`` so that its
    timestamp, compression method and attributes are preserved.

    Args:
        package_path: Path to the ``.docx``/``.pptx``/``.xlsx`` file to patch
            in place.
        frontmatter: The frontmatter text to store verbatim.
    """
    import shutil
    import tempfile

    package_path = Path(package_path)
    custom_xml = _build_custom_xml(frontmatter)

    with tempfile.TemporaryDirectory(prefix="build-corpus-fm-") as tmp:
        patched = Path(tmp) / package_path.name
        with ZipFile(package_path) as src, ZipFile(patched, "w", ZIP_DEFLATED) as dst:
            names = set(src.namelist())
            for info in src.infolist():
                name = info.filename
                if name == CUSTOM_XML_PART:
                    # NOTE: an existing custom.xml is replaced wholesale below,
                    # so any other custom properties it held are not kept.
                    continue
                data = src.read(info)
                if name == CONTENT_TYPES_PART:
                    data = _add_content_type_override(data.decode("utf-8")).encode("utf-8")
                elif name == ROOT_RELS_PART:
                    data = _add_root_relationship(data.decode("utf-8")).encode("utf-8")
                # Passing the source ZipInfo (rather than just the name) keeps
                # the entry's metadata; sizes and CRC are recomputed on write.
                dst.writestr(info, data)
            dst.writestr(CUSTOM_XML_PART, custom_xml)
            if CONTENT_TYPES_PART not in names:
                dst.writestr(CONTENT_TYPES_PART, _default_content_types())
            if ROOT_RELS_PART not in names:
                dst.writestr(ROOT_RELS_PART, _default_root_rels())
        # Both archives are closed here; swap the patched copy into place
        # before the temporary directory is removed.
        shutil.move(str(patched), str(package_path))
283
+
284
+
285
def _default_content_types() -> str:
    """Return a minimal ``[Content_Types].xml`` declaring only the custom.xml part."""
    override = (
        f'<Override PartName="/{CUSTOM_XML_PART}" '
        f'ContentType="{CUSTOM_PROPS_CONTENT_TYPE}"/>'
    )
    pieces = (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n',
        '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">',
        override,
        "</Types>",
    )
    return "".join(pieces)
293
+
294
+
295
def _default_root_rels() -> str:
    """Return a minimal ``_rels/.rels`` whose only relationship targets custom.xml."""
    relationship = (
        f'<Relationship Id="rIdMDK" Type="{CUSTOM_PROPS_REL_TYPE}" '
        f'Target="{CUSTOM_XML_PART}"/>'
    )
    pieces = (
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n',
        '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">',
        relationship,
        "</Relationships>",
    )
    return "".join(pieces)