regen.mde 0.2.2 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +409 -295
- package/bin/build-corpus-editor.js +5 -3
- package/bin/postinstall.js +259 -187
- package/bin/regen-mdeditor-install.js +1 -1
- package/bin/regen-mdeditor-uninstall.js +1 -1
- package/desktop/BuildCorpusEditor/BuildCorpusBridge.cs +493 -270
- package/desktop/BuildCorpusEditor/EditorForm.cs +853 -540
- package/desktop/BuildCorpusEditor/Program.cs +85 -81
- package/dist/release/regen-mde-0.3.0-win-x64-setup.exe +0 -0
- package/dist/release/{regen.mde-0.2.2-win-x64.zip → regen-mde-0.3.0-win-x64.zip} +0 -0
- package/dist/release/regen-mde-0.7.0-win-x64-setup.exe +0 -0
- package/dist/release/regen-mde-0.7.0-win-x64.zip +0 -0
- package/dist/windows-editor/BuildCorpusEditor.dll +0 -0
- package/dist/windows-editor/BuildCorpusEditor.exe +0 -0
- package/dist/windows-editor/BuildCorpusEditor.pdb +0 -0
- package/dist/windows-editor/wwwroot/assets/index-C_VxJk4k.js +375 -0
- package/dist/windows-editor/wwwroot/assets/index-Wt9zSjIw.css +1 -0
- package/dist/windows-editor/wwwroot/index.html +3 -3
- package/editor-web/index.html +1 -1
- package/editor-web/src/main.jsx +1044 -399
- package/editor-web/src/styles.css +846 -602
- package/installer/install-regen-mde.ps1 +49 -10
- package/installer/regen-mde.nsi +16 -16
- package/package.json +90 -86
- package/pyproject.toml +35 -33
- package/requirements.txt +6 -4
- package/scripts/package-windows-editor.ps1 +8 -8
- package/scripts/release-dual.mjs +105 -0
- package/scripts/run-editor-implementation-plane.ps1 +29 -6
- package/src/build_corpus/docx_exporter.py +1055 -798
- package/src/build_corpus/equations.py +80 -0
- package/src/build_corpus/exporter.py +1488 -1195
- package/src/build_corpus/frontmatter.py +302 -0
- package/src/build_corpus/ppt_exporter.py +543 -532
- package/dist/release/regen.mde-0.2.2-win-x64-setup.exe +0 -0
- package/dist/windows-editor/wwwroot/assets/index-DjJ6xmhy.js +0 -326
- package/dist/windows-editor/wwwroot/assets/index-_dwMNNsm.css +0 -1
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
"""MDK YAML frontmatter — emit on forward (docx/pptx -> md), strip on reverse
|
|
2
|
+
(md -> docx) and persist verbatim into the Office package's ``docProps/custom.xml``.
|
|
3
|
+
|
|
4
|
+
The design contract (see ``HANDOFF-build-corpus-mdk-frontmatter``):
|
|
5
|
+
|
|
6
|
+
* Every generated ``.md`` carries an inline MDK frontmatter block by default, so
|
|
7
|
+
``@regen/codeflow`` (MDK) can compile it with zero changes on the MDK side.
|
|
8
|
+
* The frontmatter is **stripped** on the ``md -> docx`` reverse so the visible
|
|
9
|
+
Word body never contains YAML.
|
|
10
|
+
* The stripped YAML is stored **verbatim** in one custom document property,
|
|
11
|
+
``mdk_frontmatter`` (``vt:lpwstr``), inside ``docProps/custom.xml``. That is the
|
|
12
|
+
durable home that travels inside the Word/PowerPoint file, so the round-trip is
|
|
13
|
+
lossless: forward re-reads it verbatim if present, else generates defaults.
|
|
14
|
+
|
|
15
|
+
No external YAML dependency is used; the frontmatter we emit is a small, fixed
|
|
16
|
+
shape, so we build/parse it with plain string handling and keep the body verbatim.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import datetime as _dt
|
|
22
|
+
import hashlib
|
|
23
|
+
from pathlib import Path
|
|
24
|
+
from xml.sax.saxutils import escape as _xml_escape
|
|
25
|
+
from zipfile import ZIP_DEFLATED, ZipFile
|
|
26
|
+
|
|
27
|
+
# Value of the ``name`` attribute on the custom property that stores the YAML.
FRONTMATTER_PROPERTY_NAME = "mdk_frontmatter"
# ZIP member names of the package parts this module reads or rewrites.
CUSTOM_XML_PART = "docProps/custom.xml"
CONTENT_TYPES_PART = "[Content_Types].xml"
ROOT_RELS_PART = "_rels/.rels"

# XML namespaces declared on the generated ``<Properties>`` root element.
CUSTOM_PROPS_NS = "http://schemas.openxmlformats.org/officeDocument/2006/custom-properties"
VT_NS = "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"
# ContentType written on the ``<Override>`` entry for the custom-properties part.
CUSTOM_PROPS_CONTENT_TYPE = (
    "application/vnd.openxmlformats-officedocument.custom-properties+xml"
)
# ``Type`` written on the root ``<Relationship>`` that targets the custom-properties part.
CUSTOM_PROPS_REL_TYPE = (
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"
)
# ``fmtid`` attribute written on the generated ``<property>`` element.
CUSTOM_PROPS_FMTID = "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}"

# An evidence-handle HTML comment immediately follows the frontmatter so MDK can
# anchor the converted Markdown to the original binary it was derived from.
_EVIDENCE_OPEN = "<!-- mdk:evidence-handle"
_EVIDENCE_CLOSE = "-->"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# ---------------------------------------------------------------------------
|
|
49
|
+
# Emit (forward: docx/pptx -> md)
|
|
50
|
+
# ---------------------------------------------------------------------------
|
|
51
|
+
|
|
52
|
+
def _yaml_block_default(source_path: Path) -> str:
    """Build the default MDK frontmatter block for a freshly converted document.

    ``created`` is the source file's modification date (ISO format, quoted);
    it falls back to ``null`` when the file cannot be stat'ed. Every other
    field is a fixed default. The result includes both ``---`` fences and ends
    with a newline.
    """
    try:
        modified_on = _dt.date.fromtimestamp(source_path.stat().st_mtime)
    except OSError:
        # Missing or unreadable source: emit an explicit YAML null.
        created = "null"
    else:
        created = f'"{modified_on.isoformat()}"'

    fields = [
        "---",
        'mdk_schema_version: "1.0"',
        "doc_type: corpus_note",
        "system: other",
        "status: active",
        "owner: human",
        "source_of_truth: false",
        f"created: {created}",
        "last_reviewed: null",
        "supersedes: []",
        "depends_on: []",
        "tags: []",
        "---",
    ]
    return "\n".join(fields) + "\n"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _evidence_handle(source_path: Path) -> str:
    """Build the ``mdk:evidence-handle`` HTML comment describing the source binary.

    The comment records the resolved path of ``source_path``, the SHA-256 hex
    digest of its bytes (empty when the file cannot be read) and a short
    description naming the file extension (``binary`` when there is none).
    The returned text ends with a newline.
    """
    try:
        sha256_hex = hashlib.sha256(source_path.read_bytes()).hexdigest()
    except OSError:
        # Best effort: an unreadable source still gets a handle, just unhashed.
        sha256_hex = ""

    extension = source_path.suffix.lstrip(".") or "binary"
    resolved = str(source_path.resolve())

    comment_lines = [
        _EVIDENCE_OPEN,
        "kind: local",
        f"source_path: {resolved}",
        f"hash: {sha256_hex}",
        f"description: Original {extension} the corpus Markdown was converted from "
        "(build-corpus).",
        _EVIDENCE_CLOSE,
    ]
    return "\n".join(comment_lines) + "\n"
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def build_frontmatter(source_path: Path, prior_frontmatter: str | None = None) -> str:
    """Return the YAML frontmatter block (``---`` … ``---\\n``) for a document.

    Args:
        source_path: The document being converted; only used to generate the
            default block when no prior frontmatter is available.
        prior_frontmatter: Frontmatter recovered verbatim from a source
            ``custom.xml``. When non-empty, its content lines are kept as-is
            so round-trips stay lossless; only missing ``---`` fences are added.

    Returns:
        A block that always starts with a ``---`` line, ends with a ``---``
        line and carries exactly one trailing newline.
    """
    if not prior_frontmatter:
        return _yaml_block_default(source_path)

    lines = prior_frontmatter.strip("\n").split("\n")

    # Fences are recognised as whole lines (the same rule the strip side uses),
    # not as string prefixes/suffixes: a value line that merely ends in "---",
    # or a lone opening fence, must not be mistaken for a closed block.
    if lines[0].rstrip() != "---":
        lines.insert(0, "---")
    # A single "---" line is only the opening fence, so a closing one is still due.
    if len(lines) < 2 or lines[-1].strip() != "---":
        lines.append("---")
    return "\n".join(lines) + "\n"
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def add_mdk_frontmatter(
    markdown: str,
    source_path: Path,
    prior_frontmatter: str | None = None,
) -> str:
    """Put an MDK frontmatter header in front of ``markdown``.

    With a non-empty ``prior_frontmatter`` (a verbatim restore from a source
    ``custom.xml``) only that block is emitted. Otherwise a default block is
    generated and an evidence-handle comment for ``source_path`` is appended
    to it. Leading newlines of ``markdown`` are dropped and a single blank
    line separates the header from the body.
    """
    header = build_frontmatter(source_path, prior_frontmatter)
    if not prior_frontmatter:
        # Freshly generated frontmatter also records where the Markdown came from.
        header += _evidence_handle(source_path)
    body = markdown.lstrip("\n")
    return f"{header}\n{body}"
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
# ---------------------------------------------------------------------------
|
|
131
|
+
# Strip (reverse: md -> docx)
|
|
132
|
+
# ---------------------------------------------------------------------------
|
|
133
|
+
|
|
134
|
+
def strip_mdk_frontmatter(markdown: str) -> tuple[str | None, str]:
    """Separate a leading ``---`` … ``---`` frontmatter block from the body.

    Returns ``(frontmatter, body)``. ``frontmatter`` is ``None`` (and ``body``
    is the untouched input) when the text does not open with a ``---`` line or
    the block is never closed. Otherwise ``frontmatter`` holds the block with
    both fences and a trailing newline, ready to be stored verbatim, and
    ``body`` is the remainder with a leading ``mdk:evidence-handle`` comment
    and leading newlines removed.
    """
    if not markdown.startswith("---"):
        return None, markdown

    lines = markdown.split("\n")
    # The first line must be the fence alone (e.g. "---x" is ordinary text).
    if lines[0].strip() != "---":
        return None, markdown

    closing = next(
        (pos for pos, line in enumerate(lines[1:], start=1) if line.strip() == "---"),
        None,
    )
    if closing is None:
        return None, markdown

    after_fence = closing + 1
    frontmatter = "\n".join(lines[:after_fence]) + "\n"
    remainder = _strip_leading_evidence_handle("\n".join(lines[after_fence:]))
    return frontmatter, remainder.lstrip("\n")
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _strip_leading_evidence_handle(body: str) -> str:
    """Drop an ``mdk:evidence-handle`` comment that opens ``body``.

    Leading newlines are ignored when looking for the comment. If the body
    does not start with the comment opener, or the comment is never closed,
    ``body`` is returned unchanged; otherwise everything after the first
    closing marker is returned.
    """
    candidate = body.lstrip("\n")
    if not candidate.startswith(_EVIDENCE_OPEN):
        return body
    _, closer, tail = candidate.partition(_EVIDENCE_CLOSE)
    # An empty separator means the closing marker was never found.
    return tail if closer else body
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
# ---------------------------------------------------------------------------
|
|
173
|
+
# Source custom.xml read (forward)
|
|
174
|
+
# ---------------------------------------------------------------------------
|
|
175
|
+
|
|
176
|
+
def read_frontmatter_from_zip(zf: ZipFile) -> str | None:
    """Fetch the stored ``mdk_frontmatter`` property from an open OPC package.

    Returns ``None`` when the package has no ``docProps/custom.xml`` part or
    the part is not valid UTF-8; otherwise returns whatever the property
    lookup yields for ``mdk_frontmatter``.
    """
    if CUSTOM_XML_PART in zf.namelist():
        try:
            custom_xml = zf.read(CUSTOM_XML_PART).decode("utf-8")
        except (KeyError, UnicodeDecodeError):
            return None
        return _extract_property_value(custom_xml, FRONTMATTER_PROPERTY_NAME)
    return None
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _extract_property_value(custom_xml: str, name: str) -> str | None:
    """Pull a custom property's text value out of a ``custom.xml`` document.

    Args:
        custom_xml: The XML text of ``docProps/custom.xml``.
        name: Value of the ``name`` attribute identifying the property.

    Returns:
        The text of the first child element of the matching property that has
        text, ``""`` when the property exists but holds no text, or ``None``
        when the XML cannot be parsed or no property carries that name.
    """
    from xml.etree import ElementTree as ET

    try:
        root = ET.fromstring(custom_xml)
    except ET.ParseError:
        return None

    for prop in root:
        if prop.attrib.get("name") != name:
            continue
        for child in prop:
            if child.text is not None:
                # ElementTree has already decoded XML entities while parsing.
                # Decoding again would corrupt values that legitimately contain
                # entity-looking text (e.g. a literal "&amp;" in the YAML), so
                # the parsed text is returned untouched to stay verbatim.
                return child.text
        return ""
    return None
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def _xml_unescape(value: str) -> str:
    """Decode the five predefined XML entities (lt, gt, quot, apos, amp) in ``value``.

    The ampersand entity is decoded last so that text produced by decoding it
    is never decoded a second time within the same call.
    """
    # Entity references are assembled from their names rather than written as
    # literals, so tooling that HTML-decodes source text cannot silently turn
    # these replacements into no-ops.
    replacements = (
        ("lt", "<"),
        ("gt", ">"),
        ("quot", '"'),
        ("apos", "'"),
        ("amp", "&"),  # must stay last
    )
    for entity_name, char in replacements:
        value = value.replace(f"&{entity_name};", char)
    return value
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
# ---------------------------------------------------------------------------
|
|
216
|
+
# Inject custom.xml (reverse) — write frontmatter into the saved Office package
|
|
217
|
+
# ---------------------------------------------------------------------------
|
|
218
|
+
|
|
219
|
+
def _build_custom_xml(frontmatter: str) -> str:
    """Serialise ``frontmatter`` as a complete ``docProps/custom.xml`` document.

    The document holds a single ``<property>`` (pid 2) named after
    ``FRONTMATTER_PROPERTY_NAME`` whose ``<vt:lpwstr>`` child carries the
    XML-escaped frontmatter text.
    """
    escaped_value = _xml_escape(frontmatter)
    pieces = [
        '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n',
        f'<Properties xmlns="{CUSTOM_PROPS_NS}" xmlns:vt="{VT_NS}">',
        f'<property fmtid="{CUSTOM_PROPS_FMTID}" pid="2" ',
        f'name="{FRONTMATTER_PROPERTY_NAME}">',
        f"<vt:lpwstr>{escaped_value}</vt:lpwstr>",
        "</property></Properties>",
    ]
    return "".join(pieces)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _add_content_type_override(content_types_xml: str) -> str:
    """Ensure ``[Content_Types].xml`` declares the custom-properties part.

    The text is returned unchanged when it already contains a ``PartName``
    for ``docProps/custom.xml``; otherwise an ``<Override>`` element is
    inserted immediately before ``</Types>``.
    """
    part_name_attr = f'PartName="/{CUSTOM_XML_PART}"'
    if part_name_attr in content_types_xml:
        return content_types_xml
    new_entry = f'<Override {part_name_attr} ContentType="{CUSTOM_PROPS_CONTENT_TYPE}"/>'
    return content_types_xml.replace("</Types>", new_entry + "</Types>")
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def _add_root_relationship(rels_xml: str) -> str:
    """Ensure ``_rels/.rels`` links to the custom-properties part.

    The text is returned unchanged when it already targets
    ``docProps/custom.xml`` or already uses the ``rIdMDK`` relationship id;
    otherwise a ``<Relationship>`` element is inserted immediately before
    ``</Relationships>``.
    """
    target_attr = f'Target="{CUSTOM_XML_PART}"'
    already_linked = target_attr in rels_xml or 'Id="rIdMDK"' in rels_xml
    if already_linked:
        return rels_xml
    new_entry = f'<Relationship Id="rIdMDK" Type="{CUSTOM_PROPS_REL_TYPE}" {target_attr}/>'
    return rels_xml.replace("</Relationships>", new_entry + "</Relationships>")
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def inject_frontmatter_into_package(package_path: Path, frontmatter: str) -> None:
    """Store ``frontmatter`` as the ``mdk_frontmatter`` property of an OPC package.

    The package is rebuilt member by member into a temporary directory and the
    rebuilt file is then moved over ``package_path``:

    * ``docProps/custom.xml`` is written anew with the frontmatter. An existing
      part is not merged — it is dropped, so any other custom properties it
      held are not carried over.
    * ``[Content_Types].xml`` gains the Override for the custom-properties part.
    * ``_rels/.rels`` gains the Relationship pointing at it.

    Minimal versions of the last two parts are created when the package lacks
    them. Nothing here is format-specific beyond these OPC parts.
    """
    import shutil
    import tempfile

    package_path = Path(package_path)
    custom_xml = _build_custom_xml(frontmatter)

    # Parts whose XML text is patched while being copied across.
    patchers = {
        CONTENT_TYPES_PART: _add_content_type_override,
        ROOT_RELS_PART: _add_root_relationship,
    }

    with tempfile.TemporaryDirectory(prefix="build-corpus-fm-") as workdir:
        rebuilt = Path(workdir) / package_path.name
        with ZipFile(package_path) as source, ZipFile(rebuilt, "w", ZIP_DEFLATED) as target:
            member_names = source.namelist()
            for member in member_names:
                if member == CUSTOM_XML_PART:
                    # Skipped here; the fresh part is written after the loop.
                    continue
                payload = source.read(member)
                patch = patchers.get(member)
                if patch is not None:
                    payload = patch(payload.decode("utf-8")).encode("utf-8")
                target.writestr(member, payload)

            target.writestr(CUSTOM_XML_PART, custom_xml)
            if CONTENT_TYPES_PART not in member_names:
                target.writestr(CONTENT_TYPES_PART, _default_content_types())
            if ROOT_RELS_PART not in member_names:
                target.writestr(ROOT_RELS_PART, _default_root_rels())

        # Both archives are closed at this point, so the original can be replaced.
        shutil.move(str(rebuilt), str(package_path))
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def _default_content_types() -> str:
    """Return a minimal ``[Content_Types].xml`` declaring only the custom-properties part."""
    override = (
        f'<Override PartName="/{CUSTOM_XML_PART}" '
        f'ContentType="{CUSTOM_PROPS_CONTENT_TYPE}"/>'
    )
    return "".join(
        [
            '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n',
            '<Types xmlns="http://schemas.openxmlformats.org/package/2006/content-types">',
            override,
            "</Types>",
        ]
    )
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
def _default_root_rels() -> str:
    """Return a minimal ``_rels/.rels`` holding only the custom-properties relationship."""
    relationship = (
        f'<Relationship Id="rIdMDK" Type="{CUSTOM_PROPS_REL_TYPE}" '
        f'Target="{CUSTOM_XML_PART}"/>'
    )
    return "".join(
        [
            '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n',
            '<Relationships xmlns="http://schemas.openxmlformats.org/package/2006/relationships">',
            relationship,
            "</Relationships>",
        ]
    )
|