regen.mde 0.8.0 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +16 -16
- package/README.md +2 -1
- package/bin/build-corpus-editor.js +83 -83
- package/bin/build-corpus.js +41 -41
- package/bin/regen-mdeditor-install.js +27 -27
- package/bin/regen-mdeditor-uninstall.js +19 -19
- package/bin/validate-katex.js +93 -93
- package/desktop/BuildCorpusEditor/BuildCorpusEditor.csproj +22 -22
- package/desktop/BuildCorpusEditor/EditorForm.cs +48 -0
- package/desktop/BuildCorpusEditor/app.manifest +16 -16
- package/dist/release/{regen-mde-0.8.0-win-x64.zip → regen-mde-0.6.1-win-x64.zip} +0 -0
- package/dist/release/regen-mde-0.8.2-win-x64.zip +0 -0
- package/dist/windows-editor/BuildCorpusEditor.dll +0 -0
- package/dist/windows-editor/BuildCorpusEditor.exe +0 -0
- package/dist/windows-editor/BuildCorpusEditor.pdb +0 -0
- package/dist/windows-editor/wwwroot/assets/{index-C_VxJk4k.js → index-BB0sbZaD.js} +107 -107
- package/dist/windows-editor/wwwroot/assets/index-CtOv7qsC.css +1 -0
- package/dist/windows-editor/wwwroot/index.html +22 -22
- package/editor-web/index.html +21 -21
- package/editor-web/src/main.jsx +91 -53
- package/editor-web/src/styles.css +65 -1
- package/editor-web/vite.config.js +13 -13
- package/examples/build-corpus.config.example.json +21 -21
- package/installer/install-regen-mde.ps1 +214 -214
- package/installer/regen-mde.nsi +81 -81
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/scripts/build-windows-editor.ps1 +47 -47
- package/scripts/package-windows-editor.ps1 +90 -90
- package/scripts/run-corpus.ps1 +28 -28
- package/scripts/run-editor-implementation-plane.ps1 +226 -226
- package/scripts/run-required-tests.ps1 +98 -98
- package/scripts/run-smoke.ps1 +28 -28
- package/src/build_corpus/__init__.py +3 -3
- package/src/build_corpus/docx_exporter.py +10 -4
- package/src/build_corpus/equations.py +1345 -1345
- package/src/build_corpus/templates/__init__.py +1 -1
- package/src/build_corpus/validate_assets.py +46 -46
- package/tools/audit_corpus.py +203 -203
- package/tools/collect_microsoft_word_templates.py +228 -228
- package/tools/collect_online_docx_corpus.py +272 -272
- package/tools/collect_online_pptx_corpus.py +252 -252
- package/tools/compare_pptx_inputs_outputs.py +87 -87
- package/tools/roundtrip_docx_corpus.py +171 -171
- package/dist/windows-editor/wwwroot/assets/index-Wt9zSjIw.css +0 -1
package/scripts/run-smoke.ps1
CHANGED
|
@@ -1,28 +1,28 @@
|
|
|
1
|
-
param(
|
|
2
|
-
[Parameter(Mandatory=$true)]
|
|
3
|
-
[string]$Docx,
|
|
4
|
-
|
|
5
|
-
[string]$Out = ".tmp\smoke",
|
|
6
|
-
|
|
7
|
-
[ValidateSet("assets", "base64", "s3")]
|
|
8
|
-
[string]$Images = "assets",
|
|
9
|
-
|
|
10
|
-
[string]$Config = ""
|
|
11
|
-
)
|
|
12
|
-
|
|
13
|
-
$ErrorActionPreference = "Stop"
|
|
14
|
-
$root = Split-Path -Parent $PSScriptRoot
|
|
15
|
-
$buildCorpus = Join-Path $root "bin\build-corpus.js"
|
|
16
|
-
$katex = Join-Path $root "bin\validate-katex.js"
|
|
17
|
-
$assetValidator = Join-Path $root "src\build_corpus\validate_assets.py"
|
|
18
|
-
|
|
19
|
-
$args = @($buildCorpus, $Docx, "--out", $Out, "--images", $Images)
|
|
20
|
-
if ($Config) {
|
|
21
|
-
$args += @("--config", $Config)
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
node @args
|
|
25
|
-
node $katex $Out | Set-Content -Path (Join-Path $Out "katex-report.json") -Encoding UTF8
|
|
26
|
-
py -3 $assetValidator $Out | Set-Content -Path (Join-Path $Out "asset-report.json") -Encoding UTF8
|
|
27
|
-
|
|
28
|
-
Write-Host "Smoke test passed: $Out"
|
|
1
|
+
param(
|
|
2
|
+
[Parameter(Mandatory=$true)]
|
|
3
|
+
[string]$Docx,
|
|
4
|
+
|
|
5
|
+
[string]$Out = ".tmp\smoke",
|
|
6
|
+
|
|
7
|
+
[ValidateSet("assets", "base64", "s3")]
|
|
8
|
+
[string]$Images = "assets",
|
|
9
|
+
|
|
10
|
+
[string]$Config = ""
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
$ErrorActionPreference = "Stop"
|
|
14
|
+
$root = Split-Path -Parent $PSScriptRoot
|
|
15
|
+
$buildCorpus = Join-Path $root "bin\build-corpus.js"
|
|
16
|
+
$katex = Join-Path $root "bin\validate-katex.js"
|
|
17
|
+
$assetValidator = Join-Path $root "src\build_corpus\validate_assets.py"
|
|
18
|
+
|
|
19
|
+
$args = @($buildCorpus, $Docx, "--out", $Out, "--images", $Images)
|
|
20
|
+
if ($Config) {
|
|
21
|
+
$args += @("--config", $Config)
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
node @args
|
|
25
|
+
node $katex $Out | Set-Content -Path (Join-Path $Out "katex-report.json") -Encoding UTF8
|
|
26
|
+
py -3 $assetValidator $Out | Set-Content -Path (Join-Path $Out "asset-report.json") -Encoding UTF8
|
|
27
|
+
|
|
28
|
+
Write-Host "Smoke test passed: $Out"
|
|
package/src/build_corpus/__init__.py
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
"""DOCX to Markdown conversion with native image and equation handling."""
|
|
2
|
-
|
|
3
|
-
__version__ = "0.8.0"
|
|
1
|
+
"""DOCX to Markdown conversion with native image and equation handling."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.8.0"
|
|
package/src/build_corpus/docx_exporter.py
CHANGED
|
@@ -36,7 +36,7 @@ except ImportError: # pragma: no cover - script-style invocation
|
|
|
36
36
|
# like "\$252.3B" — are NOT mistaken for inline-math fences. Escaped dollars then
|
|
37
37
|
# flow through the plain-text path and are unescaped to "$" (counted as a fixup).
|
|
38
38
|
INLINE_TOKEN_RE = re.compile(
|
|
39
|
-
r"(!\[[^\]]*\]\([^)]+\)|\[[^\]]+\]\([^)]+\)|`[^`]+`|(?<!\\)\$\$[^$]+\$\$|(?<!\\)\$[^$\n]+\$|\*\*\*.+?\*\*\*|\*\*.+?\*\*|\*.+?\*)"
|
|
39
|
+
r"(!\[[^\]]*\]\([^)]+\)|\[[^\]]+\]\([^)]+\)|`[^`]+`|(?<!\\)\$\$(?:\\\$|[^$])+\$\$|(?<!\\)\$(?:\\\$|[^$\n])+\$|\*\*\*.+?\*\*\*|\*\*.+?\*\*|\*.+?\*)"
|
|
40
40
|
)
|
|
41
41
|
|
|
42
42
|
# Image targets python-docx can never embed as a picture (need an external
|
|
@@ -83,10 +83,16 @@ def count_input_elements(markdown: str) -> dict:
|
|
|
83
83
|
line_text = lines[index]
|
|
84
84
|
# inline elements on this line
|
|
85
85
|
counts["images"] += len(re.findall(r"!\[[^\]]*\]\([^)]+\)", line_text))
|
|
86
|
+
# Math inside a link/image/code token is rendered as part of that token —
|
|
87
|
+
# INLINE_TOKEN_RE matches links/images/code BEFORE inline math — so strip
|
|
88
|
+
# those spans before counting equations to mirror the renderer. (e.g. a TOC
|
|
89
|
+
# entry "[... Allocation ($\alpha$) 117](#anchor)" is one link, not an equation.)
|
|
90
|
+
eq_text = re.sub(r"!\[[^\]]*\]\([^)]+\)|\[[^\]]+\]\([^)]+\)|`[^`]+`", " ", line_text)
|
|
86
91
|
# display $$...$$ embedded in a line with surrounding text (render tokenizes these too)
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
92
|
+
# (?:\\\$|[^$]) lets an escaped \$ — e.g. currency inside math — stay inside the fence
|
|
93
|
+
counts["equations"] += len(re.findall(r"(?<!\\)\$\$(?:\\\$|[^$])+\$\$", eq_text))
|
|
94
|
+
# inline math: standalone $...$ not part of $$ and not escaped (escaped \$ stays inside)
|
|
95
|
+
counts["equations"] += len(re.findall(r"(?<!\\)(?<!\$)\$(?:\\\$|[^$\n])+\$(?!\$)", eq_text))
|
|
90
96
|
# links that are not images
|
|
91
97
|
counts["links"] += len(re.findall(r"(?<!!)\[[^\]]+\]\([^)]+\)", line_text))
|
|
92
98
|
index += 1
|