regen.mde 0.8.1 → 0.8.2

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/LICENSE +16 -16
  2. package/bin/build-corpus-editor.js +83 -83
  3. package/bin/build-corpus.js +41 -41
  4. package/bin/regen-mdeditor-install.js +27 -27
  5. package/bin/regen-mdeditor-uninstall.js +19 -19
  6. package/bin/validate-katex.js +93 -93
  7. package/desktop/BuildCorpusEditor/BuildCorpusEditor.csproj +22 -22
  8. package/desktop/BuildCorpusEditor/EditorForm.cs +58 -58
  9. package/desktop/BuildCorpusEditor/app.manifest +16 -16
  10. package/dist/release/{regen-mde-0.8.0-win-x64.zip → regen-mde-0.6.1-win-x64.zip} +0 -0
  11. package/dist/release/{regen-mde-0.8.1-win-x64.zip → regen-mde-0.8.2-win-x64.zip} +0 -0
  12. package/dist/windows-editor/BuildCorpusEditor.dll +0 -0
  13. package/dist/windows-editor/BuildCorpusEditor.exe +0 -0
  14. package/dist/windows-editor/BuildCorpusEditor.pdb +0 -0
  15. package/dist/windows-editor/wwwroot/index.html +20 -20
  16. package/editor-web/index.html +21 -21
  17. package/editor-web/src/main.jsx +107 -107
  18. package/editor-web/src/styles.css +99 -99
  19. package/editor-web/vite.config.js +13 -13
  20. package/examples/build-corpus.config.example.json +21 -21
  21. package/installer/install-regen-mde.ps1 +214 -214
  22. package/installer/regen-mde.nsi +81 -81
  23. package/package.json +1 -1
  24. package/pyproject.toml +1 -1
  25. package/scripts/build-windows-editor.ps1 +47 -47
  26. package/scripts/package-windows-editor.ps1 +90 -90
  27. package/scripts/run-corpus.ps1 +28 -28
  28. package/scripts/run-editor-implementation-plane.ps1 +226 -226
  29. package/scripts/run-required-tests.ps1 +98 -98
  30. package/scripts/run-smoke.ps1 +28 -28
  31. package/src/build_corpus/__init__.py +3 -3
  32. package/src/build_corpus/docx_exporter.py +10 -4
  33. package/src/build_corpus/equations.py +1345 -1345
  34. package/src/build_corpus/templates/__init__.py +1 -1
  35. package/src/build_corpus/validate_assets.py +46 -46
  36. package/tools/audit_corpus.py +203 -203
  37. package/tools/collect_microsoft_word_templates.py +228 -228
  38. package/tools/collect_online_docx_corpus.py +272 -272
  39. package/tools/collect_online_pptx_corpus.py +252 -252
  40. package/tools/compare_pptx_inputs_outputs.py +87 -87
  41. package/tools/roundtrip_docx_corpus.py +171 -171
@@ -1,28 +1,28 @@
1
- param(
2
- [Parameter(Mandatory=$true)]
3
- [string]$Docx,
4
-
5
- [string]$Out = ".tmp\smoke",
6
-
7
- [ValidateSet("assets", "base64", "s3")]
8
- [string]$Images = "assets",
9
-
10
- [string]$Config = ""
11
- )
12
-
13
- $ErrorActionPreference = "Stop"
14
- $root = Split-Path -Parent $PSScriptRoot
15
- $buildCorpus = Join-Path $root "bin\build-corpus.js"
16
- $katex = Join-Path $root "bin\validate-katex.js"
17
- $assetValidator = Join-Path $root "src\build_corpus\validate_assets.py"
18
-
19
- $args = @($buildCorpus, $Docx, "--out", $Out, "--images", $Images)
20
- if ($Config) {
21
- $args += @("--config", $Config)
22
- }
23
-
24
- node @args
25
- node $katex $Out | Set-Content -Path (Join-Path $Out "katex-report.json") -Encoding UTF8
26
- py -3 $assetValidator $Out | Set-Content -Path (Join-Path $Out "asset-report.json") -Encoding UTF8
27
-
28
- Write-Host "Smoke test passed: $Out"
1
+ param(
2
+ [Parameter(Mandatory=$true)]
3
+ [string]$Docx,
4
+
5
+ [string]$Out = ".tmp\smoke",
6
+
7
+ [ValidateSet("assets", "base64", "s3")]
8
+ [string]$Images = "assets",
9
+
10
+ [string]$Config = ""
11
+ )
12
+
13
+ $ErrorActionPreference = "Stop"
14
+ $root = Split-Path -Parent $PSScriptRoot
15
+ $buildCorpus = Join-Path $root "bin\build-corpus.js"
16
+ $katex = Join-Path $root "bin\validate-katex.js"
17
+ $assetValidator = Join-Path $root "src\build_corpus\validate_assets.py"
18
+
19
+ $args = @($buildCorpus, $Docx, "--out", $Out, "--images", $Images)
20
+ if ($Config) {
21
+ $args += @("--config", $Config)
22
+ }
23
+
24
+ node @args
25
+ node $katex $Out | Set-Content -Path (Join-Path $Out "katex-report.json") -Encoding UTF8
26
+ py -3 $assetValidator $Out | Set-Content -Path (Join-Path $Out "asset-report.json") -Encoding UTF8
27
+
28
+ Write-Host "Smoke test passed: $Out"
@@ -1,3 +1,3 @@
1
- """DOCX to Markdown conversion with native image and equation handling."""
2
-
3
- __version__ = "0.8.0"
1
+ """DOCX to Markdown conversion with native image and equation handling."""
2
+
3
+ __version__ = "0.8.0"
@@ -36,7 +36,7 @@ except ImportError: # pragma: no cover - script-style invocation
36
36
  # like "\$252.3B" — are NOT mistaken for inline-math fences. Escaped dollars then
37
37
  # flow through the plain-text path and are unescaped to "$" (counted as a fixup).
38
38
  INLINE_TOKEN_RE = re.compile(
39
- r"(!\[[^\]]*\]\([^)]+\)|\[[^\]]+\]\([^)]+\)|`[^`]+`|(?<!\\)\$\$[^$]+\$\$|(?<!\\)\$[^$\n]+\$|\*\*\*.+?\*\*\*|\*\*.+?\*\*|\*.+?\*)"
39
+ r"(!\[[^\]]*\]\([^)]+\)|\[[^\]]+\]\([^)]+\)|`[^`]+`|(?<!\\)\$\$(?:\\\$|[^$])+\$\$|(?<!\\)\$(?:\\\$|[^$\n])+\$|\*\*\*.+?\*\*\*|\*\*.+?\*\*|\*.+?\*)"
40
40
  )
41
41
 
42
42
  # Image targets python-docx can never embed as a picture (need an external
@@ -83,10 +83,16 @@ def count_input_elements(markdown: str) -> dict:
83
83
  line_text = lines[index]
84
84
  # inline elements on this line
85
85
  counts["images"] += len(re.findall(r"!\[[^\]]*\]\([^)]+\)", line_text))
86
+ # Math inside a link/image/code token is rendered as part of that token —
87
+ # INLINE_TOKEN_RE matches links/images/code BEFORE inline math — so strip
88
+ # those spans before counting equations to mirror the renderer. (e.g. a TOC
89
+ # entry "[... Allocation ($\alpha$) 117](#anchor)" is one link, not an equation.)
90
+ eq_text = re.sub(r"!\[[^\]]*\]\([^)]+\)|\[[^\]]+\]\([^)]+\)|`[^`]+`", " ", line_text)
86
91
  # display $$...$$ embedded in a line with surrounding text (render tokenizes these too)
87
- counts["equations"] += len(re.findall(r"(?<!\\)\$\$[^$]+\$\$", line_text))
88
- # inline math: standalone $...$ not part of $$ and not escaped
89
- counts["equations"] += len(re.findall(r"(?<!\\)(?<!\$)\$[^$\n]+\$(?!\$)", line_text))
92
+ # (?:\\\$|[^$]) lets an escaped \$ — e.g. currency inside math — stay inside the fence
93
+ counts["equations"] += len(re.findall(r"(?<!\\)\$\$(?:\\\$|[^$])+\$\$", eq_text))
94
+ # inline math: standalone $...$ not part of $$ and not escaped (escaped \$ stays inside)
95
+ counts["equations"] += len(re.findall(r"(?<!\\)(?<!\$)\$(?:\\\$|[^$\n])+\$(?!\$)", eq_text))
90
96
  # links that are not images
91
97
  counts["links"] += len(re.findall(r"(?<!!)\[[^\]]+\]\([^)]+\)", line_text))
92
98
  index += 1