multilingualprogramming 0.4.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/PKG-INFO +4 -1
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/README.md +3 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/__init__.py +12 -0
- multilingualprogramming-0.6.0/multilingualprogramming/__main__.py +410 -0
- multilingualprogramming-0.6.0/multilingualprogramming/codegen/build_orchestrator.py +137 -0
- multilingualprogramming-0.6.0/multilingualprogramming/codegen/encoding_guard.py +50 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/codegen/executor.py +17 -7
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/codegen/python_generator.py +31 -3
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/codegen/runtime_builtins.py +46 -0
- multilingualprogramming-0.6.0/multilingualprogramming/codegen/wat_generator.py +1649 -0
- multilingualprogramming-0.6.0/multilingualprogramming/codegen/wat_generator_core.py +100 -0
- multilingualprogramming-0.6.0/multilingualprogramming/codegen/wat_generator_expression.py +109 -0
- multilingualprogramming-0.6.0/multilingualprogramming/codegen/wat_generator_loop.py +106 -0
- multilingualprogramming-0.6.0/multilingualprogramming/codegen/wat_generator_manifest.py +259 -0
- multilingualprogramming-0.6.0/multilingualprogramming/codegen/wat_generator_match.py +191 -0
- multilingualprogramming-0.6.0/multilingualprogramming/codegen/wat_generator_oop.py +403 -0
- multilingualprogramming-0.6.0/multilingualprogramming/codegen/wat_generator_support.py +161 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/lexer/lexer.py +102 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/lexer/token.py +3 -1
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/lexer/token_types.py +1 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/parser/ast_nodes.py +15 -3
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/parser/parser.py +100 -50
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/parser/semantic_analyzer.py +22 -6
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/resources/usm/builtins_aliases.json +646 -2
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/resources/usm/keywords.json +21 -1
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/runtime/backend_selector.py +12 -2
- multilingualprogramming-0.6.0/multilingualprogramming/runtime/numeric_primitives.py +127 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/version.py +1 -1
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/wasm/loader.py +13 -0
- multilingualprogramming-0.6.0/multilingualprogramming/wasm/tuple_abi.py +38 -0
- multilingualprogramming-0.6.0/multilingualprogramming/wasm/tuple_memory.py +27 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming.egg-info/PKG-INFO +4 -1
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming.egg-info/SOURCES.txt +12 -0
- multilingualprogramming-0.4.0/multilingualprogramming/__main__.py +0 -243
- multilingualprogramming-0.4.0/multilingualprogramming/codegen/wat_generator.py +0 -762
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/LICENSE +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/codegen/__init__.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/codegen/repl.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/codegen/wasm_generator.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/core/__init__.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/core/ir.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/core/lowering.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/datetime/__init__.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/datetime/date_parser.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/datetime/mp_date.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/datetime/mp_datetime.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/datetime/mp_time.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/datetime/resource_loader.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/exceptions.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/imports.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/keyword/__init__.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/keyword/keyword_registry.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/keyword/keyword_validator.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/keyword/language_pack_validator.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/lexer/__init__.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/lexer/source_reader.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/numeral/__init__.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/numeral/abstract_numeral.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/numeral/complex_numeral.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/numeral/fraction_numeral.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/numeral/mp_numeral.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/numeral/numeral_converter.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/numeral/roman_numeral.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/numeral/unicode_numeral.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/parser/__init__.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/parser/ast_printer.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/parser/error_messages.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/parser/surface_normalizer.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/resources/datetime/eras.json +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/resources/datetime/formats.json +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/resources/datetime/months.json +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/resources/datetime/weekdays.json +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/resources/parser/error_messages.json +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/resources/repl/commands.json +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/resources/usm/operators.json +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/resources/usm/schema.json +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/resources/usm/surface_patterns.json +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/runtime/python_fallbacks.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/unicode_string.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming.egg-info/dependency_links.txt +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming.egg-info/entry_points.txt +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming.egg-info/requires.txt +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming.egg-info/top_level.txt +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/pyproject.toml +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/setup.cfg +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/setup.py +0 -0
- {multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/tests/tests.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: multilingualprogramming
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.6.0
|
|
4
4
|
Summary: Python application for multilingual programming
|
|
5
5
|
Author-email: John Samuel <johnsamuelwrites@example.com>
|
|
6
6
|
License: GPL-3.0-or-later
|
|
@@ -301,6 +301,7 @@ multilingual run main_en.ml --lang en
|
|
|
301
301
|
- Date/time: `MPDate`, `MPTime`, `MPDatetime`
|
|
302
302
|
- Frontend: `Lexer`, `Parser`, AST nodes, `SemanticAnalyzer`
|
|
303
303
|
- Runtime: `PythonCodeGenerator`, `RuntimeBuiltins`, `ProgramExecutor`, `REPL`
|
|
304
|
+
- WAT/WASM backend: `WATCodeGenerator` — compiles multilingual AST to executable WebAssembly (core language + OOP with stateful class instances)
|
|
304
305
|
|
|
305
306
|
Additional syntax now supported:
|
|
306
307
|
|
|
@@ -396,6 +397,8 @@ Use this README for setup and workflow; use `docs/` for design rationale and pol
|
|
|
396
397
|
- Development and debugging guide: [docs/development.md](docs/development.md)
|
|
397
398
|
- Guide complet en francais: [docs/fr/programmation.md](docs/fr/programmation.md)
|
|
398
399
|
- Language onboarding guide: [docs/language_onboarding.md](docs/language_onboarding.md)
|
|
400
|
+
- WAT/WASM architecture overview: [docs/WASM_ARCHITECTURE_OVERVIEW.md](docs/WASM_ARCHITECTURE_OVERVIEW.md)
|
|
401
|
+
- WAT/WASM OOP object model reference: [docs/wat_oop_model.md](docs/wat_oop_model.md)
|
|
399
402
|
- Contribution guide: [CONTRIBUTING.md](CONTRIBUTING.md)
|
|
400
403
|
- Release process: [docs/releasing.md](docs/releasing.md)
|
|
401
404
|
- Changelog: [CHANGELOG.md](CHANGELOG.md)
|
|
@@ -265,6 +265,7 @@ multilingual run main_en.ml --lang en
|
|
|
265
265
|
- Date/time: `MPDate`, `MPTime`, `MPDatetime`
|
|
266
266
|
- Frontend: `Lexer`, `Parser`, AST nodes, `SemanticAnalyzer`
|
|
267
267
|
- Runtime: `PythonCodeGenerator`, `RuntimeBuiltins`, `ProgramExecutor`, `REPL`
|
|
268
|
+
- WAT/WASM backend: `WATCodeGenerator` — compiles multilingual AST to executable WebAssembly (core language + OOP with stateful class instances)
|
|
268
269
|
|
|
269
270
|
Additional syntax now supported:
|
|
270
271
|
|
|
@@ -360,6 +361,8 @@ Use this README for setup and workflow; use `docs/` for design rationale and pol
|
|
|
360
361
|
- Development and debugging guide: [docs/development.md](docs/development.md)
|
|
361
362
|
- Guide complet en francais: [docs/fr/programmation.md](docs/fr/programmation.md)
|
|
362
363
|
- Language onboarding guide: [docs/language_onboarding.md](docs/language_onboarding.md)
|
|
364
|
+
- WAT/WASM architecture overview: [docs/WASM_ARCHITECTURE_OVERVIEW.md](docs/WASM_ARCHITECTURE_OVERVIEW.md)
|
|
365
|
+
- WAT/WASM OOP object model reference: [docs/wat_oop_model.md](docs/wat_oop_model.md)
|
|
363
366
|
- Contribution guide: [CONTRIBUTING.md](CONTRIBUTING.md)
|
|
364
367
|
- Release process: [docs/releasing.md](docs/releasing.md)
|
|
365
368
|
- Changelog: [CHANGELOG.md](CHANGELOG.md)
|
{multilingualprogramming-0.4.0 → multilingualprogramming-0.6.0}/multilingualprogramming/__init__.py
RENAMED
|
@@ -36,6 +36,13 @@ from multilingualprogramming.codegen.executor import ProgramExecutor, ExecutionR
|
|
|
36
36
|
from multilingualprogramming.codegen.repl import REPL
|
|
37
37
|
from multilingualprogramming.core.ir import CoreIRProgram
|
|
38
38
|
from multilingualprogramming.core.lowering import lower_to_core_ir
|
|
39
|
+
from multilingualprogramming.runtime.numeric_primitives import (
|
|
40
|
+
Vec2,
|
|
41
|
+
ComplexScalar,
|
|
42
|
+
FastRNG,
|
|
43
|
+
BoundedArray,
|
|
44
|
+
MinDistanceAccumulator,
|
|
45
|
+
)
|
|
39
46
|
from multilingualprogramming.imports import (
|
|
40
47
|
enable_multilingual_imports, disable_multilingual_imports,
|
|
41
48
|
)
|
|
@@ -71,6 +78,11 @@ __all__ = [
|
|
|
71
78
|
"REPL",
|
|
72
79
|
"CoreIRProgram",
|
|
73
80
|
"lower_to_core_ir",
|
|
81
|
+
"Vec2",
|
|
82
|
+
"ComplexScalar",
|
|
83
|
+
"FastRNG",
|
|
84
|
+
"BoundedArray",
|
|
85
|
+
"MinDistanceAccumulator",
|
|
74
86
|
"enable_multilingual_imports",
|
|
75
87
|
"disable_multilingual_imports",
|
|
76
88
|
]
|
|
@@ -0,0 +1,410 @@
|
|
|
1
|
+
#
|
|
2
|
+
# SPDX-FileCopyrightText: 2024 John Samuel <johnsamuelwrites@gmail.com>
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
CLI entry point for the multilingual programming language.
|
|
9
|
+
|
|
10
|
+
Usage:
|
|
11
|
+
python -m multilingualprogramming # Start REPL
|
|
12
|
+
python -m multilingualprogramming <file>.ml # Execute a source file
|
|
13
|
+
python -m multilingualprogramming run <file> # Execute a file
|
|
14
|
+
python -m multilingualprogramming repl [--lang XX] # Start REPL
|
|
15
|
+
python -m multilingualprogramming compile <file> # Show generated Python
|
|
16
|
+
python -m multilingualprogramming build-wasm-bundle <file> # Build WAT/ABI bundle
|
|
17
|
+
python -m multilingualprogramming smoke --lang fr # Validate one language pack
|
|
18
|
+
python -m multilingualprogramming smoke --all # Validate all language packs
|
|
19
|
+
"""
|
|
20
|
+
# pylint: disable=mixed-line-endings
|
|
21
|
+
|
|
22
|
+
import argparse
|
|
23
|
+
import json
|
|
24
|
+
import sys
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
|
|
27
|
+
from multilingualprogramming.codegen.encoding_guard import (
|
|
28
|
+
assert_clean_utf8_file,
|
|
29
|
+
assert_clean_text_encoding,
|
|
30
|
+
)
|
|
31
|
+
from multilingualprogramming.codegen.build_orchestrator import BuildOrchestrator
|
|
32
|
+
from multilingualprogramming.codegen.executor import ProgramExecutor
|
|
33
|
+
from multilingualprogramming.codegen.python_generator import PythonCodeGenerator
|
|
34
|
+
from multilingualprogramming.codegen.repl import REPL
|
|
35
|
+
from multilingualprogramming.codegen.wat_generator import WATCodeGenerator
|
|
36
|
+
from multilingualprogramming.keyword.language_pack_validator import (
|
|
37
|
+
LanguagePackValidator,
|
|
38
|
+
)
|
|
39
|
+
from multilingualprogramming.exceptions import UnsupportedLanguageError
|
|
40
|
+
from multilingualprogramming.lexer.lexer import Lexer
|
|
41
|
+
from multilingualprogramming.parser.parser import Parser
|
|
42
|
+
from multilingualprogramming.version import __version__
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _read_source_file(path: str) -> str:
    """Read a source file as UTF-8 text, exiting the CLI on failure.

    Args:
        path: Filesystem path of the source file to read.

    Returns:
        The complete file contents decoded as UTF-8.

    Raises:
        SystemExit: With exit code 1, after printing a message to stderr,
            when the file is missing, unreadable, or not valid UTF-8.
    """
    try:
        with open(path, encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        print(f"Error: file not found: {path}", file=sys.stderr)
    except UnicodeDecodeError as e:
        # UnicodeDecodeError derives from ValueError, not OSError, so without
        # this clause a mis-encoded file escapes as a raw traceback.
        print(f"Error: {path} is not valid UTF-8: {e}", file=sys.stderr)
    except OSError as e:
        print(f"Error: {e}", file=sys.stderr)
    # Only reached when one of the handlers above ran.
    sys.exit(1)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _parse_program_from_file(path: str, lang: str | None):
    """Lex and parse the source file at *path* and return the parse result.

    The parser's source language is the lexer's ``language`` attribute when
    set, otherwise *lang*, otherwise ``"en"``.
    """
    text = _read_source_file(path)
    tokenizer = Lexer(text, language=lang)
    token_stream = tokenizer.tokenize()
    # Read tokenizer.language only after tokenize() has run, as before.
    source_language = tokenizer.language or lang or "en"
    return Parser(token_stream, source_language=source_language).parse()
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def cmd_run(args):
    """Execute a multilingual source file.

    Args:
        args: Parsed CLI namespace. Reads ``args.file`` (path of the source
            file) and ``args.lang`` (language code handed to the executor).

    Side effects:
        May insert a directory at the front of ``sys.path``, writes the
        program's captured output to stdout, and on failure prints every
        reported error to stderr before exiting with status 1.
    """
    source = _read_source_file(args.file)

    # Determine package context so that relative imports work.
    # Walk up from the file's directory while __init__.ml files exist;
    # that chain of directories forms the package name.  The directory
    # above the outermost package becomes the sys.path entry.
    resolved = Path(args.file).resolve()
    pkg_parts = []
    current = resolved.parent
    # The filesystem root is its own parent, so stop there; otherwise a
    # stray __init__.ml at the root would keep this loop running forever.
    while (current / "__init__.ml").is_file() and current.parent != current:
        pkg_parts.append(current.name)
        current = current.parent
    pkg_parts.reverse()
    package_name = ".".join(pkg_parts) if pkg_parts else None

    # The path entry is either the package root (when inside a package)
    # or the script's own directory (top-level script).
    path_entry = str(current if pkg_parts else resolved.parent)
    if path_entry not in sys.path:
        sys.path.insert(0, path_entry)

    # Pass __package__ so the import system can resolve relative imports.
    run_globals = {"__package__": package_name} if package_name else {}

    executor = ProgramExecutor(language=args.lang)
    result = executor.execute(source, globals_dict=run_globals or None)

    if result.output:
        sys.stdout.write(result.output)

    if not result.success:
        for err in result.errors:
            print(err, file=sys.stderr)
        sys.exit(1)
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def cmd_repl(args):
    """Launch the interactive REPL configured from the parsed CLI options."""
    repl_options = {
        "language": args.lang,
        "show_python": args.show_python,
        "show_wat": args.show_wat,
        "show_rust": args.show_rust,
    }
    REPL(**repl_options).run()
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def cmd_compile(args):
    """Parse ``args.file`` and print the Python source generated from it."""
    parsed = _parse_program_from_file(args.file, args.lang)
    print(PythonCodeGenerator().generate(parsed))
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def cmd_smoke(args):
    """Validate one language pack, or every supported one, and report results.

    Prints ``[PASS] <lang>`` to stdout for a clean pack and ``[FAIL] <lang>``
    (plus one line per error) to stderr otherwise; exits with status 1 if any
    pack failed.
    """
    validator = LanguagePackValidator()
    if args.all:
        targets = sorted(validator.get_supported_languages())
    else:
        targets = [args.lang]

    any_failure = False
    for code in targets:
        try:
            problems = validator.validate(code)
        except UnsupportedLanguageError as exc:
            any_failure = True
            print(f"[FAIL] {code}: {exc}", file=sys.stderr)
            continue

        if not problems:
            print(f"[PASS] {code}")
            continue

        any_failure = True
        print(f"[FAIL] {code}", file=sys.stderr)
        for problem in problems:
            print(f" - {problem}", file=sys.stderr)

    if any_failure:
        sys.exit(1)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def cmd_wat_abi(args):
    """Print the WAT ABI manifest generated for ``args.file`` as JSON."""
    parsed = _parse_program_from_file(args.file, args.lang)
    abi = WATCodeGenerator().generate_abi_manifest(parsed)
    rendered = json.dumps(abi, ensure_ascii=False, indent=2)
    print(rendered)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def cmd_wat_host_shim(args):
    """Print the JS host-import shim built from the file's WAT ABI manifest."""
    parsed = _parse_program_from_file(args.file, args.lang)
    wat = WATCodeGenerator()
    shim_source = wat.generate_js_host_shim(wat.generate_abi_manifest(parsed))
    print(shim_source)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def cmd_wat_renderer_template(args):
    """Print the JS renderer skeleton built from the file's WAT ABI manifest."""
    parsed = _parse_program_from_file(args.file, args.lang)
    wat = WATCodeGenerator()
    template_source = wat.generate_renderer_template(
        wat.generate_abi_manifest(parsed)
    )
    print(template_source)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def cmd_encoding_check(args):
    """Run ``assert_clean_utf8_file`` on each path in ``args.files``.

    Prints ``[PASS] <path>`` to stdout or ``[FAIL] <path>: <reason>`` to
    stderr for every file, then exits with status 1 if any file failed.
    """
    had_failure = False
    for candidate in args.files:
        try:
            assert_clean_utf8_file(candidate)
            # Kept inside the try so an error while printing is reported
            # the same way as a failed check.
            print(f"[PASS] {candidate}")
        except (OSError, UnicodeDecodeError, ValueError) as exc:
            had_failure = True
            print(f"[FAIL] {candidate}: {exc}", file=sys.stderr)
    if had_failure:
        sys.exit(1)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def cmd_encoding_check_generated(args):
    """Validate generated compiler outputs for encoding regressions.

    Generates the Python source, the WAT source and the ABI manifest JSON for
    ``args.file`` and runs ``assert_clean_text_encoding`` on each one. Every
    artifact gets its own ``[PASS]`` line on stdout or ``[FAIL]`` line on
    stderr, matching the reporting style of ``cmd_encoding_check``.

    Args:
        args: Parsed CLI namespace; reads ``args.file`` and ``args.lang``.

    Raises:
        SystemExit: With exit code 1 when at least one artifact is rejected.
    """
    program = _parse_program_from_file(args.file, args.lang)
    wat_generator = WATCodeGenerator()
    artifacts = (
        ("generated_python", PythonCodeGenerator().generate(program)),
        ("generated_wat", wat_generator.generate(program)),
        (
            "generated_abi_json",
            json.dumps(
                wat_generator.generate_abi_manifest(program),
                ensure_ascii=False,
                indent=2,
            ),
        ),
    )

    failed = False
    for label, text in artifacts:
        try:
            assert_clean_text_encoding(label, text)
        except ValueError as exc:
            # NOTE(review): ValueError is assumed to be the rejection signal,
            # mirroring the except clause in cmd_encoding_check; confirm
            # against encoding_guard. Other exceptions propagate as before.
            failed = True
            print(f"[FAIL] {label}: {exc}", file=sys.stderr)
        else:
            print(f"[PASS] {label}")
    if failed:
        sys.exit(1)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def cmd_build_wasm_bundle(args):
    """Build the WAT artifact bundle for ``args.file`` into ``args.out_dir``.

    Prints one ``[PASS] <path>`` line per reported output file.
    """
    parsed = _parse_program_from_file(args.file, args.lang)
    outputs = BuildOrchestrator(args.out_dir).build_from_program(parsed)
    # Report order is part of the CLI output; keep it fixed.
    report_order = (
        "transpiled_python",
        "wat",
        "abi_manifest",
        "host_shim_js",
        "renderer_template_js",
        "build_graph",
        "build_lockfile",
    )
    for field_name in report_order:
        print(f"[PASS] {getattr(outputs, field_name)}")
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _maybe_dispatch_direct_file_run(argv):
    """Handle the ``multilingual <file>.ml [--lang XX]`` shortcut.

    Returns True after running the file when the first argument is a
    non-option ending in ".ml" (case-insensitive). Otherwise returns False
    without parsing or running anything.
    """
    if not argv:
        return False

    candidate = argv[0]
    if candidate.startswith("-") or not candidate.lower().endswith(".ml"):
        return False

    shortcut_parser = argparse.ArgumentParser(
        prog="multilingual",
        description="Execute a multilingual source file",
    )
    shortcut_parser.add_argument("file", help="Path to the source file")
    shortcut_parser.add_argument(
        "--lang",
        default=None,
        help="Source language code (e.g., en, fr, hi). Auto-detect if omitted.",
    )
    cmd_run(shortcut_parser.parse_args(argv))
    return True
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _add_lang_option(subparser):
    """Attach the optional ``--lang`` flag shared by most subcommands."""
    subparser.add_argument(
        "--lang", default=None,
        help="Source language code (e.g., en, fr, hi). Auto-detect if omitted.",
    )


def _add_file_command(subparsers, name, help_text):
    """Register subcommand *name* taking a source ``file`` and ``--lang``."""
    command_parser = subparsers.add_parser(name, help=help_text)
    command_parser.add_argument("file", help="Path to the source file")
    _add_lang_option(command_parser)
    return command_parser


def main():
    """Run the CLI entry point and dispatch subcommands.

    A first argument ending in ``.ml`` is executed directly. Otherwise the
    arguments are parsed against the subcommand table below, and when no
    subcommand is given the interactive REPL starts with default options.
    """
    argv = sys.argv[1:]
    if _maybe_dispatch_direct_file_run(argv):
        return

    parser = argparse.ArgumentParser(
        prog="multilingual",
        description=(
            "Multilingual Programming Language CLI "
            "(default command starts interactive REPL; "
            "pass <file>.ml to run directly)"
        ),
    )
    parser.add_argument(
        "--version", action="version",
        version=f"%(prog)s {__version__}",
    )

    subparsers = parser.add_subparsers(dest="command")

    # Subcommands are registered in a fixed order so --help output is stable.
    _add_file_command(subparsers, "run", "Execute a source file")

    repl_parser = subparsers.add_parser("repl", help="Start interactive REPL")
    _add_lang_option(repl_parser)
    repl_parser.add_argument(
        "--show-python", action="store_true",
        help="Display generated Python code before execution",
    )
    repl_parser.add_argument(
        "--show-wat", action="store_true",
        help="Display generated WAT (WebAssembly Text) code before execution",
    )
    repl_parser.add_argument(
        "--show-rust", action="store_true",
        help="Display generated Rust/Wasmtime bridge code before execution",
    )

    _add_file_command(subparsers, "compile", "Show generated Python code")

    smoke_parser = subparsers.add_parser(
        "smoke", help="Validate language pack(s)"
    )
    smoke_parser.add_argument(
        "--lang", default="en",
        help="Language code to validate (default: en)",
    )
    smoke_parser.add_argument(
        "--all", action="store_true",
        help="Validate all supported languages",
    )

    _add_file_command(
        subparsers, "wat-abi",
        "Emit WAT ABI manifest JSON for a source file",
    )
    _add_file_command(
        subparsers, "wat-host-shim",
        "Emit JS host shim from WAT ABI manifest for a source file",
    )
    _add_file_command(
        subparsers, "wat-renderer-template",
        "Emit JS renderer skeleton from WAT ABI manifest for a source file",
    )

    encoding_check_parser = subparsers.add_parser(
        "encoding-check",
        help="Validate UTF-8 and no-mojibake markers for files",
    )
    encoding_check_parser.add_argument(
        "files", nargs="+", help="Files to validate"
    )

    _add_file_command(
        subparsers, "encoding-check-generated",
        "Validate generated Python/WAT/ABI outputs for a source file",
    )

    build_bundle_parser = _add_file_command(
        subparsers, "build-wasm-bundle",
        "Build deterministic WAT artifacts (WAT, ABI, JS shim, renderer)",
    )
    build_bundle_parser.add_argument(
        "--out-dir", default="build/wasm",
        help="Output directory for generated artifacts (default: build/wasm)",
    )

    # argv is sys.argv[1:], which is what parse_args() would read by default.
    args = parser.parse_args(argv)

    handlers = {
        "run": cmd_run,
        "repl": cmd_repl,
        "compile": cmd_compile,
        "smoke": cmd_smoke,
        "wat-abi": cmd_wat_abi,
        "wat-host-shim": cmd_wat_host_shim,
        "wat-renderer-template": cmd_wat_renderer_template,
        "encoding-check": cmd_encoding_check,
        "encoding-check-generated": cmd_encoding_check_generated,
        "build-wasm-bundle": cmd_build_wasm_bundle,
    }
    handler = handlers.get(args.command)
    if handler is None:
        # Default: start REPL. The top-level parser defines none of the REPL
        # options, so supply the values cmd_repl reads.
        args.lang = None
        args.show_python = False
        args.show_wat = False
        args.show_rust = False
        handler = cmd_repl
    handler(args)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
#
|
|
2
|
+
# SPDX-FileCopyrightText: 2026 John Samuel <johnsamuelwrites@gmail.com>
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""Deterministic build orchestration for WAT artifact bundles."""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import os
|
|
11
|
+
import time
|
|
12
|
+
import hashlib
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from tempfile import NamedTemporaryFile
|
|
16
|
+
|
|
17
|
+
from multilingualprogramming.codegen.python_generator import PythonCodeGenerator
|
|
18
|
+
from multilingualprogramming.codegen.wat_generator import WATCodeGenerator
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class BuildOutputs:  # pylint: disable=too-many-instance-attributes
    """Resolved output file paths for build artifacts.

    Plain record of where each artifact of a bundle build is written.
    ``BuildOrchestrator`` fills it with paths inside its output directory.
    """

    # WAT module text produced by the WAT code generator.
    wat: Path
    # ABI manifest, serialized as JSON.
    abi_manifest: Path
    # JavaScript host shim generated from the ABI manifest.
    host_shim_js: Path
    # JavaScript renderer template generated from the ABI manifest.
    renderer_template_js: Path
    # Python source produced by the Python code generator.
    transpiled_python: Path
    # JSON description of the dependency graph between artifacts.
    build_graph: Path
    # JSON lockfile recording the source digest and artifact file names.
    build_lockfile: Path
    # Exclusive lock file that serializes concurrent builds (not an artifact).
    build_lock: Path
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class BuildOrchestrator: # pylint: disable=too-many-instance-attributes
|
|
36
|
+
"""Atomic, lock-guarded, deterministic artifact build."""
|
|
37
|
+
|
|
38
|
+
def __init__(self, output_dir: str | Path):
|
|
39
|
+
self.output_dir = Path(output_dir)
|
|
40
|
+
self.output_dir.mkdir(parents=True, exist_ok=True)
|
|
41
|
+
self.outputs = BuildOutputs(
|
|
42
|
+
wat=self.output_dir / "module.wat",
|
|
43
|
+
abi_manifest=self.output_dir / "abi_manifest.json",
|
|
44
|
+
host_shim_js=self.output_dir / "host_shim.js",
|
|
45
|
+
renderer_template_js=self.output_dir / "renderer_template.js",
|
|
46
|
+
transpiled_python=self.output_dir / "transpiled.py",
|
|
47
|
+
build_graph=self.output_dir / "build_graph.json",
|
|
48
|
+
build_lockfile=self.output_dir / "build.lock.json",
|
|
49
|
+
build_lock=self.output_dir / ".multilingual-build.lock",
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
def _acquire_lock(self, timeout_seconds: float = 10.0, poll_interval: float = 0.05):
|
|
53
|
+
start = time.perf_counter()
|
|
54
|
+
while True:
|
|
55
|
+
try:
|
|
56
|
+
fd = os.open(
|
|
57
|
+
self.outputs.build_lock,
|
|
58
|
+
os.O_CREAT | os.O_EXCL | os.O_WRONLY,
|
|
59
|
+
)
|
|
60
|
+
os.close(fd)
|
|
61
|
+
return
|
|
62
|
+
except FileExistsError:
|
|
63
|
+
if (time.perf_counter() - start) >= timeout_seconds:
|
|
64
|
+
raise TimeoutError(
|
|
65
|
+
f"Timeout acquiring build lock: {self.outputs.build_lock}"
|
|
66
|
+
) from None
|
|
67
|
+
time.sleep(poll_interval)
|
|
68
|
+
|
|
69
|
+
def _release_lock(self):
|
|
70
|
+
try:
|
|
71
|
+
self.outputs.build_lock.unlink()
|
|
72
|
+
except FileNotFoundError:
|
|
73
|
+
pass
|
|
74
|
+
|
|
75
|
+
@staticmethod
|
|
76
|
+
def _atomic_write_text(path: Path, content: str):
|
|
77
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
78
|
+
with NamedTemporaryFile("w", encoding="utf-8", delete=False, dir=path.parent) as tmp:
|
|
79
|
+
tmp.write(content)
|
|
80
|
+
tmp_path = Path(tmp.name)
|
|
81
|
+
os.replace(tmp_path, path)
|
|
82
|
+
|
|
83
|
+
def build_from_program(self, program) -> BuildOutputs:
|
|
84
|
+
"""Generate and atomically write all build artifacts from parsed Program."""
|
|
85
|
+
self._acquire_lock()
|
|
86
|
+
try:
|
|
87
|
+
wat_generator = WATCodeGenerator()
|
|
88
|
+
python_source = PythonCodeGenerator().generate(program)
|
|
89
|
+
wat_source = wat_generator.generate(program)
|
|
90
|
+
manifest = wat_generator.generate_abi_manifest(program)
|
|
91
|
+
host_shim = wat_generator.generate_js_host_shim(manifest)
|
|
92
|
+
renderer_template = wat_generator.generate_renderer_template(manifest)
|
|
93
|
+
source_digest = hashlib.sha256(python_source.encode("utf-8")).hexdigest()
|
|
94
|
+
|
|
95
|
+
build_graph = {
|
|
96
|
+
"version": 1,
|
|
97
|
+
"nodes": {
|
|
98
|
+
"program": [],
|
|
99
|
+
"transpiled_python": ["program"],
|
|
100
|
+
"module_wat": ["program"],
|
|
101
|
+
"abi_manifest": ["program", "module_wat"],
|
|
102
|
+
"host_shim_js": ["abi_manifest"],
|
|
103
|
+
"renderer_template_js": ["abi_manifest"],
|
|
104
|
+
},
|
|
105
|
+
}
|
|
106
|
+
build_lockfile = {
|
|
107
|
+
"version": 1,
|
|
108
|
+
"source_digest_sha256": source_digest,
|
|
109
|
+
"artifacts": {
|
|
110
|
+
"transpiled_python": self.outputs.transpiled_python.name,
|
|
111
|
+
"module_wat": self.outputs.wat.name,
|
|
112
|
+
"abi_manifest": self.outputs.abi_manifest.name,
|
|
113
|
+
"host_shim_js": self.outputs.host_shim_js.name,
|
|
114
|
+
"renderer_template_js": self.outputs.renderer_template_js.name,
|
|
115
|
+
"build_graph": self.outputs.build_graph.name,
|
|
116
|
+
},
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
self._atomic_write_text(self.outputs.transpiled_python, python_source)
|
|
120
|
+
self._atomic_write_text(self.outputs.wat, wat_source)
|
|
121
|
+
self._atomic_write_text(
|
|
122
|
+
self.outputs.abi_manifest,
|
|
123
|
+
json.dumps(manifest, ensure_ascii=False, indent=2, sort_keys=True),
|
|
124
|
+
)
|
|
125
|
+
self._atomic_write_text(self.outputs.host_shim_js, host_shim)
|
|
126
|
+
self._atomic_write_text(self.outputs.renderer_template_js, renderer_template)
|
|
127
|
+
self._atomic_write_text(
|
|
128
|
+
self.outputs.build_graph,
|
|
129
|
+
json.dumps(build_graph, ensure_ascii=False, indent=2, sort_keys=True),
|
|
130
|
+
)
|
|
131
|
+
self._atomic_write_text(
|
|
132
|
+
self.outputs.build_lockfile,
|
|
133
|
+
json.dumps(build_lockfile, ensure_ascii=False, indent=2, sort_keys=True),
|
|
134
|
+
)
|
|
135
|
+
return self.outputs
|
|
136
|
+
finally:
|
|
137
|
+
self._release_lock()
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
#
|
|
2
|
+
# SPDX-FileCopyrightText: 2026 John Samuel <johnsamuelwrites@gmail.com>
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""Encoding guardrails for generated compiler/runtime artifacts."""
|
|
8
|
+
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
# Substrings that typically appear when UTF-8 bytes are mis-decoded as
# Windows-1252/Latin-1. They are written as escapes so that editors or
# re-encoding tools cannot silently "repair" them into the legitimate
# characters (which would make the guard flag correct Unicode text).
_MOJIBAKE_MARKERS = (
    "\ufffd",  # replacement character
    "\u00c3",  # A-tilde: lead byte 0xC3 of a mis-decoded two-byte sequence
    "\u00c2",  # A-circumflex: lead byte 0xC2 of a mis-decoded two-byte sequence
    "\u00e2\u20ac\u201d",  # mis-decoded U+2014 EM DASH
    "\u00e2\u20ac\u201c",  # mis-decoded U+2013 EN DASH
    "\u00e2\u20ac\u0153",  # mis-decoded U+201C LEFT DOUBLE QUOTATION MARK
    "\u00e2\u20ac",  # generic prefix, e.g. mis-decoded U+201D (third byte undefined)
    "\u00e2\u20ac\u02dc",  # mis-decoded U+2018 LEFT SINGLE QUOTATION MARK
    "\u00e2\u20ac\u2122",  # mis-decoded U+2019 RIGHT SINGLE QUOTATION MARK
    "\u00e2\u20ac\u00a6",  # mis-decoded U+2026 HORIZONTAL ELLIPSIS
    "\u00e2\u2020\u2019",  # mis-decoded U+2192 RIGHTWARDS ARROW
    "\u00e2\u2030\u00a5",  # mis-decoded U+2265 GREATER-THAN OR EQUAL TO
)


def detect_text_encoding_issues(text: str) -> list[str]:
    """Return a list of encoding/mojibake issues detected in *text*.

    The result contains ``"utf8_bom_present"`` when *text* starts with a
    byte-order mark, followed by one ``"suspicious_marker:<marker>"`` entry
    for every known mojibake marker found in *text*, in marker-table order.
    An empty list means no issue was detected.
    """
    issues = []
    if text.startswith("\ufeff"):
        issues.append("utf8_bom_present")
    # Several entries can match the same spot because the generic prefix
    # marker is a prefix of the more specific ones; all of them are reported.
    issues.extend(
        f"suspicious_marker:{marker}"
        for marker in _MOJIBAKE_MARKERS
        if marker in text
    )
    return issues
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def assert_clean_text_encoding(label: str, text: str) -> None:
    """Fail loudly when *text* shows a BOM or any known mojibake marker.

    *label* names the checked text in the error message (for example a file
    path or artifact name).

    Raises:
        ValueError: at least one encoding issue was detected; the message
            lists every issue, comma-separated.
    """
    problems = detect_text_encoding_issues(text)
    if not problems:
        return
    raise ValueError(f"{label} contains encoding issues: {', '.join(problems)}")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def assert_clean_utf8_file(path: str | Path) -> None:
    """Check that the file at *path* is valid UTF-8 without mojibake markers.

    The file is read as UTF-8 text and handed to
    ``assert_clean_text_encoding`` with its path as the label.

    Raises:
        UnicodeDecodeError: the file content is not valid UTF-8.
        ValueError: the decoded text contains a BOM or a mojibake marker.
    """
    target = Path(path)
    decoded = target.read_text(encoding="utf-8")
    assert_clean_text_encoding(str(target), decoded)
|