multilingualprogramming 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- multilingualprogramming/__init__.py +74 -0
- multilingualprogramming/__main__.py +194 -0
- multilingualprogramming/codegen/__init__.py +12 -0
- multilingualprogramming/codegen/executor.py +215 -0
- multilingualprogramming/codegen/python_generator.py +592 -0
- multilingualprogramming/codegen/repl.py +489 -0
- multilingualprogramming/codegen/runtime_builtins.py +308 -0
- multilingualprogramming/core/__init__.py +12 -0
- multilingualprogramming/core/ir.py +29 -0
- multilingualprogramming/core/lowering.py +24 -0
- multilingualprogramming/datetime/__init__.py +11 -0
- multilingualprogramming/datetime/date_parser.py +190 -0
- multilingualprogramming/datetime/mp_date.py +210 -0
- multilingualprogramming/datetime/mp_datetime.py +153 -0
- multilingualprogramming/datetime/mp_time.py +147 -0
- multilingualprogramming/datetime/resource_loader.py +18 -0
- multilingualprogramming/exceptions.py +158 -0
- multilingualprogramming/imports.py +150 -0
- multilingualprogramming/keyword/__init__.py +13 -0
- multilingualprogramming/keyword/keyword_registry.py +249 -0
- multilingualprogramming/keyword/keyword_validator.py +59 -0
- multilingualprogramming/keyword/language_pack_validator.py +110 -0
- multilingualprogramming/lexer/__init__.py +11 -0
- multilingualprogramming/lexer/lexer.py +570 -0
- multilingualprogramming/lexer/source_reader.py +91 -0
- multilingualprogramming/lexer/token.py +54 -0
- multilingualprogramming/lexer/token_types.py +38 -0
- multilingualprogramming/numeral/__init__.py +11 -0
- multilingualprogramming/numeral/abstract_numeral.py +232 -0
- multilingualprogramming/numeral/complex_numeral.py +190 -0
- multilingualprogramming/numeral/fraction_numeral.py +165 -0
- multilingualprogramming/numeral/mp_numeral.py +243 -0
- multilingualprogramming/numeral/numeral_converter.py +151 -0
- multilingualprogramming/numeral/roman_numeral.py +301 -0
- multilingualprogramming/numeral/unicode_numeral.py +292 -0
- multilingualprogramming/parser/__init__.py +28 -0
- multilingualprogramming/parser/ast_nodes.py +459 -0
- multilingualprogramming/parser/ast_printer.py +677 -0
- multilingualprogramming/parser/error_messages.py +75 -0
- multilingualprogramming/parser/parser.py +1796 -0
- multilingualprogramming/parser/semantic_analyzer.py +689 -0
- multilingualprogramming/parser/surface_normalizer.py +282 -0
- multilingualprogramming/resources/datetime/eras.json +23 -0
- multilingualprogramming/resources/datetime/formats.json +32 -0
- multilingualprogramming/resources/datetime/months.json +150 -0
- multilingualprogramming/resources/datetime/weekdays.json +90 -0
- multilingualprogramming/resources/parser/error_messages.json +310 -0
- multilingualprogramming/resources/repl/commands.json +636 -0
- multilingualprogramming/resources/usm/builtins_aliases.json +731 -0
- multilingualprogramming/resources/usm/keywords.json +1063 -0
- multilingualprogramming/resources/usm/operators.json +532 -0
- multilingualprogramming/resources/usm/schema.json +34 -0
- multilingualprogramming/resources/usm/surface_patterns.json +1523 -0
- multilingualprogramming/unicode_string.py +140 -0
- multilingualprogramming/version.py +9 -0
- multilingualprogramming-0.2.0.dist-info/METADATA +350 -0
- multilingualprogramming-0.2.0.dist-info/RECORD +61 -0
- multilingualprogramming-0.2.0.dist-info/WHEEL +5 -0
- multilingualprogramming-0.2.0.dist-info/entry_points.txt +3 -0
- multilingualprogramming-0.2.0.dist-info/licenses/LICENSE +674 -0
- multilingualprogramming-0.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
#
|
|
2
|
+
# SPDX-FileCopyrightText: 2026 John Samuel <johnsamuelwrites@gmail.com>
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""Import hook for loading `.ml` modules/packages."""
|
|
8
|
+
|
|
9
|
+
import importlib.abc
|
|
10
|
+
import importlib.util
|
|
11
|
+
import sys
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
from multilingualprogramming.codegen.python_generator import PythonCodeGenerator
|
|
15
|
+
from multilingualprogramming.codegen.runtime_builtins import RuntimeBuiltins
|
|
16
|
+
from multilingualprogramming.core.lowering import lower_to_core_ir
|
|
17
|
+
from multilingualprogramming.lexer.lexer import Lexer
|
|
18
|
+
from multilingualprogramming.parser.parser import Parser
|
|
19
|
+
from multilingualprogramming.parser.semantic_analyzer import SemanticAnalyzer
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class _MLLoader(importlib.abc.Loader):
    """Import loader that transpiles a `.ml` source file and runs it as a module."""

    # Compiled results shared by every loader instance, keyed by the resolved
    # source path. Each entry records the (mtime_ns, size) stamp it was built
    # from so a modified file is recompiled.
    _CODE_CACHE = {}

    def __init__(self, source_path, is_package=False):
        self.source_path = Path(source_path)
        self._is_package = is_package

    def is_package(self, fullname):
        """Report whether the source handled by this loader is a package."""
        del fullname  # the answer was fixed when the loader was constructed
        return self._is_package

    def _build_code_object(self):
        """Return ``(code_object, language)`` for the source, compiling if needed."""
        file_stat = self.source_path.stat()
        key = str(self.source_path.resolve())
        cached = self._CODE_CACHE.get(key)
        fingerprint = (file_stat.st_mtime_ns, file_stat.st_size)
        if cached and cached["stamp"] == fingerprint:
            return cached["code"], cached["language"]

        with open(self.source_path, "r", encoding="utf-8") as handle:
            text = handle.read()

        # No language is forced on the lexer; fall back to English when it
        # reports none after tokenizing.
        lexer = Lexer(text, language=None)
        token_stream = lexer.tokenize()
        language = lexer.language or "en"

        tree = Parser(token_stream, source_language=language).parse()
        core_program = lower_to_core_ir(
            tree, language, frontend_name="lexer_parser"
        )

        # Pre-register the runtime builtins so the analyzer treats them as
        # already-defined names.
        analyzer = SemanticAnalyzer(source_language=language)
        for builtin_name in RuntimeBuiltins(language).namespace():
            analyzer.symbol_table.define(
                builtin_name, "variable", line=0, column=0
            )

        problems = analyzer.analyze(core_program.ast)
        if problems:
            details = "; ".join(map(str, problems))
            raise ImportError(
                f"Semantic checks failed for {self.source_path}: {details}"
            )

        generated = PythonCodeGenerator().generate(core_program)
        compiled = compile(generated, str(self.source_path), "exec")

        self._CODE_CACHE[key] = {
            "stamp": fingerprint,
            "code": compiled,
            "language": language,
        }
        return compiled, language

    def exec_module(self, module):
        """Run the transpiled code inside ``module``'s namespace."""
        compiled, language = self._build_code_object()
        namespace = module.__dict__
        # Runtime builtins only fill gaps; names already present on the module
        # are left untouched.
        for name, value in RuntimeBuiltins(language).namespace().items():
            if name not in namespace:
                namespace[name] = value
        exec(compiled, namespace)  # pylint: disable=exec-used
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class _MLFinder(importlib.abc.MetaPathFinder):
    """Meta-path finder that resolves imports to `.ml` files and packages."""

    @staticmethod
    def _candidate_spec(fullname, base_dir):
        """Build a spec for ``fullname`` located under ``base_dir``, or None."""
        leaf = fullname.rsplit(".", 1)[-1]
        root = Path(base_dir)

        # A directory containing ``__init__.ml`` wins over ``<leaf>.ml``.
        pkg_dir = root / leaf
        init_file = pkg_dir / "__init__.ml"
        if init_file.is_file():
            return importlib.util.spec_from_file_location(
                fullname,
                init_file,
                loader=_MLLoader(init_file, is_package=True),
                submodule_search_locations=[str(pkg_dir)],
            )

        single_file = root / f"{leaf}.ml"
        if not single_file.is_file():
            return None
        return importlib.util.spec_from_file_location(
            fullname,
            single_file,
            loader=_MLLoader(single_file, is_package=False),
        )

    def find_spec(self, fullname, path=None, target=None):
        """Return the first `.ml`-backed spec for ``fullname``, or None."""
        del target
        locations = sys.path if path is None else path
        for entry in locations:
            try:
                # An empty entry is looked up as the current directory.
                spec = self._candidate_spec(fullname, entry or ".")
            except OSError:
                # Entries that cannot be inspected are skipped.
                continue
            if spec is not None:
                return spec
        return None
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# Finder installed by enable_multilingual_imports(), or None when inactive.
_FINDER = None


def enable_multilingual_imports():
    """Install the `.ml` import finder for this process and return it.

    If the previously installed finder is still on ``sys.meta_path`` it is
    returned unchanged; otherwise a new finder is created and placed at the
    front of ``sys.meta_path``.
    """
    global _FINDER  # pylint: disable=global-statement
    if _FINDER is None or _FINDER not in sys.meta_path:
        fresh = _MLFinder()
        sys.meta_path.insert(0, fresh)
        _FINDER = fresh
    return _FINDER
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def disable_multilingual_imports():
    """Remove the `.ml` import finder from this process, if one is installed.

    The module-level finder reference is cleared whether or not the finder
    was still present on ``sys.meta_path``.
    """
    global _FINDER  # pylint: disable=global-statement
    active = _FINDER
    if active is not None and active in sys.meta_path:
        sys.meta_path.remove(active)
    _FINDER = None
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
#
|
|
2
|
+
# SPDX-FileCopyrightText: 2024 John Samuel <johnsamuelwrites@gmail.com>
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""Keyword subpackage for multilingual programming."""
|
|
8
|
+
|
|
9
|
+
from multilingualprogramming.keyword.keyword_registry import KeywordRegistry
|
|
10
|
+
from multilingualprogramming.keyword.keyword_validator import KeywordValidator
|
|
11
|
+
from multilingualprogramming.keyword.language_pack_validator import (
|
|
12
|
+
LanguagePackValidator,
|
|
13
|
+
)
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
#
|
|
2
|
+
# SPDX-FileCopyrightText: 2024 John Samuel <johnsamuelwrites@gmail.com>
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""Registry for multilingual keyword lookups based on the Universal Semantic Model."""
|
|
8
|
+
# pylint: disable=unsubscriptable-object
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, cast
|
|
13
|
+
from multilingualprogramming.exceptions import (
|
|
14
|
+
UnknownKeywordError,
|
|
15
|
+
UnsupportedLanguageError,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class KeywordRegistry:
|
|
20
|
+
"""
|
|
21
|
+
Loads and queries the USM keyword ontology.
|
|
22
|
+
|
|
23
|
+
Provides forward lookup (concept -> keyword), reverse lookup
|
|
24
|
+
(keyword -> concept), and language detection from a set of keywords.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
_instance = None
|
|
28
|
+
_data: dict[str, Any] | None = None
|
|
29
|
+
|
|
30
|
+
def __new__(cls):
|
|
31
|
+
if cls._instance is None:
|
|
32
|
+
cls._instance = super().__new__(cls)
|
|
33
|
+
return cls._instance
|
|
34
|
+
|
|
35
|
+
def __init__(self):
|
|
36
|
+
if KeywordRegistry._data is None:
|
|
37
|
+
self._load()
|
|
38
|
+
if not hasattr(self, "_reverse_index"):
|
|
39
|
+
self._reverse_index = {}
|
|
40
|
+
if not hasattr(self, "_concept_map"):
|
|
41
|
+
self._concept_map = {}
|
|
42
|
+
|
|
43
|
+
def _load(self):
|
|
44
|
+
"""Load keywords.json from the resources directory."""
|
|
45
|
+
resources_dir = Path(__file__).parent.parent / "resources" / "usm"
|
|
46
|
+
keywords_path = resources_dir / "keywords.json"
|
|
47
|
+
with open(keywords_path, "r", encoding="utf-8") as f:
|
|
48
|
+
KeywordRegistry._data = json.load(f)
|
|
49
|
+
|
|
50
|
+
# Build reverse index: {language: {keyword_string: [concept_id, ...]}}
|
|
51
|
+
self._reverse_index = {}
|
|
52
|
+
# Build flat concept map: {concept_id: {language: keyword | [keywords]}}
|
|
53
|
+
self._concept_map = {}
|
|
54
|
+
data = cast(dict[str, Any] | None, KeywordRegistry._data)
|
|
55
|
+
if data is None:
|
|
56
|
+
raise UnsupportedLanguageError("unknown")
|
|
57
|
+
|
|
58
|
+
for category_concepts in data["categories"].values():
|
|
59
|
+
for concept_id, translations in category_concepts.items():
|
|
60
|
+
self._concept_map[concept_id] = translations
|
|
61
|
+
for lang, keyword_value in translations.items():
|
|
62
|
+
if lang not in self._reverse_index:
|
|
63
|
+
self._reverse_index[lang] = {}
|
|
64
|
+
keywords = (
|
|
65
|
+
keyword_value
|
|
66
|
+
if isinstance(keyword_value, list)
|
|
67
|
+
else [keyword_value]
|
|
68
|
+
)
|
|
69
|
+
for keyword in keywords:
|
|
70
|
+
if keyword not in self._reverse_index[lang]:
|
|
71
|
+
self._reverse_index[lang][keyword] = []
|
|
72
|
+
self._reverse_index[lang][keyword].append(concept_id)
|
|
73
|
+
|
|
74
|
+
def _check_language(self, language):
|
|
75
|
+
"""Raise UnsupportedLanguageError if language is not in the registry."""
|
|
76
|
+
data = cast(dict[str, Any] | None, self._data)
|
|
77
|
+
if data is None:
|
|
78
|
+
raise UnsupportedLanguageError(language)
|
|
79
|
+
if language not in data["languages"]:
|
|
80
|
+
raise UnsupportedLanguageError(language)
|
|
81
|
+
|
|
82
|
+
def check_language(self, language):
|
|
83
|
+
"""Public language validation helper."""
|
|
84
|
+
self._check_language(language)
|
|
85
|
+
|
|
86
|
+
def get_keyword(self, concept_id, language):
|
|
87
|
+
"""
|
|
88
|
+
Look up a keyword for a concept in a given language.
|
|
89
|
+
|
|
90
|
+
Parameters:
|
|
91
|
+
concept_id (str): The USM concept ID (e.g., "COND_IF")
|
|
92
|
+
language (str): Language code (e.g., "en", "fr", "hi")
|
|
93
|
+
|
|
94
|
+
Returns:
|
|
95
|
+
str: The keyword string in the requested language
|
|
96
|
+
|
|
97
|
+
Raises:
|
|
98
|
+
UnsupportedLanguageError: If the language is not supported
|
|
99
|
+
UnknownKeywordError: If the concept_id is not found
|
|
100
|
+
"""
|
|
101
|
+
self._check_language(language)
|
|
102
|
+
if concept_id not in self._concept_map:
|
|
103
|
+
raise UnknownKeywordError(concept_id)
|
|
104
|
+
translations = self._concept_map[concept_id]
|
|
105
|
+
if language not in translations:
|
|
106
|
+
raise UnsupportedLanguageError(
|
|
107
|
+
f"{language} (not available for concept {concept_id})"
|
|
108
|
+
)
|
|
109
|
+
keyword_value = translations[language]
|
|
110
|
+
if isinstance(keyword_value, list):
|
|
111
|
+
return keyword_value[0]
|
|
112
|
+
return keyword_value
|
|
113
|
+
|
|
114
|
+
def get_concept(self, keyword, language):
|
|
115
|
+
"""
|
|
116
|
+
Reverse lookup: given a keyword in a language, return the concept ID.
|
|
117
|
+
|
|
118
|
+
Parameters:
|
|
119
|
+
keyword (str): The keyword string (e.g., "si", "अगर")
|
|
120
|
+
language (str): Language code
|
|
121
|
+
|
|
122
|
+
Returns:
|
|
123
|
+
str: The concept ID
|
|
124
|
+
|
|
125
|
+
Raises:
|
|
126
|
+
UnsupportedLanguageError: If the language is not supported
|
|
127
|
+
UnknownKeywordError: If the keyword is not found
|
|
128
|
+
"""
|
|
129
|
+
self._check_language(language)
|
|
130
|
+
lang_index = self._reverse_index.get(language, {})
|
|
131
|
+
concepts = lang_index.get(keyword)
|
|
132
|
+
if concepts is None:
|
|
133
|
+
raise UnknownKeywordError(f"'{keyword}' in language '{language}'")
|
|
134
|
+
if len(concepts) == 1:
|
|
135
|
+
return concepts[0]
|
|
136
|
+
# Return the first match; ambiguity is checked via KeywordValidator
|
|
137
|
+
return concepts[0]
|
|
138
|
+
|
|
139
|
+
def get_concepts(self, keyword, language):
|
|
140
|
+
"""
|
|
141
|
+
Get all concept IDs that match a keyword in a language.
|
|
142
|
+
|
|
143
|
+
Parameters:
|
|
144
|
+
keyword (str): The keyword string
|
|
145
|
+
language (str): Language code
|
|
146
|
+
|
|
147
|
+
Returns:
|
|
148
|
+
list[str]: List of matching concept IDs
|
|
149
|
+
"""
|
|
150
|
+
self._check_language(language)
|
|
151
|
+
lang_index = self._reverse_index.get(language, {})
|
|
152
|
+
return lang_index.get(keyword, [])
|
|
153
|
+
|
|
154
|
+
def is_keyword(self, word, language):
|
|
155
|
+
"""
|
|
156
|
+
Check if a word is a recognized keyword in the given language.
|
|
157
|
+
|
|
158
|
+
Parameters:
|
|
159
|
+
word (str): The word to check
|
|
160
|
+
language (str): Language code
|
|
161
|
+
|
|
162
|
+
Returns:
|
|
163
|
+
bool: True if the word is a keyword
|
|
164
|
+
"""
|
|
165
|
+
lang_index = self._reverse_index.get(language, {})
|
|
166
|
+
return word in lang_index
|
|
167
|
+
|
|
168
|
+
def get_all_keywords(self, language):
|
|
169
|
+
"""
|
|
170
|
+
Get all keywords for a language.
|
|
171
|
+
|
|
172
|
+
Parameters:
|
|
173
|
+
language (str): Language code
|
|
174
|
+
|
|
175
|
+
Returns:
|
|
176
|
+
dict: Mapping of concept_id -> keyword string
|
|
177
|
+
"""
|
|
178
|
+
self._check_language(language)
|
|
179
|
+
result = {}
|
|
180
|
+
for concept_id, translations in self._concept_map.items():
|
|
181
|
+
if language in translations:
|
|
182
|
+
keyword_value = translations[language]
|
|
183
|
+
if isinstance(keyword_value, list):
|
|
184
|
+
result[concept_id] = keyword_value[0]
|
|
185
|
+
else:
|
|
186
|
+
result[concept_id] = keyword_value
|
|
187
|
+
return result
|
|
188
|
+
|
|
189
|
+
def get_supported_languages(self):
|
|
190
|
+
"""
|
|
191
|
+
Get list of all supported languages.
|
|
192
|
+
|
|
193
|
+
Returns:
|
|
194
|
+
list[str]: Language codes
|
|
195
|
+
"""
|
|
196
|
+
data = cast(dict[str, Any] | None, self._data)
|
|
197
|
+
if data is None:
|
|
198
|
+
return []
|
|
199
|
+
return list(data["languages"])
|
|
200
|
+
|
|
201
|
+
def detect_language(self, keywords):
|
|
202
|
+
"""
|
|
203
|
+
Given a list of keyword strings, detect which language they belong to.
|
|
204
|
+
|
|
205
|
+
Scores each language by how many of the provided keywords match.
|
|
206
|
+
|
|
207
|
+
Parameters:
|
|
208
|
+
keywords (list[str]): List of keyword strings
|
|
209
|
+
|
|
210
|
+
Returns:
|
|
211
|
+
str | None: The language code with the most matches, or None
|
|
212
|
+
"""
|
|
213
|
+
data = cast(dict[str, Any] | None, self._data)
|
|
214
|
+
if data is None:
|
|
215
|
+
return None
|
|
216
|
+
scores = {}
|
|
217
|
+
for lang in data["languages"]:
|
|
218
|
+
lang_index = self._reverse_index.get(lang, {})
|
|
219
|
+
score = sum(1 for kw in keywords if kw in lang_index)
|
|
220
|
+
if score > 0:
|
|
221
|
+
scores[lang] = score
|
|
222
|
+
|
|
223
|
+
if not scores:
|
|
224
|
+
return None
|
|
225
|
+
return max(scores, key=scores.get)
|
|
226
|
+
|
|
227
|
+
def get_all_concept_ids(self):
|
|
228
|
+
"""
|
|
229
|
+
Get all concept IDs in the registry.
|
|
230
|
+
|
|
231
|
+
Returns:
|
|
232
|
+
list[str]: All concept IDs
|
|
233
|
+
"""
|
|
234
|
+
return list(self._concept_map.keys())
|
|
235
|
+
|
|
236
|
+
def get_language_index(self, language):
|
|
237
|
+
"""Get reverse index for a language."""
|
|
238
|
+
self._check_language(language)
|
|
239
|
+
return self._reverse_index.get(language, {})
|
|
240
|
+
|
|
241
|
+
def get_concept_map(self):
|
|
242
|
+
"""Get concept map used for validation workflows."""
|
|
243
|
+
return self._concept_map
|
|
244
|
+
|
|
245
|
+
@classmethod
|
|
246
|
+
def reset(cls):
|
|
247
|
+
"""Reset the singleton instance (useful for testing)."""
|
|
248
|
+
cls._instance = None
|
|
249
|
+
cls._data = None
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
#
|
|
2
|
+
# SPDX-FileCopyrightText: 2024 John Samuel <johnsamuelwrites@gmail.com>
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""Validator for keyword mappings, checking ambiguity and completeness."""
|
|
8
|
+
|
|
9
|
+
from multilingualprogramming.keyword.keyword_registry import KeywordRegistry
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class KeywordValidator:
    """
    Checks the keyword mappings of a single language.

    Two checks are offered:
    - Ambiguity: one keyword string that maps to more than one concept
    - Completeness: concepts that have no translation in the language
    """

    def __init__(self):
        self.registry = KeywordRegistry()

    def validate_no_ambiguity(self, language):
        """
        Find keywords of a language that map to more than one concept.

        Parameters:
            language (str): Language code

        Returns:
            list[tuple[str, list[str]]]: One (keyword, [concept_ids]) pair
                per ambiguous keyword; empty when there is no ambiguity.
        """
        self.registry.check_language(language)
        index = self.registry.get_language_index(language)
        return [
            (keyword, concept_ids)
            for keyword, concept_ids in index.items()
            if len(concept_ids) > 1
        ]

    def validate_completeness(self, language):
        """
        Find concepts that have no translation in a language.

        Parameters:
            language (str): Language code

        Returns:
            list[str]: Concept IDs lacking a translation; empty when the
                language covers every concept.
        """
        self.registry.check_language(language)
        concept_map = self.registry.get_concept_map()
        return [
            concept_id
            for concept_id, translations in concept_map.items()
            if language not in translations
        ]
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
#
|
|
2
|
+
# SPDX-FileCopyrightText: 2026 John Samuel <johnsamuelwrites@gmail.com>
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""Validation helpers for language pack smoke checks."""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from multilingualprogramming.codegen.executor import ProgramExecutor
|
|
13
|
+
from multilingualprogramming.keyword.keyword_registry import KeywordRegistry
|
|
14
|
+
from multilingualprogramming.keyword.keyword_validator import KeywordValidator
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class LanguagePackValidator:
    """Run consistency checks and a small execution test on one language pack."""

    def __init__(self):
        self._registry = KeywordRegistry()
        self._keyword_validator = KeywordValidator()
        resource_root = Path(__file__).resolve().parent.parent / "resources"
        self._error_messages_path = (
            resource_root / "parser" / "error_messages.json"
        )
        self._repl_commands_path = resource_root / "repl" / "commands.json"

    @staticmethod
    def _load_json(path):
        """Parse the JSON file at ``path``; a leading UTF-8 BOM is accepted."""
        with open(path, "r", encoding="utf-8-sig") as stream:
            return json.load(stream)

    def _validate_error_messages(self, language):
        """Return an issue string if parser messages lack ``language``, else None."""
        catalog = self._load_json(self._error_messages_path)
        missing_count = sum(
            1
            for translations in catalog.get("messages", {}).values()
            if language not in translations
        )
        if missing_count == 0:
            return None
        return f"missing parser error messages for {missing_count} keys"

    def _validate_repl_catalog(self, language):
        """Return the first REPL catalog gap found for ``language``, else None."""
        catalog = self._load_json(self._repl_commands_path)

        if language not in catalog.get("help_titles", {}):
            return "missing REPL help title"

        untranslated = next(
            (
                key
                for key, translations in catalog.get("messages", {}).items()
                if language not in translations
            ),
            None,
        )
        if untranslated is not None:
            return f"missing REPL message '{untranslated}'"

        for command_name, meta in catalog.get("commands", {}).items():
            aliases = meta.get("aliases", {}).get(language)
            description = meta.get("descriptions", {}).get(language)
            if aliases is None:
                return f"missing REPL aliases for '{command_name}'"
            if not aliases:
                return f"empty REPL aliases for '{command_name}'"
            if description is None:
                return f"missing REPL description for '{command_name}'"
        return None

    def _validate_executor_smoke(self, language):
        """Execute a two-line program in ``language`` and check it prints 1."""
        declare_kw = self._registry.get_keyword("LET", language)
        show_kw = self._registry.get_keyword("PRINT", language)
        program = f"{declare_kw} x = 1\n{show_kw}(x)\n"
        outcome = ProgramExecutor(language=language).execute(program)
        if not outcome.success:
            return "executor smoke program failed"
        if outcome.output.strip() != "1":
            return f"executor smoke output mismatch: {outcome.output!r}"
        return None

    def get_supported_languages(self):
        """Return the language codes known to the keyword registry."""
        return self._registry.get_supported_languages()

    def validate(self, language):
        """
        Collect every validation problem found for a language.

        An empty list means the language pack passed all checks.
        """
        self._registry.check_language(language)
        errors = []

        missing_concepts = self._keyword_validator.validate_completeness(
            language
        )
        if missing_concepts:
            errors.append(
                f"missing keyword translations for {len(missing_concepts)} concepts"
            )

        ambiguities = self._keyword_validator.validate_no_ambiguity(language)
        if ambiguities:
            errors.append(f"ambiguous keywords found: {len(ambiguities)}")

        # The remaining checks each return one issue string or None; they run
        # in a fixed order so the error list is stable.
        remaining_checks = (
            self._validate_error_messages,
            self._validate_repl_catalog,
            self._validate_executor_smoke,
        )
        for check in remaining_checks:
            issue = check(language)
            if issue:
                errors.append(issue)

        return errors
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
#
|
|
2
|
+
# SPDX-FileCopyrightText: 2024 John Samuel <johnsamuelwrites@gmail.com>
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""Lexer subpackage for multilingual programming."""
|
|
8
|
+
|
|
9
|
+
from multilingualprogramming.lexer.token_types import TokenType
|
|
10
|
+
from multilingualprogramming.lexer.token import Token
|
|
11
|
+
from multilingualprogramming.lexer.lexer import Lexer
|