jaclang 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jaclang might be problematic. Click here for more details.
- jaclang/cli/cli.md +1 -0
- jaclang/cli/cli.py +109 -37
- jaclang/compiler/jac.lark +3 -3
- jaclang/compiler/larkparse/jac_parser.py +2 -2
- jaclang/compiler/parser.py +14 -21
- jaclang/compiler/passes/main/__init__.py +5 -1
- jaclang/compiler/passes/main/binder_pass.py +594 -0
- jaclang/compiler/passes/main/cfg_build_pass.py +21 -1
- jaclang/compiler/passes/main/import_pass.py +8 -256
- jaclang/compiler/passes/main/inheritance_pass.py +10 -3
- jaclang/compiler/passes/main/pyast_gen_pass.py +92 -77
- jaclang/compiler/passes/main/pyast_load_pass.py +24 -13
- jaclang/compiler/passes/main/sem_def_match_pass.py +1 -1
- jaclang/compiler/passes/main/sym_tab_build_pass.py +4 -0
- jaclang/compiler/passes/main/tests/fixtures/M1.jac +3 -0
- jaclang/compiler/passes/main/tests/fixtures/cfg_has_var.jac +12 -0
- jaclang/compiler/passes/main/tests/fixtures/cfg_if_no_else.jac +11 -0
- jaclang/compiler/passes/main/tests/fixtures/cfg_return.jac +9 -0
- jaclang/compiler/passes/main/tests/fixtures/checker_imported.jac +2 -0
- jaclang/compiler/passes/main/tests/fixtures/checker_importer.jac +6 -0
- jaclang/compiler/passes/main/tests/fixtures/data_spatial_types.jac +1 -1
- jaclang/compiler/passes/main/tests/fixtures/import_symbol_type_infer.jac +11 -0
- jaclang/compiler/passes/main/tests/fixtures/infer_type_assignment.jac +5 -0
- jaclang/compiler/passes/main/tests/fixtures/member_access_type_inferred.jac +13 -0
- jaclang/compiler/passes/main/tests/fixtures/member_access_type_resolve.jac +11 -0
- jaclang/compiler/passes/main/tests/fixtures/sym_binder.jac +47 -0
- jaclang/compiler/passes/main/tests/fixtures/type_annotation_assignment.jac +8 -0
- jaclang/compiler/passes/main/tests/test_binder_pass.py +111 -0
- jaclang/compiler/passes/main/tests/test_cfg_build_pass.py +62 -24
- jaclang/compiler/passes/main/tests/test_checker_pass.py +87 -0
- jaclang/compiler/passes/main/tests/test_pyast_gen_pass.py +13 -13
- jaclang/compiler/passes/main/tests/test_sem_def_match_pass.py +6 -6
- jaclang/compiler/passes/main/type_checker_pass.py +128 -0
- jaclang/compiler/passes/tool/doc_ir_gen_pass.py +2 -0
- jaclang/compiler/passes/tool/tests/fixtures/simple_walk_fmt.jac +3 -0
- jaclang/compiler/program.py +32 -11
- jaclang/compiler/tests/test_sr_errors.py +32 -0
- jaclang/compiler/type_system/__init__.py +1 -0
- jaclang/compiler/type_system/type_evaluator.py +421 -0
- jaclang/compiler/type_system/type_utils.py +41 -0
- jaclang/compiler/type_system/types.py +240 -0
- jaclang/compiler/unitree.py +36 -24
- jaclang/langserve/dev_engine.jac +645 -0
- jaclang/langserve/dev_server.jac +201 -0
- jaclang/langserve/engine.jac +24 -5
- jaclang/langserve/tests/server_test/test_lang_serve.py +2 -2
- jaclang/langserve/tests/test_dev_server.py +80 -0
- jaclang/langserve/tests/test_server.py +13 -0
- jaclang/runtimelib/builtin.py +28 -39
- jaclang/runtimelib/importer.py +34 -63
- jaclang/runtimelib/machine.py +48 -64
- jaclang/runtimelib/memory.py +23 -5
- jaclang/runtimelib/tests/fixtures/savable_object.jac +10 -2
- jaclang/runtimelib/utils.py +42 -6
- jaclang/tests/fixtures/edge_node_walk.jac +1 -1
- jaclang/tests/fixtures/edges_walk.jac +1 -1
- jaclang/tests/fixtures/gendot_bubble_sort.jac +1 -1
- jaclang/tests/fixtures/py_run.jac +8 -0
- jaclang/tests/fixtures/py_run.py +23 -0
- jaclang/tests/fixtures/pyfunc.py +2 -0
- jaclang/tests/fixtures/pyfunc_fmt.py +60 -0
- jaclang/tests/fixtures/pyfunc_fstr.py +25 -0
- jaclang/tests/fixtures/pyfunc_kwesc.py +33 -0
- jaclang/tests/fixtures/python_run_test.py +19 -0
- jaclang/tests/test_cli.py +107 -0
- jaclang/tests/test_language.py +106 -5
- jaclang/utils/lang_tools.py +6 -3
- jaclang/utils/module_resolver.py +90 -0
- jaclang/utils/symtable_test_helpers.py +125 -0
- jaclang/utils/test.py +3 -4
- jaclang/vendor/interegular/__init__.py +34 -0
- jaclang/vendor/interegular/comparator.py +163 -0
- jaclang/vendor/interegular/fsm.py +1015 -0
- jaclang/vendor/interegular/patterns.py +732 -0
- jaclang/vendor/interegular/py.typed +0 -0
- jaclang/vendor/interegular/utils/__init__.py +15 -0
- jaclang/vendor/interegular/utils/simple_parser.py +165 -0
- jaclang/vendor/interegular-0.3.3.dist-info/INSTALLER +1 -0
- jaclang/vendor/interegular-0.3.3.dist-info/LICENSE.txt +21 -0
- jaclang/vendor/interegular-0.3.3.dist-info/METADATA +64 -0
- jaclang/vendor/interegular-0.3.3.dist-info/RECORD +20 -0
- jaclang/vendor/interegular-0.3.3.dist-info/REQUESTED +0 -0
- jaclang/vendor/interegular-0.3.3.dist-info/WHEEL +5 -0
- jaclang/vendor/interegular-0.3.3.dist-info/top_level.txt +1 -0
- {jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/METADATA +2 -1
- {jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/RECORD +88 -43
- {jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/WHEEL +0 -0
- {jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/entry_points.txt +0 -0
jaclang/tests/test_language.py
CHANGED
|
@@ -13,6 +13,7 @@ from jaclang import JacMachine as Jac
|
|
|
13
13
|
from jaclang.cli import cli
|
|
14
14
|
from jaclang.compiler.program import JacProgram
|
|
15
15
|
from jaclang.utils.test import TestCase
|
|
16
|
+
from jaclang.runtimelib.utils import read_file_with_encoding
|
|
16
17
|
|
|
17
18
|
|
|
18
19
|
class JacLanguageTests(TestCase):
|
|
@@ -121,13 +122,19 @@ class JacLanguageTests(TestCase):
|
|
|
121
122
|
data = json.loads(stdout_value)
|
|
122
123
|
|
|
123
124
|
nodes = data["nodes"]
|
|
124
|
-
|
|
125
|
+
edges = data["edges"]
|
|
126
|
+
|
|
127
|
+
self.assertEqual(len(nodes), 5)
|
|
128
|
+
self.assertEqual(len(edges), 6)
|
|
129
|
+
|
|
125
130
|
for node in nodes:
|
|
126
131
|
label = node["label"]
|
|
127
132
|
self.assertIn(label, ["root", "N(val=0)", "N(val=1)"])
|
|
128
133
|
|
|
129
|
-
|
|
130
|
-
|
|
134
|
+
for edge in edges:
|
|
135
|
+
label = edge["label"]
|
|
136
|
+
self.assertIn(label, ["E(val=1)", "E(val=1)", "E(val=1)", "E(val=0)", "E(val=0)", "E(val=0)"])
|
|
137
|
+
|
|
131
138
|
|
|
132
139
|
def test_printgraph_mermaid(self) -> None:
|
|
133
140
|
"""Test the mermaid gen of builtin function."""
|
|
@@ -228,7 +235,7 @@ class JacLanguageTests(TestCase):
|
|
|
228
235
|
sys.stdout = sys.__stdout__
|
|
229
236
|
stdout_value = captured_output.getvalue()
|
|
230
237
|
self.assertIn(
|
|
231
|
-
'[label="inner_node(main=5, sub=2)"];',
|
|
238
|
+
'[label="inner_node(main=5, sub=2)"fillcolor="#FFDEAD"];',
|
|
232
239
|
stdout_value,
|
|
233
240
|
)
|
|
234
241
|
|
|
@@ -1363,4 +1370,98 @@ class JacLanguageTests(TestCase):
|
|
|
1363
1370
|
stdout_value = captured_output.getvalue().split("\n")
|
|
1364
1371
|
self.assertIn("Num: 4", stdout_value[0])
|
|
1365
1372
|
self.assertIn("Num: 3", stdout_value[1])
|
|
1366
|
-
self.assertIn("Completed", stdout_value[2])
|
|
1373
|
+
self.assertIn("Completed", stdout_value[2])
|
|
1374
|
+
|
|
1375
|
+
def test_read_file_with_encoding_utf8(self) -> None:
    """Test that a UTF-8 encoded file is read back unchanged."""
    expected = "Hello, 世界! 🌍 Testing UTF-8 encoding."
    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", delete=False
    ) as handle:
        handle.write(expected)
        path = handle.name

    try:
        self.assertEqual(read_file_with_encoding(path), expected)
    finally:
        # The file was created with delete=False, so remove it by hand.
        os.unlink(path)
|
|
1387
|
+
|
|
1388
|
+
def test_read_file_with_encoding_utf16(self) -> None:
    """Test that a UTF-16 encoded file is read back unchanged."""
    expected = "Hello, 世界! UTF-16 encoding test."
    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-16", delete=False
    ) as handle:
        handle.write(expected)
        path = handle.name

    try:
        self.assertEqual(read_file_with_encoding(path), expected)
    finally:
        # The file was created with delete=False, so remove it by hand.
        os.unlink(path)
|
|
1400
|
+
|
|
1401
|
+
def test_read_file_with_encoding_utf8_bom(self) -> None:
    """Test that a file written as ``utf-8-sig`` is read back without the BOM."""
    expected = "Hello, UTF-8 BOM test! 🚀"
    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8-sig", delete=False
    ) as handle:
        handle.write(expected)
        path = handle.name

    try:
        # Equality with the plain text means the reader stripped the BOM.
        self.assertEqual(read_file_with_encoding(path), expected)
    finally:
        # The file was created with delete=False, so remove it by hand.
        os.unlink(path)
|
|
1413
|
+
|
|
1414
|
+
# TODO: Support reading files with Latin-1 encoding
|
|
1415
|
+
# def test_read_file_with_encoding_latin1(self) -> None:
|
|
1416
|
+
# """Test reading Latin-1 encoded file as fallback."""
|
|
1417
|
+
# with tempfile.NamedTemporaryFile(mode='w', encoding='latin-1', delete=False) as f:
|
|
1418
|
+
# test_content = "Hello, café! Latin-1 test."
|
|
1419
|
+
# f.write(test_content)
|
|
1420
|
+
# f.flush()
|
|
1421
|
+
# temp_path = f.name
|
|
1422
|
+
|
|
1423
|
+
# try:
|
|
1424
|
+
# result = read_file_with_encoding(temp_path)
|
|
1425
|
+
# self.assertEqual(result, test_content)
|
|
1426
|
+
# finally:
|
|
1427
|
+
# os.unlink(temp_path)
|
|
1428
|
+
|
|
1429
|
+
def test_read_file_with_encoding_binary_file_fallback(self) -> None:
    """Test that a file of raw, non-UTF-8 bytes still yields a non-empty string."""
    # NOTE(review): the payload opens with 0xFF 0xFE, which is also the
    # UTF-16 LE byte-order mark. Which decoder ends up handling it (UTF-16
    # or a later fallback such as latin-1) depends on
    # read_file_with_encoding - confirm against that helper.
    payload = b"\xff\xfe\x00Hello"
    with tempfile.NamedTemporaryFile(delete=False) as handle:
        handle.write(payload)
        handle.flush()
        path = handle.name

    try:
        decoded = read_file_with_encoding(path)
        self.assertIsInstance(decoded, str)
        self.assertGreater(len(decoded), 0)
    finally:
        # The file was created with delete=False, so remove it by hand.
        os.unlink(path)
|
|
1443
|
+
|
|
1444
|
+
def test_read_file_with_encoding_special_characters(self) -> None:
    """Test that accented letters, symbols, math, arrows and emoji survive a read."""
    expected = "".join(
        [
            "Special chars: åäö ñ ü ç é\n",
            "Symbols: ©®™ §¶†‡•\n",
            "Math: ∑∏∫√±≤≥≠\n",
            "Arrows: ←→↑↓↔\n",
            "Emoji: 😀😍🎉🔥💯\n",
        ]
    )
    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", delete=False
    ) as handle:
        handle.write(expected)
        handle.flush()
        path = handle.name

    try:
        actual = read_file_with_encoding(path)

        self.assertEqual(actual, expected)
        # Spot-check one fragment from several of the character classes.
        for fragment in ("åäö", "©®™", "∑∏∫", "😀😍"):
            self.assertIn(fragment, actual)
    finally:
        # The file was created with delete=False, so remove it by hand.
        os.unlink(path)
|
jaclang/utils/lang_tools.py
CHANGED
|
@@ -8,9 +8,11 @@ from typing import List, Optional, Type
|
|
|
8
8
|
|
|
9
9
|
import jaclang.compiler.unitree as uni
|
|
10
10
|
from jaclang.compiler.passes.main import PyastBuildPass
|
|
11
|
+
from jaclang.compiler.passes.main.cfg_build_pass import cfg_dot_from_file
|
|
11
12
|
from jaclang.compiler.passes.tool.doc_ir_gen_pass import DocIRGenPass
|
|
12
13
|
from jaclang.compiler.program import JacProgram
|
|
13
14
|
from jaclang.compiler.unitree import UniScopeNode
|
|
15
|
+
from jaclang.runtimelib.utils import read_file_with_encoding
|
|
14
16
|
from jaclang.utils.helpers import auto_generate_refs, pascal_to_snake
|
|
15
17
|
|
|
16
18
|
|
|
@@ -194,9 +196,8 @@ class AstTool:
|
|
|
194
196
|
base = base if base else "./"
|
|
195
197
|
|
|
196
198
|
if file_name.endswith(".py"):
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
parsed_ast = py_ast.parse(file_source)
|
|
199
|
+
file_source = read_file_with_encoding(file_name)
|
|
200
|
+
parsed_ast = py_ast.parse(file_source)
|
|
200
201
|
if output == "pyast":
|
|
201
202
|
return f"\n{py_ast.dump(parsed_ast, indent=2)}"
|
|
202
203
|
try:
|
|
@@ -242,6 +243,8 @@ class AstTool:
|
|
|
242
243
|
return out
|
|
243
244
|
case "ast.":
|
|
244
245
|
return ir.printgraph()
|
|
246
|
+
case "cfg.":
|
|
247
|
+
return cfg_dot_from_file(file_name)
|
|
245
248
|
case "unparse":
|
|
246
249
|
return ir.unparse()
|
|
247
250
|
case "pyast":
|
jaclang/utils/module_resolver.py
CHANGED
|
@@ -57,6 +57,18 @@ def resolve_module(target: str, base_path: str) -> Tuple[str, str]:
|
|
|
57
57
|
if res:
|
|
58
58
|
return res
|
|
59
59
|
|
|
60
|
+
typeshed_paths = get_typeshed_paths()
|
|
61
|
+
for typeshed_dir in typeshed_paths:
|
|
62
|
+
res = _candidate_from_typeshed(typeshed_dir, actual_parts)
|
|
63
|
+
if res:
|
|
64
|
+
# print(f"Found '{target}' in typeshed: {res[0]}")
|
|
65
|
+
return res
|
|
66
|
+
|
|
67
|
+
# If not found in any typeshed directory, but typeshed is configured,
|
|
68
|
+
# return a stub .pyi path for type checking.
|
|
69
|
+
stub_pyi_path = os.path.join(typeshed_paths[0], *actual_parts) + ".pyi"
|
|
70
|
+
if os.path.isfile(stub_pyi_path):
|
|
71
|
+
return stub_pyi_path, "pyi"
|
|
60
72
|
base_dir = base_path if os.path.isdir(base_path) else os.path.dirname(base_path)
|
|
61
73
|
for _ in range(max(level - 1, 0)):
|
|
62
74
|
base_dir = os.path.dirname(base_dir)
|
|
@@ -90,3 +102,81 @@ def resolve_relative_path(target: str, base_path: str) -> str:
|
|
|
90
102
|
"""Resolve only the path component for a target."""
|
|
91
103
|
path, _ = resolve_module(target, base_path)
|
|
92
104
|
return path
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def get_typeshed_paths() -> list[str]:
    """Return the vendored typeshed directories that exist on disk.

    The typeshed root is ``<dir of this file>/../vendor/typeshed`` made
    absolute. Its ``stubs`` and ``stdlib`` subdirectories are returned, in
    that order, and only when they are real directories. The result is
    empty when neither exists.
    """
    # The location is hard-coded relative to this module; it is not
    # configurable or auto-detected.
    typeshed_root = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", "vendor", "typeshed")
    )
    candidates = (
        os.path.join(typeshed_root, subdir) for subdir in ("stubs", "stdlib")
    )
    return [path for path in candidates if os.path.isdir(path)]
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _candidate_from_typeshed(base: str, parts: list[str]) -> Optional[Tuple[str, str]]:
|
|
129
|
+
"""Find .pyi files in typeshed, trying module.pyi then package/__init__.pyi."""
|
|
130
|
+
if not parts: #
|
|
131
|
+
return None
|
|
132
|
+
|
|
133
|
+
# This is the path prefix for the module/package, e.g., os.path.join(base, "collections", "abc")
|
|
134
|
+
candidate_prefix = os.path.join(base, *parts)
|
|
135
|
+
|
|
136
|
+
# 1. Check for a direct module file (e.g., base/parts.pyi or base/package/module.pyi)
|
|
137
|
+
# Example: parts=["collections", "abc"] -> candidate_prefix = base/collections/abc
|
|
138
|
+
# module_file_pyi = base/collections/abc.pyi
|
|
139
|
+
# Example: parts=["sys"] -> candidate_prefix = base/sys
|
|
140
|
+
# module_file_pyi = base/sys.pyi
|
|
141
|
+
module_file_pyi = candidate_prefix + ".pyi"
|
|
142
|
+
if os.path.isfile(module_file_pyi):
|
|
143
|
+
return module_file_pyi, "pyi"
|
|
144
|
+
|
|
145
|
+
# 2. Check if the candidate_prefix itself is a directory (package)
|
|
146
|
+
# and look for __init__.pyi inside it.
|
|
147
|
+
# Example: parts=["_typeshed"] -> candidate_prefix = base/_typeshed
|
|
148
|
+
# init_pyi = base/_typeshed/__init__.pyi
|
|
149
|
+
if os.path.isdir(candidate_prefix):
|
|
150
|
+
init_pyi = os.path.join(candidate_prefix, "__init__.pyi")
|
|
151
|
+
if os.path.isfile(init_pyi):
|
|
152
|
+
return init_pyi, "pyi"
|
|
153
|
+
|
|
154
|
+
# Heuristic for packages where stubs are in a subdirectory of the same name
|
|
155
|
+
# e.g., parts = ["requests"], candidate_prefix = base/requests
|
|
156
|
+
# checks base/requests/requests/__init__.pyi
|
|
157
|
+
# This part of the original heuristic is preserved.
|
|
158
|
+
if parts: # Ensure parts is not empty for parts[-1]
|
|
159
|
+
inner_pkg_init_pyi = os.path.join(
|
|
160
|
+
candidate_prefix, parts[-1], "__init__.pyi"
|
|
161
|
+
)
|
|
162
|
+
if os.path.isfile(inner_pkg_init_pyi):
|
|
163
|
+
return inner_pkg_init_pyi, "pyi"
|
|
164
|
+
|
|
165
|
+
return None
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class PythonModuleResolver:
    """Resolver for Python modules with enhanced import capabilities."""

    def resolve_module_path(self, target: str, base_path: str) -> str:
        """Return the path of the ``.py`` file for ``target`` next to the caller.

        Only the last dotted component of ``target`` is used, and only the
        caller's directory is searched: ``base_path`` itself when it is a
        directory, otherwise its parent, falling back to the current working
        directory when that is empty. Nothing is imported.

        Raises:
            ImportError: if ``<caller dir>/<last component>.py`` does not exist.
        """
        search_dir = (
            base_path if os.path.isdir(base_path) else os.path.dirname(base_path)
        )
        if not search_dir:
            # A bare filename has an empty dirname; treat it as cwd-relative.
            search_dir = os.getcwd()

        module_name = target.rsplit(".", 1)[-1]
        candidate = os.path.join(search_dir, module_name + ".py")

        if not os.path.exists(candidate):
            raise ImportError(f"Module '{target}' not found in {search_dir}")
        return candidate
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""Symbol table testing helpers for Jaseci."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from jaclang.compiler.unitree import Symbol, UniScopeNode
|
|
6
|
+
from jaclang.utils.test import TestCase
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class SymTableTestMixin(TestCase):
    """Mixin class providing assertion methods for symbol table testing.

    Every location check works on text: a ``line:col`` string is built and
    searched for in ``str(symbol)`` (or ``str(symbol.uses)``), so the
    helpers depend on how ``Symbol`` renders itself.
    """

    def assert_symbol_exists(
        self,
        sym_table: UniScopeNode,
        symbol_name: str,
        symbol_type: Optional[str] = None,
    ) -> Symbol:
        """Assert that a symbol exists in the symbol table and return it.

        The symbol is located with ``look_down``. When ``symbol_type`` is
        given, it must also appear as a substring of ``str(symbol)``.
        """
        symbol = look_down(sym_table, symbol_name)
        self.assertIsNotNone(
            symbol, f"Symbol '{symbol_name}' not found in symbol table"
        )
        if symbol_type:
            self.assertIn(
                symbol_type,
                str(symbol),
                f"Symbol '{symbol_name}' is not of type '{symbol_type}'",
            )
        return symbol

    def assert_symbol_decl_at(self, symbol: Symbol, line: int, col: int) -> None:
        """Assert that a symbol is declared at specific line and column.

        Passes when ``"<line>:<col>"`` occurs anywhere in ``str(symbol)``.
        """
        decl_info = str(symbol)
        expected_decl = f"{line}:{col}"
        self.assertIn(
            expected_decl,
            decl_info,
            f"Symbol declaration not found at {expected_decl}. Got: {decl_info}",
        )

    def assert_symbol_defns_at(
        self, symbol: Symbol, expected_defns: list[tuple[int, int]]
    ) -> None:
        """Assert that a symbol has definitions at specific locations.

        Each ``(line, col)`` pair must occur as ``"<line>:<col>"`` in
        ``str(symbol)``.
        """
        symbol_str = str(symbol)
        for line, col in expected_defns:
            expected_defn = f"{line}:{col}"
            self.assertIn(
                expected_defn,
                symbol_str,
                f"Symbol definition not found at {expected_defn}. Got: {symbol_str}",
            )

    def assert_symbol_uses_at(
        self, symbol: Symbol, expected_uses: list[tuple[int, int]]
    ) -> None:
        """Assert that a symbol has uses at specific locations.

        Each ``(line, col)`` pair must occur as ``"<line>:<col>"`` in
        ``str(symbol.uses)``.
        """
        symbol_uses_str = str(symbol.uses)
        for line, col in expected_uses:
            expected_use = f"{line}:{col}"
            self.assertIn(
                expected_use,
                symbol_uses_str,
                f"Symbol use not found at {expected_use}. Got: {symbol_uses_str}",
            )

    def assert_symbol_complete(
        self,
        sym_table: UniScopeNode,
        symbol_name: str,
        symbol_type: str,
        decl: tuple[int, int],
        defns: Optional[list[tuple[int, int]]] = None,
        uses: Optional[list[tuple[int, int]]] = None,
    ) -> None:
        """Assert complete symbol information (declaration, definitions, uses).

        ``defns`` and ``uses`` are only checked when non-empty.
        """
        symbol = self.assert_symbol_exists(sym_table, symbol_name, symbol_type)
        self.assert_symbol_decl_at(symbol, decl[0], decl[1])

        if defns:
            self.assert_symbol_defns_at(symbol, defns)

        if uses:
            self.assert_symbol_uses_at(symbol, uses)

    def assert_sub_table_exists(
        self, sym_table: UniScopeNode, table_name: str, tab_type: str
    ) -> UniScopeNode:
        """Assert that a sub-table exists in the symbol table and return it.

        Looks only at the direct children in ``sym_table.kid_scope``. Returns
        the first child whose ``scope_name`` contains ``table_name``.
        """
        # NOTE(review): name and type are matched independently, so this
        # passes when one child matches the name and a different child
        # matches the type. Confirm whether callers rely on that.
        sub_tables = sym_table.kid_scope
        table_names = [table.scope_name for table in sub_tables]
        type_names = [table.get_type() for table in sub_tables]
        matching_tables = [name for name in table_names if table_name in name]
        matching_types = [
            type_name for type_name in type_names if tab_type in str(type_name)
        ]
        self.assertTrue(
            len(matching_tables) > 0,
            f"Sub-table '{table_name}' not found. Available: {table_names}",
        )
        self.assertTrue(
            len(matching_types) > 0,
            f"Sub-table type '{tab_type}' not found in {table_names} of types {type_names}",
        )
        return sub_tables[table_names.index(matching_tables[0])]
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def look_down(tab: UniScopeNode, name: str, deep: bool = True) -> Optional[Symbol]:
    """Lookup a variable in the symbol table, searching downward.

    Search order:

    1. ``tab.names_in_scope``: a direct hit is returned as-is, whether or
       not the symbol is an imported one.
    2. Each scope in ``tab.inherited_scope``, via ``lookup(name, deep=False)``.
    3. When ``deep`` is true, each scope in ``tab.kid_scope``, via
       ``lookup(name, deep=True)``.

    Steps 2 and 3 call the scope's own ``lookup`` method; they do not
    recurse into this function. Returns ``None`` when nothing is found.
    """
    if name in tab.names_in_scope:
        # The original branched on ``.imported`` but returned the same
        # symbol from both arms, so the branch is dropped.
        return tab.names_in_scope[name]

    for inherited in tab.inherited_scope:
        found = inherited.lookup(name, deep=False)
        if found:
            return found

    if deep and tab.kid_scope:
        for kid in tab.kid_scope:
            found = kid.lookup(name, deep=True)
            if found:
                return found
    return None
|
jaclang/utils/test.py
CHANGED
|
@@ -9,6 +9,7 @@ from _pytest.logging import LogCaptureFixture
|
|
|
9
9
|
|
|
10
10
|
import jaclang
|
|
11
11
|
from jaclang.compiler.passes import UniPass
|
|
12
|
+
from jaclang.runtimelib.utils import read_file_with_encoding
|
|
12
13
|
from jaclang.utils.helpers import get_uni_nodes_as_snake_case as ast_snakes
|
|
13
14
|
|
|
14
15
|
import pytest
|
|
@@ -41,13 +42,11 @@ class TestCase(_TestCase):
|
|
|
41
42
|
raise ValueError("Unable to determine the file of the module.")
|
|
42
43
|
fixture_src = module.__file__
|
|
43
44
|
fixture_path = os.path.join(os.path.dirname(fixture_src), "fixtures", fixture)
|
|
44
|
-
|
|
45
|
-
return f.read()
|
|
45
|
+
return read_file_with_encoding(fixture_path)
|
|
46
46
|
|
|
47
47
|
def file_to_str(self, file_path: str) -> str:
|
|
48
48
|
"""Load fixture from fixtures directory."""
|
|
49
|
-
|
|
50
|
-
return f.read()
|
|
49
|
+
return read_file_with_encoding(file_path)
|
|
51
50
|
|
|
52
51
|
def fixture_abs_path(self, fixture: str) -> str:
|
|
53
52
|
"""Get absolute path of a fixture from fixtures directory."""
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""
|
|
2
|
+
A package to compare python-style regexes and test if they have intersections.
|
|
3
|
+
Based on the `greenery`-package by @qntm, adapted and specialized for `lark-parser`
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Iterable, Tuple
|
|
7
|
+
|
|
8
|
+
from interegular.fsm import FSM
|
|
9
|
+
from interegular.patterns import Pattern, parse_pattern, REFlags, Unsupported, InvalidSyntax
|
|
10
|
+
from interegular.comparator import Comparator
|
|
11
|
+
from interegular.utils import logger
|
|
12
|
+
|
|
13
|
+
__all__ = ['FSM', 'Pattern', 'Comparator', 'parse_pattern', 'compare_patterns', 'compare_regexes', '__version__', 'REFlags', 'Unsupported',
|
|
14
|
+
'InvalidSyntax']
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def compare_regexes(*regexes: str) -> Iterable[Tuple[str, str]]:
    """
    Checks the regexes for intersections. Returns all pairs it found

    Each regex string is parsed with `parse_pattern` and used as its own label
    in a `Comparator`. The result comes from `Comparator.check`, so pairs are
    produced lazily as the returned iterable is consumed.
    """
    c = Comparator({r: parse_pattern(r) for r in regexes})
    # A leftover debug `print(c._patterns)` was removed here: a library
    # helper should not write to stdout on every call.
    return c.check(regexes)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def compare_patterns(*ps: Pattern) -> Iterable[Tuple[Pattern, Pattern]]:
    """
    Checks the Patterns for intersections. Returns all pairs it found

    Every pattern is used as its own label inside the `Comparator`.
    """
    labelled = {pattern: pattern for pattern in ps}
    comparator = Comparator(labelled)
    return comparator.check(ps)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
__version__ = "0.3.3"
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
from collections import namedtuple
|
|
2
|
+
from dataclasses import dataclass
|
|
3
|
+
from itertools import combinations
|
|
4
|
+
from typing import List, Tuple, Any, Dict, Iterable, Set, FrozenSet, Optional
|
|
5
|
+
|
|
6
|
+
from interegular import InvalidSyntax, REFlags
|
|
7
|
+
from interegular.fsm import FSM, Alphabet, anything_else
|
|
8
|
+
from interegular.patterns import Pattern, Unsupported, parse_pattern
|
|
9
|
+
from interegular.utils import logger, soft_repr
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class ExampleCollision:
    """
    Captures the full text of an example collision between two regex.
    `main_text` is the part that actually gets captured by the two regex
    `prefix` is the part that is potentially needed for lookbehinds
    `postfix` is the part that is potentially needed for lookahead
    """
    prefix: str
    main_text: str
    postfix: str

    def format_multiline(self, intro: str = "Example Collision: ", indent: str = "",
                         force_pointer: bool = False) -> str:
        """
        Formats this example somewhat similar to a python syntax error.
        - intro is added on the first line
        - indent is added on the second line
        The three parts of the example are concatenated and `^` is used to underline them.

            ExampleCollision(prefix='a', main_text='cd', postfix='ef').format_multiline()

        leads to

            Example Collision: acdef
                                ^^

        This function will escape the character where necessary to stay readable.
        if `force_pointer` is False, the function will not produce the second line if only main_text is set

        Raises `ValueError` if `intro` is shorter than `indent`.
        """
        if len(intro) < len(indent):
            raise ValueError("Can't have intro be shorter than indent")
        prefix = soft_repr(self.prefix)
        main_text = soft_repr(self.main_text)
        postfix = soft_repr(self.postfix)
        text = f"{prefix}{main_text}{postfix}"
        # `force_pointer` used to be ignored, so the pointer line could never
        # be requested for an example that has neither prefix nor postfix.
        if force_pointer or len(text) != len(main_text):
            # Pad so the carets start under the first char of `main_text`.
            whitespace = ' ' * (len(intro) - len(indent) + len(prefix))
            pointers = '^' * len(main_text)
            return f"{intro}{text}\n{indent}{whitespace}{pointers}"
        else:
            return f"{intro}{text}"

    @property
    def full_text(self) -> str:
        """The unescaped concatenation of `prefix`, `main_text` and `postfix`."""
        return self.prefix + self.main_text + self.postfix
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class Comparator:
    """
    A class that represents the main interface for comparing a list of regex to each other.
    It expects a dictionary of arbitrary labels mapped to `Pattern` instances,
    but there is a utility function to create the instances from regex strings (`from_regexes`).

    The main interface functions all expect the arbitrary labels to be given, which
    then get mapped to the correct `Pattern` and/or `FSM` instance.

    There is a utility function `mark(a,b)` which allows to mark pairs that shouldn't
    be checked again by `check` (only honoured when `check` is called with `skip_marked=True`).
    """

    def __init__(self, patterns: Dict[Any, Pattern]):
        """
        Stores the patterns and precomputes the shared alphabet and prefix/postfix pair.

        With an empty `patterns` dict only `_patterns` and `_marked_pairs` are set;
        `_alphabet`, `_prefix_postfix`, `_fsms` and `_know_pairs` are left undefined.
        """
        self._patterns = patterns
        self._marked_pairs: Set[FrozenSet[Any]] = set()
        if not patterns:  # `isdisjoint` can not be called anyway, so we don't need to create a valid state
            return
        # One alphabet for all patterns, so every FSM built later is comparable.
        self._alphabet = Alphabet.union(*(p.get_alphabet(REFlags(0)) for p in patterns.values()))[0]
        prefix_postfix_s = [p.prefix_postfix for p in patterns.values()]
        # Largest prefix and largest postfix over all patterns
        # (presumably lookbehind/lookahead context lengths - TODO confirm).
        self._prefix_postfix = max(p[0] for p in prefix_postfix_s), max(p[1] for p in prefix_postfix_s)
        # Lazily filled caches; a value of None in `_fsms` marks a label without a usable FSM.
        self._fsms: Dict[Any, Optional[FSM]] = {}
        self._know_pairs: Dict[Tuple[Any, Any], bool] = {}

    def get_fsm(self, a: Any) -> Optional[FSM]:
        """
        Returns the cached FSM for label `a`, building it on first use.

        Returns None (and caches that) if the pattern raises `Unsupported`
        during conversion or if `a` is not a known label.
        """
        if a not in self._fsms:
            try:
                self._fsms[a] = self._patterns[a].to_fsm(self._alphabet, self._prefix_postfix)
            except Unsupported as e:
                self._fsms[a] = None
                logger.warning(f"Can't compile Pattern to fsm for {a}\n {repr(e)}")
            except KeyError:
                self._fsms[a] = None  # In case it was thrown away in `from_regexes`
        return self._fsms[a]

    def isdisjoint(self, a: Any, b: Any) -> bool:
        """
        Returns whether the patterns labelled `a` and `b` are disjoint.

        Results are cached under the ordered key `(a, b)`. If either label has
        no FSM the pair is reported as disjoint.
        """
        if (a, b) not in self._know_pairs:
            fa, fb = self.get_fsm(a), self.get_fsm(b)
            if fa is None or fb is None:
                self._know_pairs[a, b] = True  # We can't know. Assume they are disjoint
            else:
                self._know_pairs[a, b] = fa.isdisjoint(fb)
        return self._know_pairs[a, b]

    def check(self, keys: Optional[Iterable[Any]] = None, skip_marked: bool = False) -> Iterable[Tuple[Any, Any]]:
        """
        Lazily yields every pair of labels from `keys` that is not disjoint.

        `keys` defaults to all known labels. With `skip_marked=True`, pairs
        previously passed to `mark` are skipped.
        """
        if keys is None:
            keys = self._patterns
        for a, b in combinations(keys, 2):
            if skip_marked and self.is_marked(a, b):
                continue
            if not self.isdisjoint(a, b):
                yield a, b

    def get_example_overlap(self, a: Any, b: Any, max_time: Optional[float] = None) -> ExampleCollision:
        """
        Returns an `ExampleCollision` built from the first string of the
        intersection of the FSMs for `a` and `b`.

        `max_time` (seconds) is turned into an approximate iteration limit for
        the search; None means no limit.

        Raises `ValueError` if the search yields no string.
        Raises `KeyError` if `a` or `b` is not a known label.
        """
        pa, pb = self._patterns[a], self._patterns[b]
        needed_pre = max(pa.prefix_postfix[0], pb.prefix_postfix[0])
        needed_post = max(pa.prefix_postfix[1], pb.prefix_postfix[1])

        # We use the optimal alphabet here instead of the general one since that
        # massively improves performance by every metric.
        alphabet = pa.get_alphabet(REFlags(0)).union(pb.get_alphabet(REFlags(0)))[0]
        fa, fb = pa.to_fsm(alphabet, (needed_pre, needed_post)), pb.to_fsm(alphabet, (needed_pre, needed_post))
        intersection = fa.intersection(fb)
        if max_time is None:
            max_iterations = None
        else:
            # We calculate an approximation for that value of max_iterations
            # that makes sure for this function to finish in under max_time seconds
            # This values will heavily depend on CPU, python version, exact patterns
            # and probably more factors, but this should generally be in the correct
            # ballpark.
            # NOTE(review): this is negative for max_time < 0.09; how `strings`
            # treats a negative limit is not visible here - confirm.
            max_iterations = int((max_time - 0.09)/(1.4e-6 * len(alphabet)))
        try:
            text = next(intersection.strings(max_iterations))
        except StopIteration:
            raise ValueError(f"No overlap between {a} and {b} exists")
        # `anything_else` stands for symbols outside the alphabet; show it as '?'.
        text = ''.join(c if c != anything_else else '?' for c in text)
        if needed_post > 0:
            return ExampleCollision(text[:needed_pre], text[needed_pre:-needed_post], text[-needed_post:])
        else:
            # Handled separately because a `-0` slice bound would give an empty main text.
            return ExampleCollision(text[:needed_pre], text[needed_pre:], '')

    def is_marked(self, a: Any, b: Any) -> bool:
        """Returns whether the unordered pair `{a, b}` was passed to `mark`."""
        return frozenset({a, b}) in self._marked_pairs

    @property
    def marked_pairs(self) -> Set[FrozenSet[Any]]:
        """The live set of marked pairs (not a copy)."""
        return self._marked_pairs

    def count_marked_pairs(self) -> int:
        """Returns the number of marked pairs."""
        return len(self._marked_pairs)

    def mark(self, a: Any, b: Any) -> None:
        """Marks the unordered pair `{a, b}`; see `check(skip_marked=True)`."""
        self._marked_pairs.add(frozenset({a, b}))

    @classmethod
    def from_regexes(cls, regexes: Dict[Any, str]) -> "Comparator":
        """
        Builds a Comparator from a mapping of labels to regex strings.

        Regexes that raise `Unsupported` or `InvalidSyntax` while parsing are
        logged with a warning and left out of the resulting Comparator.
        """
        patterns = {}
        for k, r in regexes.items():
            try:
                patterns[k] = parse_pattern(r)
            except (Unsupported, InvalidSyntax) as e:
                logger.warning(f"Can't compile regex to Pattern for {k}\n {repr(e)}")
        return cls(patterns)
|