jaclang 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (88)
  1. jaclang/cli/cli.md +1 -0
  2. jaclang/cli/cli.py +109 -37
  3. jaclang/compiler/jac.lark +3 -3
  4. jaclang/compiler/larkparse/jac_parser.py +2 -2
  5. jaclang/compiler/parser.py +14 -21
  6. jaclang/compiler/passes/main/__init__.py +5 -1
  7. jaclang/compiler/passes/main/binder_pass.py +594 -0
  8. jaclang/compiler/passes/main/cfg_build_pass.py +21 -1
  9. jaclang/compiler/passes/main/import_pass.py +8 -256
  10. jaclang/compiler/passes/main/inheritance_pass.py +10 -3
  11. jaclang/compiler/passes/main/pyast_gen_pass.py +92 -77
  12. jaclang/compiler/passes/main/pyast_load_pass.py +24 -13
  13. jaclang/compiler/passes/main/sem_def_match_pass.py +1 -1
  14. jaclang/compiler/passes/main/sym_tab_build_pass.py +4 -0
  15. jaclang/compiler/passes/main/tests/fixtures/M1.jac +3 -0
  16. jaclang/compiler/passes/main/tests/fixtures/cfg_has_var.jac +12 -0
  17. jaclang/compiler/passes/main/tests/fixtures/cfg_if_no_else.jac +11 -0
  18. jaclang/compiler/passes/main/tests/fixtures/cfg_return.jac +9 -0
  19. jaclang/compiler/passes/main/tests/fixtures/checker_imported.jac +2 -0
  20. jaclang/compiler/passes/main/tests/fixtures/checker_importer.jac +6 -0
  21. jaclang/compiler/passes/main/tests/fixtures/data_spatial_types.jac +1 -1
  22. jaclang/compiler/passes/main/tests/fixtures/import_symbol_type_infer.jac +11 -0
  23. jaclang/compiler/passes/main/tests/fixtures/infer_type_assignment.jac +5 -0
  24. jaclang/compiler/passes/main/tests/fixtures/member_access_type_inferred.jac +13 -0
  25. jaclang/compiler/passes/main/tests/fixtures/member_access_type_resolve.jac +11 -0
  26. jaclang/compiler/passes/main/tests/fixtures/sym_binder.jac +47 -0
  27. jaclang/compiler/passes/main/tests/fixtures/type_annotation_assignment.jac +8 -0
  28. jaclang/compiler/passes/main/tests/test_binder_pass.py +111 -0
  29. jaclang/compiler/passes/main/tests/test_cfg_build_pass.py +62 -24
  30. jaclang/compiler/passes/main/tests/test_checker_pass.py +87 -0
  31. jaclang/compiler/passes/main/tests/test_pyast_gen_pass.py +13 -13
  32. jaclang/compiler/passes/main/tests/test_sem_def_match_pass.py +6 -6
  33. jaclang/compiler/passes/main/type_checker_pass.py +128 -0
  34. jaclang/compiler/passes/tool/doc_ir_gen_pass.py +2 -0
  35. jaclang/compiler/passes/tool/tests/fixtures/simple_walk_fmt.jac +3 -0
  36. jaclang/compiler/program.py +32 -11
  37. jaclang/compiler/tests/test_sr_errors.py +32 -0
  38. jaclang/compiler/type_system/__init__.py +1 -0
  39. jaclang/compiler/type_system/type_evaluator.py +421 -0
  40. jaclang/compiler/type_system/type_utils.py +41 -0
  41. jaclang/compiler/type_system/types.py +240 -0
  42. jaclang/compiler/unitree.py +36 -24
  43. jaclang/langserve/dev_engine.jac +645 -0
  44. jaclang/langserve/dev_server.jac +201 -0
  45. jaclang/langserve/engine.jac +24 -5
  46. jaclang/langserve/tests/server_test/test_lang_serve.py +2 -2
  47. jaclang/langserve/tests/test_dev_server.py +80 -0
  48. jaclang/langserve/tests/test_server.py +13 -0
  49. jaclang/runtimelib/builtin.py +28 -39
  50. jaclang/runtimelib/importer.py +34 -63
  51. jaclang/runtimelib/machine.py +48 -64
  52. jaclang/runtimelib/memory.py +23 -5
  53. jaclang/runtimelib/tests/fixtures/savable_object.jac +10 -2
  54. jaclang/runtimelib/utils.py +42 -6
  55. jaclang/tests/fixtures/edge_node_walk.jac +1 -1
  56. jaclang/tests/fixtures/edges_walk.jac +1 -1
  57. jaclang/tests/fixtures/gendot_bubble_sort.jac +1 -1
  58. jaclang/tests/fixtures/py_run.jac +8 -0
  59. jaclang/tests/fixtures/py_run.py +23 -0
  60. jaclang/tests/fixtures/pyfunc.py +2 -0
  61. jaclang/tests/fixtures/pyfunc_fmt.py +60 -0
  62. jaclang/tests/fixtures/pyfunc_fstr.py +25 -0
  63. jaclang/tests/fixtures/pyfunc_kwesc.py +33 -0
  64. jaclang/tests/fixtures/python_run_test.py +19 -0
  65. jaclang/tests/test_cli.py +107 -0
  66. jaclang/tests/test_language.py +106 -5
  67. jaclang/utils/lang_tools.py +6 -3
  68. jaclang/utils/module_resolver.py +90 -0
  69. jaclang/utils/symtable_test_helpers.py +125 -0
  70. jaclang/utils/test.py +3 -4
  71. jaclang/vendor/interegular/__init__.py +34 -0
  72. jaclang/vendor/interegular/comparator.py +163 -0
  73. jaclang/vendor/interegular/fsm.py +1015 -0
  74. jaclang/vendor/interegular/patterns.py +732 -0
  75. jaclang/vendor/interegular/py.typed +0 -0
  76. jaclang/vendor/interegular/utils/__init__.py +15 -0
  77. jaclang/vendor/interegular/utils/simple_parser.py +165 -0
  78. jaclang/vendor/interegular-0.3.3.dist-info/INSTALLER +1 -0
  79. jaclang/vendor/interegular-0.3.3.dist-info/LICENSE.txt +21 -0
  80. jaclang/vendor/interegular-0.3.3.dist-info/METADATA +64 -0
  81. jaclang/vendor/interegular-0.3.3.dist-info/RECORD +20 -0
  82. jaclang/vendor/interegular-0.3.3.dist-info/REQUESTED +0 -0
  83. jaclang/vendor/interegular-0.3.3.dist-info/WHEEL +5 -0
  84. jaclang/vendor/interegular-0.3.3.dist-info/top_level.txt +1 -0
  85. {jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/METADATA +2 -1
  86. {jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/RECORD +88 -43
  87. {jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/WHEEL +0 -0
  88. {jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/entry_points.txt +0 -0
jaclang/tests/test_language.py CHANGED
@@ -13,6 +13,7 @@ from jaclang import JacMachine as Jac
 from jaclang.cli import cli
 from jaclang.compiler.program import JacProgram
 from jaclang.utils.test import TestCase
+from jaclang.runtimelib.utils import read_file_with_encoding


 class JacLanguageTests(TestCase):
@@ -121,13 +122,19 @@ class JacLanguageTests(TestCase):
         data = json.loads(stdout_value)

         nodes = data["nodes"]
-        self.assertEqual(len(nodes), 7)
+        edges = data["edges"]
+
+        self.assertEqual(len(nodes), 5)
+        self.assertEqual(len(edges), 6)
+
         for node in nodes:
             label = node["label"]
             self.assertIn(label, ["root", "N(val=0)", "N(val=1)"])

-        edges = data["edges"]
-        self.assertEqual(len(edges), 6)
+        for edge in edges:
+            label = edge["label"]
+            self.assertIn(label, ["E(val=1)", "E(val=1)", "E(val=1)", "E(val=0)", "E(val=0)", "E(val=0)"])
+

     def test_printgraph_mermaid(self) -> None:
         """Test the mermaid gen of builtin function."""
@@ -228,7 +235,7 @@ class JacLanguageTests(TestCase):
         sys.stdout = sys.__stdout__
         stdout_value = captured_output.getvalue()
         self.assertIn(
-            '[label="inner_node(main=5, sub=2)"];',
+            '[label="inner_node(main=5, sub=2)"fillcolor="#FFDEAD"];',
             stdout_value,
         )

@@ -1363,4 +1370,98 @@ class JacLanguageTests(TestCase):
         stdout_value = captured_output.getvalue().split("\n")
         self.assertIn("Num: 4", stdout_value[0])
         self.assertIn("Num: 3", stdout_value[1])
-        self.assertIn("Completed", stdout_value[2])
+        self.assertIn("Completed", stdout_value[2])
+
+    def test_read_file_with_encoding_utf8(self) -> None:
+        """Test reading UTF-8 encoded file."""
+        with tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', delete=False) as f:
+            test_content = "Hello, 世界! 🌍 Testing UTF-8 encoding."
+            f.write(test_content)
+            temp_path = f.name
+
+        try:
+            result = read_file_with_encoding(temp_path)
+            self.assertEqual(result, test_content)
+        finally:
+            os.unlink(temp_path)
+
+    def test_read_file_with_encoding_utf16(self) -> None:
+        """Test reading UTF-16 encoded file when UTF-8 fails."""
+        with tempfile.NamedTemporaryFile(delete=False, mode="w", encoding="utf-16") as f:
+            test_content = "Hello, 世界! UTF-16 encoding test."
+            f.write(test_content)
+            temp_path = f.name
+
+        try:
+            result = read_file_with_encoding(temp_path)
+            self.assertEqual(result, test_content)
+        finally:
+            os.unlink(temp_path)
+
+    def test_read_file_with_encoding_utf8_bom(self) -> None:
+        """Test reading UTF-8 with BOM encoded file."""
+        with tempfile.NamedTemporaryFile(delete=False, mode='w', encoding='utf-8-sig') as f:
+            test_content = "Hello, UTF-8 BOM test! 🚀"
+            f.write(test_content)
+            temp_path = f.name
+
+        try:
+            result = read_file_with_encoding(temp_path)
+            self.assertEqual(result, test_content)
+        finally:
+            os.unlink(temp_path)
+
+    # TODO: Support reading files with Latin-1 encoding
+    # def test_read_file_with_encoding_latin1(self) -> None:
+    #     """Test reading Latin-1 encoded file as fallback."""
+    #     with tempfile.NamedTemporaryFile(mode='w', encoding='latin-1', delete=False) as f:
+    #         test_content = "Hello, café! Latin-1 test."
+    #         f.write(test_content)
+    #         f.flush()
+    #         temp_path = f.name
+
+    #     try:
+    #         result = read_file_with_encoding(temp_path)
+    #         self.assertEqual(result, test_content)
+    #     finally:
+    #         os.unlink(temp_path)
+
+    def test_read_file_with_encoding_binary_file_fallback(self) -> None:
+        """Test reading binary file falls back to latin-1."""
+        with tempfile.NamedTemporaryFile(delete=False) as f:
+            binary_data = bytes([0xFF, 0xFE, 0x00, 0x48, 0x65, 0x6C, 0x6C, 0x6F])
+            f.write(binary_data)
+            f.flush()
+            temp_path = f.name
+
+        try:
+            result = read_file_with_encoding(temp_path)
+            self.assertIsInstance(result, str)
+            self.assertGreater(len(result), 0)
+        finally:
+            os.unlink(temp_path)
+
+    def test_read_file_with_encoding_special_characters(self) -> None:
+        """Test reading file with various special characters."""
+        with tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', delete=False) as f:
+            test_content = (
+                "Special chars: åäö ñ ü ç é\n"
+                "Symbols: ©®™ §¶†‡•\n"
+                "Math: ∑∏∫√±≤≥≠\n"
+                "Arrows: ←→↑↓↔\n"
+                "Emoji: 😀😍🎉🔥💯\n"
+            )
+            f.write(test_content)
+            f.flush()
+            temp_path = f.name
+
+        try:
+            result = read_file_with_encoding(temp_path)
+
+            self.assertEqual(result, test_content)
+            self.assertIn("åäö", result)
+            self.assertIn("©®™", result)
+            self.assertIn("∑∏∫", result)
+            self.assertIn("😀😍", result)
+        finally:
+            os.unlink(temp_path)
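The implementation of read_file_with_encoding itself lives in jaclang/runtimelib/utils.py and is not shown in this excerpt. As a rough orientation only, a minimal sketch of the fallback behaviour these tests exercise (try Unicode encodings first, then fall back to latin-1, which accepts any byte sequence) could look like the following; the real helper may differ:

def read_file_with_encoding_sketch(path: str) -> str:
    # Hypothetical sketch, not the actual jaclang implementation.
    # "utf-8-sig" also decodes plain UTF-8 and strips a BOM when one is present.
    for enc in ("utf-8-sig", "utf-16"):
        try:
            with open(path, "r", encoding=enc) as f:
                return f.read()
        except (UnicodeDecodeError, UnicodeError):
            continue
    # latin-1 maps every byte to a code point, so this final attempt cannot fail.
    with open(path, "r", encoding="latin-1") as f:
        return f.read()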
jaclang/utils/lang_tools.py CHANGED
@@ -8,9 +8,11 @@ from typing import List, Optional, Type

 import jaclang.compiler.unitree as uni
 from jaclang.compiler.passes.main import PyastBuildPass
+from jaclang.compiler.passes.main.cfg_build_pass import cfg_dot_from_file
 from jaclang.compiler.passes.tool.doc_ir_gen_pass import DocIRGenPass
 from jaclang.compiler.program import JacProgram
 from jaclang.compiler.unitree import UniScopeNode
+from jaclang.runtimelib.utils import read_file_with_encoding
 from jaclang.utils.helpers import auto_generate_refs, pascal_to_snake


@@ -194,9 +196,8 @@ class AstTool:
         base = base if base else "./"

         if file_name.endswith(".py"):
-            with open(file_name, "r") as f:
-                file_source = f.read()
-                parsed_ast = py_ast.parse(file_source)
+            file_source = read_file_with_encoding(file_name)
+            parsed_ast = py_ast.parse(file_source)
             if output == "pyast":
                 return f"\n{py_ast.dump(parsed_ast, indent=2)}"
         try:
@@ -242,6 +243,8 @@
                 return out
             case "ast.":
                 return ir.printgraph()
+            case "cfg.":
+                return cfg_dot_from_file(file_name)
             case "unparse":
                 return ir.unparse()
             case "pyast":
jaclang/utils/module_resolver.py CHANGED
@@ -57,6 +57,18 @@ def resolve_module(target: str, base_path: str) -> Tuple[str, str]:
         if res:
             return res

+    typeshed_paths = get_typeshed_paths()
+    for typeshed_dir in typeshed_paths:
+        res = _candidate_from_typeshed(typeshed_dir, actual_parts)
+        if res:
+            # print(f"Found '{target}' in typeshed: {res[0]}")
+            return res
+
+    # If not found in any typeshed directory, but typeshed is configured,
+    # return a stub .pyi path for type checking.
+    stub_pyi_path = os.path.join(typeshed_paths[0], *actual_parts) + ".pyi"
+    if os.path.isfile(stub_pyi_path):
+        return stub_pyi_path, "pyi"
     base_dir = base_path if os.path.isdir(base_path) else os.path.dirname(base_path)
     for _ in range(max(level - 1, 0)):
         base_dir = os.path.dirname(base_dir)
@@ -90,3 +102,81 @@ def resolve_relative_path(target: str, base_path: str) -> str:
     """Resolve only the path component for a target."""
     path, _ = resolve_module(target, base_path)
     return path
+
+
+def get_typeshed_paths() -> list[str]:
+    """Return the typeshed stubs and stdlib directories if available."""
+    # You may want to make this configurable or autodetect
+    # Corrected base path calculation: removed one ".."
+    base = os.path.join(
+        os.path.dirname(__file__),  # jaclang/utils
+        "..",  # jaclang
+        "vendor",
+        "typeshed",  # jaclang/vendor/typeshed
+    )
+    base = os.path.abspath(base)
+    stubs = os.path.join(base, "stubs")
+    stdlib = os.path.join(base, "stdlib")
+    paths = []
+    if os.path.isdir(stubs):
+        paths.append(stubs)
+    if os.path.isdir(stdlib):
+        paths.append(stdlib)
+    return paths
+
+
+def _candidate_from_typeshed(base: str, parts: list[str]) -> Optional[Tuple[str, str]]:
+    """Find .pyi files in typeshed, trying module.pyi then package/__init__.pyi."""
+    if not parts:  #
+        return None
+
+    # This is the path prefix for the module/package, e.g., os.path.join(base, "collections", "abc")
+    candidate_prefix = os.path.join(base, *parts)
+
+    # 1. Check for a direct module file (e.g., base/parts.pyi or base/package/module.pyi)
+    #    Example: parts=["collections", "abc"] -> candidate_prefix = base/collections/abc
+    #             module_file_pyi = base/collections/abc.pyi
+    #    Example: parts=["sys"] -> candidate_prefix = base/sys
+    #             module_file_pyi = base/sys.pyi
+    module_file_pyi = candidate_prefix + ".pyi"
+    if os.path.isfile(module_file_pyi):
+        return module_file_pyi, "pyi"
+
+    # 2. Check if the candidate_prefix itself is a directory (package)
+    #    and look for __init__.pyi inside it.
+    #    Example: parts=["_typeshed"] -> candidate_prefix = base/_typeshed
+    #             init_pyi = base/_typeshed/__init__.pyi
+    if os.path.isdir(candidate_prefix):
+        init_pyi = os.path.join(candidate_prefix, "__init__.pyi")
+        if os.path.isfile(init_pyi):
+            return init_pyi, "pyi"
+
+        # Heuristic for packages where stubs are in a subdirectory of the same name
+        # e.g., parts = ["requests"], candidate_prefix = base/requests
+        # checks base/requests/requests/__init__.pyi
+        # This part of the original heuristic is preserved.
+        if parts:  # Ensure parts is not empty for parts[-1]
+            inner_pkg_init_pyi = os.path.join(
+                candidate_prefix, parts[-1], "__init__.pyi"
+            )
+            if os.path.isfile(inner_pkg_init_pyi):
+                return inner_pkg_init_pyi, "pyi"
+
+    return None
+
+
+class PythonModuleResolver:
+    """Resolver for Python modules with enhanced import capabilities."""
+
+    def resolve_module_path(self, target: str, base_path: str) -> str:
+        """Resolve Python module path without importing."""
+        caller_dir = (
+            base_path if os.path.isdir(base_path) else os.path.dirname(base_path)
+        )
+        caller_dir = caller_dir if caller_dir else os.getcwd()
+        local_py_file = os.path.join(caller_dir, target.split(".")[-1] + ".py")
+
+        if os.path.exists(local_py_file):
+            return local_py_file
+        else:
+            raise ImportError(f"Module '{target}' not found in {caller_dir}")
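A small illustration of the typeshed lookup order these helpers implement, assuming the vendored typeshed tree exists under jaclang/vendor/typeshed (the module parts are illustrative):

from jaclang.utils.module_resolver import _candidate_from_typeshed, get_typeshed_paths

# get_typeshed_paths() returns [".../vendor/typeshed/stubs", ".../vendor/typeshed/stdlib"]
# for whichever of the two directories exist.
for base in get_typeshed_paths():
    hit = _candidate_from_typeshed(base, ["collections", "abc"])
    if hit:
        stub_path, kind = hit  # e.g. ".../stdlib/collections/abc.pyi", "pyi"
        print(stub_path, kind)
        break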
jaclang/utils/symtable_test_helpers.py ADDED
@@ -0,0 +1,125 @@
+"""Symbol table testing helpers for Jaseci."""
+
+from typing import Optional
+
+from jaclang.compiler.unitree import Symbol, UniScopeNode
+from jaclang.utils.test import TestCase
+
+
+class SymTableTestMixin(TestCase):
+    """Mixin class providing assertion methods for symbol table testing."""
+
+    def assert_symbol_exists(
+        self,
+        sym_table: UniScopeNode,
+        symbol_name: str,
+        symbol_type: Optional[str] = None,
+    ) -> Symbol:
+        """Assert that a symbol exists in the symbol table."""
+        symbol = look_down(sym_table, symbol_name)
+        self.assertIsNotNone(
+            symbol, f"Symbol '{symbol_name}' not found in symbol table"
+        )
+        if symbol_type:
+            self.assertIn(
+                symbol_type,
+                str(symbol),
+                f"Symbol '{symbol_name}' is not of type '{symbol_type}'",
+            )
+        return symbol
+
+    def assert_symbol_decl_at(self, symbol: Symbol, line: int, col: int) -> None:
+        """Assert that a symbol is declared at specific line and column."""
+        decl_info = str(symbol)
+        expected_decl = f"{line}:{col}"
+        self.assertIn(
+            expected_decl,
+            decl_info,
+            f"Symbol declaration not found at {expected_decl}. Got: {decl_info}",
+        )
+
+    def assert_symbol_defns_at(
+        self, symbol: Symbol, expected_defns: list[tuple[int, int]]
+    ) -> None:
+        """Assert that a symbol has definitions at specific locations."""
+        symbol_str = str(symbol)
+        for line, col in expected_defns:
+            expected_defn = f"{line}:{col}"
+            self.assertIn(
+                expected_defn,
+                symbol_str,
+                f"Symbol definition not found at {expected_defn}. Got: {symbol_str}",
+            )
+
+    def assert_symbol_uses_at(
+        self, symbol: Symbol, expected_uses: list[tuple[int, int]]
+    ) -> None:
+        """Assert that a symbol has uses at specific locations."""
+        symbol_uses_str = str(symbol.uses)
+        for line, col in expected_uses:
+            expected_use = f"{line}:{col}"
+            self.assertIn(
+                expected_use,
+                symbol_uses_str,
+                f"Symbol use not found at {expected_use}. Got: {symbol_uses_str}",
+            )
+
+    def assert_symbol_complete(
+        self,
+        sym_table: UniScopeNode,
+        symbol_name: str,
+        symbol_type: str,
+        decl: tuple[int, int],
+        defns: Optional[list[tuple[int, int]]] = None,
+        uses: Optional[list[tuple[int, int]]] = None,
+    ) -> None:
+        """Assert complete symbol information (declaration, definitions, uses)."""
+        symbol = self.assert_symbol_exists(sym_table, symbol_name, symbol_type)
+        self.assert_symbol_decl_at(symbol, decl[0], decl[1])
+
+        if defns:
+            self.assert_symbol_defns_at(symbol, defns)
+
+        if uses:
+            self.assert_symbol_uses_at(symbol, uses)
+
+    def assert_sub_table_exists(
+        self, sym_table: UniScopeNode, table_name: str, tab_type: str
+    ) -> None:
+        """Assert that a sub-table exists in the symbol table."""
+        sub_tables = sym_table.kid_scope
+        table_names = [table.scope_name for table in sub_tables]
+        type_names = [table.get_type() for table in sub_tables]
+        matching_tables = [name for name in table_names if table_name in name]
+        matching_types = [
+            type_name for type_name in type_names if tab_type in str(type_name)
+        ]
+        self.assertTrue(
+            len(matching_tables) > 0,
+            f"Sub-table '{table_name}' not found. Available: {table_names}",
+        )
+        self.assertTrue(
+            len(matching_types) > 0,
+            f"Sub-table type '{tab_type}' not found in {table_names} of types {type_names}",
+        )
+        return sub_tables[table_names.index(matching_tables[0])]
+
+
+def look_down(tab: UniScopeNode, name: str, deep: bool = True) -> Optional[Symbol]:
+    """Lookup a variable in the symbol table."""
+    if name in tab.names_in_scope:
+        if not tab.names_in_scope[name].imported:
+            return tab.names_in_scope[name]
+        else:
+            sym = tab.names_in_scope[name]
+            return sym
+    for i in tab.inherited_scope:
+        found = i.lookup(name, deep=False)
+        if found:
+            return found
+    if deep and tab.kid_scope:
+        for kid in tab.kid_scope:
+            found = kid.lookup(name, deep=True)
+            if found:
+                return found
+    return None
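A hedged sketch of how a pass test might use SymTableTestMixin. The fixture name, symbol name, type label, and positions below are illustrative, and obtaining the module scope is left behind a hypothetical helper because the compile entry point used by the real tests is not part of this excerpt:

from jaclang.utils.symtable_test_helpers import SymTableTestMixin


class BinderSymbolTests(SymTableTestMixin):
    def test_walker_symbol(self) -> None:
        # Hypothetical helper: compile a fixture and return its UniScopeNode.
        module_scope = self.build_fixture_scope("sym_binder.jac")
        self.assert_symbol_complete(
            module_scope,
            "my_walker",             # illustrative symbol name
            symbol_type="ability",   # illustrative type label
            decl=(3, 8),             # illustrative line:col of the declaration
            uses=[(10, 5)],          # illustrative use sites
        )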
jaclang/utils/test.py CHANGED
@@ -9,6 +9,7 @@ from _pytest.logging import LogCaptureFixture

 import jaclang
 from jaclang.compiler.passes import UniPass
+from jaclang.runtimelib.utils import read_file_with_encoding
 from jaclang.utils.helpers import get_uni_nodes_as_snake_case as ast_snakes

 import pytest
@@ -41,13 +42,11 @@ class TestCase(_TestCase):
             raise ValueError("Unable to determine the file of the module.")
         fixture_src = module.__file__
         fixture_path = os.path.join(os.path.dirname(fixture_src), "fixtures", fixture)
-        with open(fixture_path, "r", encoding="utf-8") as f:
-            return f.read()
+        return read_file_with_encoding(fixture_path)

     def file_to_str(self, file_path: str) -> str:
        """Load fixture from fixtures directory."""
-        with open(file_path, "r", encoding="utf-8") as f:
-            return f.read()
+        return read_file_with_encoding(file_path)

     def fixture_abs_path(self, fixture: str) -> str:
         """Get absolute path of a fixture from fixtures directory."""
jaclang/vendor/interegular/__init__.py ADDED
@@ -0,0 +1,34 @@
+"""
+A package to compare python-style regexes and test if they have intersections.
+Based on the `greenery`-package by @qntm, adapted and specialized for `lark-parser`
+"""
+
+from typing import Iterable, Tuple
+
+from interegular.fsm import FSM
+from interegular.patterns import Pattern, parse_pattern, REFlags, Unsupported, InvalidSyntax
+from interegular.comparator import Comparator
+from interegular.utils import logger
+
+__all__ = ['FSM', 'Pattern', 'Comparator', 'parse_pattern', 'compare_patterns', 'compare_regexes', '__version__', 'REFlags', 'Unsupported',
+           'InvalidSyntax']
+
+
+def compare_regexes(*regexes: str) -> Iterable[Tuple[str, str]]:
+    """
+    Checks the regexes for intersections. Returns all pairs it found
+    """
+    c = Comparator({r: parse_pattern(r) for r in regexes})
+    print(c._patterns)
+    return c.check(regexes)
+
+
+def compare_patterns(*ps: Pattern) -> Iterable[Tuple[Pattern, Pattern]]:
+    """
+    Checks the Patterns for intersections. Returns all pairs it found
+    """
+    c = Comparator({p: p for p in ps})
+    return c.check(ps)
+
+
+__version__ = "0.3.3"
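Usage sketch for this vendored module, assuming jaclang/vendor is on sys.path so the package imports as interegular (its own modules import it that way); the regexes are illustrative:

from interegular import compare_regexes

# Report which of the given patterns can match the same string.
for a, b in compare_regexes(r"[a-z]+", r"abc", r"[0-9]+"):
    print(f"possible collision between {a!r} and {b!r}")  # e.g. '[a-z]+' and 'abc'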
jaclang/vendor/interegular/comparator.py ADDED
@@ -0,0 +1,163 @@
+from collections import namedtuple
+from dataclasses import dataclass
+from itertools import combinations
+from typing import List, Tuple, Any, Dict, Iterable, Set, FrozenSet, Optional
+
+from interegular import InvalidSyntax, REFlags
+from interegular.fsm import FSM, Alphabet, anything_else
+from interegular.patterns import Pattern, Unsupported, parse_pattern
+from interegular.utils import logger, soft_repr
+
+
+@dataclass
+class ExampleCollision:
+    """
+    Captures the full text of an example collision between two regex.
+    `main_text` is the part that actually gets captured by the two regex
+    `prefix` is the part that is potentially needed for lookbehinds
+    `postfix` is the part that is potentially needed for lookahead
+    """
+    prefix: str
+    main_text: str
+    postfix: str
+
+    def format_multiline(self, intro: str = "Example Collision: ", indent: str = "",
+                         force_pointer: bool = False) -> str:
+        """
+        Formats this example somewhat similar to a python syntax error.
+        - intro is added on the first line
+        - indent is added on the second line
+        The three parts of the example are concatenated and `^` is used to underline them.
+
+        ExampleCollision(prefix='a', main_text='cd', postfix='ef').format_multiline()
+
+        leads to
+
+        Example Collision: acdef
+                            ^^
+
+        This function will escape the character where necessary to stay readable.
+        if `force_pointer` is False, the function will not produce the second line if only main_text is set
+        """
+        if len(intro) < len(indent):
+            raise ValueError("Can't have intro be shorter than indent")
+        prefix = soft_repr(self.prefix)
+        main_text = soft_repr(self.main_text)
+        postfix = soft_repr(self.postfix)
+        text = f"{prefix}{main_text}{postfix}"
+        if len(text) != len(main_text):
+            whitespace = ' ' * (len(intro) - len(indent) + len(prefix))
+            pointers = '^' * len(main_text)
+            return f"{intro}{text}\n{indent}{whitespace}{pointers}"
+        else:
+            return f"{intro}{text}"
+
+    @property
+    def full_text(self):
+        return self.prefix + self.main_text + self.postfix
+
+
+class Comparator:
+    """
+    A class that represents the main interface for comparing a list of regex to each other.
+    It expects a dictionary of arbitrary labels mapped to `Pattern` instances,
+    but there is a utility function to create the instances `from_regex` strings.
+
+    The main interface function all expect the abitrary labels to be given, which
+    then get mapped to the correct `Pattern` and/or `FSM` instance.
+
+    There is a utility function `mark(a,b)` which allows to mark pairs that shouldn't
+    be checked again by `check`.
+    """
+
+    def __init__(self, patterns: Dict[Any, Pattern]):
+        self._patterns = patterns
+        self._marked_pairs: Set[FrozenSet[Any]] = set()
+        if not patterns:  # `isdisjoint` can not be called anyway, so we don't need to create a valid state
+            return
+        self._alphabet = Alphabet.union(*(p.get_alphabet(REFlags(0)) for p in patterns.values()))[0]
+        prefix_postfix_s = [p.prefix_postfix for p in patterns.values()]
+        self._prefix_postfix = max(p[0] for p in prefix_postfix_s), max(p[1] for p in prefix_postfix_s)
+        self._fsms: Dict[Any, FSM] = {}
+        self._know_pairs: Dict[Tuple[Any, Any], bool] = {}
+
+    def get_fsm(self, a: Any) -> FSM:
+        if a not in self._fsms:
+            try:
+                self._fsms[a] = self._patterns[a].to_fsm(self._alphabet, self._prefix_postfix)
+            except Unsupported as e:
+                self._fsms[a] = None
+                logger.warning(f"Can't compile Pattern to fsm for {a}\n {repr(e)}")
+            except KeyError:
+                self._fsms[a] = None  # In case it was thrown away in `from_regexes`
+        return self._fsms[a]
+
+    def isdisjoint(self, a: Any, b: Any) -> bool:
+        if (a, b) not in self._know_pairs:
+            fa, fb = self.get_fsm(a), self.get_fsm(b)
+            if fa is None or fb is None:
+                self._know_pairs[a, b] = True  # We can't know. Assume they are disjoint
+            else:
+                self._know_pairs[a, b] = fa.isdisjoint(fb)
+        return self._know_pairs[a, b]
+
+    def check(self, keys: Iterable[Any] = None, skip_marked: bool = False) -> Iterable[Tuple[Any, Any]]:
+        if keys is None:
+            keys = self._patterns
+        for a, b in combinations(keys, 2):
+            if skip_marked and self.is_marked(a, b):
+                continue
+            if not self.isdisjoint(a, b):
+                yield a, b
+
+    def get_example_overlap(self, a: Any, b: Any, max_time: float = None) -> ExampleCollision:
+        pa, pb = self._patterns[a], self._patterns[b]
+        needed_pre = max(pa.prefix_postfix[0], pb.prefix_postfix[0])
+        needed_post = max(pa.prefix_postfix[1], pb.prefix_postfix[1])
+
+        # We use the optimal alphabet here instead of the general one since that
+        # massively improves performance by every metric.
+        alphabet = pa.get_alphabet(REFlags(0)).union(pb.get_alphabet(REFlags(0)))[0]
+        fa, fb = pa.to_fsm(alphabet, (needed_pre, needed_post)), pb.to_fsm(alphabet, (needed_pre, needed_post))
+        intersection = fa.intersection(fb)
+        if max_time is None:
+            max_iterations = None
+        else:
+            # We calculate an approximation for that value of max_iterations
+            # that makes sure for this function to finish in under max_time seconds
+            # This values will heavily depend on CPU, python version, exact patterns
+            # and probably more factors, but this should generally be in the correct
+            # ballpark.
+            max_iterations = int((max_time - 0.09)/(1.4e-6 * len(alphabet)))
+        try:
+            text = next(intersection.strings(max_iterations))
+        except StopIteration:
+            raise ValueError(f"No overlap between {a} and {b} exists")
+        text = ''.join(c if c != anything_else else '?' for c in text)
+        if needed_post > 0:
+            return ExampleCollision(text[:needed_pre], text[needed_pre:-needed_post], text[-needed_post:])
+        else:
+            return ExampleCollision(text[:needed_pre], text[needed_pre:], '')
+
+    def is_marked(self, a: Any, b: Any) -> bool:
+        return frozenset({a, b}) in self._marked_pairs
+
+    @property
+    def marked_pairs(self):
+        return self._marked_pairs
+
+    def count_marked_pairs(self):
+        return len(self._marked_pairs)
+
+    def mark(self, a: Any, b: Any):
+        self._marked_pairs.add(frozenset({a, b}))
+
+    @classmethod
+    def from_regexes(cls, regexes: Dict[Any, str]):
+        patterns = {}
+        for k, r in regexes.items():
+            try:
+                patterns[k] = parse_pattern(r)
+            except (Unsupported, InvalidSyntax) as e:
+                logger.warning(f"Can't compile regex to Pattern for {k}\n {repr(e)}")
+        return cls(patterns)
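Usage sketch for Comparator, based only on the methods defined above; the labels and regexes are illustrative:

from interegular.comparator import Comparator

comp = Comparator.from_regexes({"NAME": r"[a-z_]+", "KEYWORD": r"if|else"})
for a, b in comp.check(skip_marked=True):
    example = comp.get_example_overlap(a, b)
    print(example.format_multiline(intro=f"{a} / {b}: "))
    comp.mark(a, b)  # do not report this pair again on later check() calls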