PyPI - jaclang - Versions diffs - 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl - Mend

jaclang 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of jaclang might be problematic. Click here for more details.

Files changed (88)

jaclang/cli/cli.md +1 -0
jaclang/cli/cli.py +109 -37
jaclang/compiler/jac.lark +3 -3
jaclang/compiler/larkparse/jac_parser.py +2 -2
jaclang/compiler/parser.py +14 -21
jaclang/compiler/passes/main/__init__.py +5 -1
jaclang/compiler/passes/main/binder_pass.py +594 -0
jaclang/compiler/passes/main/cfg_build_pass.py +21 -1
jaclang/compiler/passes/main/import_pass.py +8 -256
jaclang/compiler/passes/main/inheritance_pass.py +10 -3
jaclang/compiler/passes/main/pyast_gen_pass.py +92 -77
jaclang/compiler/passes/main/pyast_load_pass.py +24 -13
jaclang/compiler/passes/main/sem_def_match_pass.py +1 -1
jaclang/compiler/passes/main/sym_tab_build_pass.py +4 -0
jaclang/compiler/passes/main/tests/fixtures/M1.jac +3 -0
jaclang/compiler/passes/main/tests/fixtures/cfg_has_var.jac +12 -0
jaclang/compiler/passes/main/tests/fixtures/cfg_if_no_else.jac +11 -0
jaclang/compiler/passes/main/tests/fixtures/cfg_return.jac +9 -0
jaclang/compiler/passes/main/tests/fixtures/checker_imported.jac +2 -0
jaclang/compiler/passes/main/tests/fixtures/checker_importer.jac +6 -0
jaclang/compiler/passes/main/tests/fixtures/data_spatial_types.jac +1 -1
jaclang/compiler/passes/main/tests/fixtures/import_symbol_type_infer.jac +11 -0
jaclang/compiler/passes/main/tests/fixtures/infer_type_assignment.jac +5 -0
jaclang/compiler/passes/main/tests/fixtures/member_access_type_inferred.jac +13 -0
jaclang/compiler/passes/main/tests/fixtures/member_access_type_resolve.jac +11 -0
jaclang/compiler/passes/main/tests/fixtures/sym_binder.jac +47 -0
jaclang/compiler/passes/main/tests/fixtures/type_annotation_assignment.jac +8 -0
jaclang/compiler/passes/main/tests/test_binder_pass.py +111 -0
jaclang/compiler/passes/main/tests/test_cfg_build_pass.py +62 -24
jaclang/compiler/passes/main/tests/test_checker_pass.py +87 -0
jaclang/compiler/passes/main/tests/test_pyast_gen_pass.py +13 -13
jaclang/compiler/passes/main/tests/test_sem_def_match_pass.py +6 -6
jaclang/compiler/passes/main/type_checker_pass.py +128 -0
jaclang/compiler/passes/tool/doc_ir_gen_pass.py +2 -0
jaclang/compiler/passes/tool/tests/fixtures/simple_walk_fmt.jac +3 -0
jaclang/compiler/program.py +32 -11
jaclang/compiler/tests/test_sr_errors.py +32 -0
jaclang/compiler/type_system/__init__.py +1 -0
jaclang/compiler/type_system/type_evaluator.py +421 -0
jaclang/compiler/type_system/type_utils.py +41 -0
jaclang/compiler/type_system/types.py +240 -0
jaclang/compiler/unitree.py +36 -24
jaclang/langserve/dev_engine.jac +645 -0
jaclang/langserve/dev_server.jac +201 -0
jaclang/langserve/engine.jac +24 -5
jaclang/langserve/tests/server_test/test_lang_serve.py +2 -2
jaclang/langserve/tests/test_dev_server.py +80 -0
jaclang/langserve/tests/test_server.py +13 -0
jaclang/runtimelib/builtin.py +28 -39
jaclang/runtimelib/importer.py +34 -63
jaclang/runtimelib/machine.py +48 -64
jaclang/runtimelib/memory.py +23 -5
jaclang/runtimelib/tests/fixtures/savable_object.jac +10 -2
jaclang/runtimelib/utils.py +42 -6
jaclang/tests/fixtures/edge_node_walk.jac +1 -1
jaclang/tests/fixtures/edges_walk.jac +1 -1
jaclang/tests/fixtures/gendot_bubble_sort.jac +1 -1
jaclang/tests/fixtures/py_run.jac +8 -0
jaclang/tests/fixtures/py_run.py +23 -0
jaclang/tests/fixtures/pyfunc.py +2 -0
jaclang/tests/fixtures/pyfunc_fmt.py +60 -0
jaclang/tests/fixtures/pyfunc_fstr.py +25 -0
jaclang/tests/fixtures/pyfunc_kwesc.py +33 -0
jaclang/tests/fixtures/python_run_test.py +19 -0
jaclang/tests/test_cli.py +107 -0
jaclang/tests/test_language.py +106 -5
jaclang/utils/lang_tools.py +6 -3
jaclang/utils/module_resolver.py +90 -0
jaclang/utils/symtable_test_helpers.py +125 -0
jaclang/utils/test.py +3 -4
jaclang/vendor/interegular/__init__.py +34 -0
jaclang/vendor/interegular/comparator.py +163 -0
jaclang/vendor/interegular/fsm.py +1015 -0
jaclang/vendor/interegular/patterns.py +732 -0
jaclang/vendor/interegular/py.typed +0 -0
jaclang/vendor/interegular/utils/__init__.py +15 -0
jaclang/vendor/interegular/utils/simple_parser.py +165 -0
jaclang/vendor/interegular-0.3.3.dist-info/INSTALLER +1 -0
jaclang/vendor/interegular-0.3.3.dist-info/LICENSE.txt +21 -0
jaclang/vendor/interegular-0.3.3.dist-info/METADATA +64 -0
jaclang/vendor/interegular-0.3.3.dist-info/RECORD +20 -0
jaclang/vendor/interegular-0.3.3.dist-info/REQUESTED +0 -0
jaclang/vendor/interegular-0.3.3.dist-info/WHEEL +5 -0
jaclang/vendor/interegular-0.3.3.dist-info/top_level.txt +1 -0
{jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/METADATA +2 -1
{jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/RECORD +88 -43
{jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/WHEEL +0 -0
{jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/entry_points.txt +0 -0

jaclang/tests/test_language.py CHANGED Viewed

@@ -13,6 +13,7 @@ from jaclang import JacMachine as Jac
 from jaclang.cli import cli
 from jaclang.compiler.program import JacProgram
 from jaclang.utils.test import TestCase
+from jaclang.runtimelib.utils import read_file_with_encoding
 class JacLanguageTests(TestCase):
@@ -121,13 +122,19 @@ class JacLanguageTests(TestCase):
         data = json.loads(stdout_value)
         nodes = data["nodes"]
-        self.assertEqual(len(nodes), 7)
+        edges = data["edges"]
+        self.assertEqual(len(nodes), 5)
+        self.assertEqual(len(edges), 6)
         for node in nodes:
             label = node["label"]
             self.assertIn(label, ["root", "N(val=0)", "N(val=1)"])
-        edges = data["edges"]
-        self.assertEqual(len(edges), 6)
+        for edge in edges:
+            label = edge["label"]
+            self.assertIn(label, ["E(val=1)", "E(val=1)", "E(val=1)", "E(val=0)", "E(val=0)", "E(val=0)"])
     def test_printgraph_mermaid(self) -> None:
         """Test the mermaid gen of builtin function."""
@@ -228,7 +235,7 @@ class JacLanguageTests(TestCase):
         sys.stdout = sys.__stdout__
         stdout_value = captured_output.getvalue()
         self.assertIn(
-            '[label="inner_node(main=5, sub=2)"];',
+            '[label="inner_node(main=5, sub=2)"fillcolor="#FFDEAD"];',
             stdout_value,
         )
@@ -1363,4 +1370,98 @@ class JacLanguageTests(TestCase):
         stdout_value = captured_output.getvalue().split("\n")
         self.assertIn("Num:  4", stdout_value[0])
         self.assertIn("Num:  3", stdout_value[1])
-        self.assertIn("Completed", stdout_value[2])
+        self.assertIn("Completed", stdout_value[2])
+    def test_read_file_with_encoding_utf8(self) -> None:
+        """Test reading UTF-8 encoded file."""
+        with tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', delete=False) as f:
+            test_content = "Hello, 世界! 🌍 Testing UTF-8 encoding."
+            f.write(test_content)
+            temp_path = f.name
+        try:
+            result = read_file_with_encoding(temp_path)
+            self.assertEqual(result, test_content)
+        finally:
+            os.unlink(temp_path)
+    def test_read_file_with_encoding_utf16(self) -> None:
+        """Test reading UTF-16 encoded file when UTF-8 fails."""
+        with tempfile.NamedTemporaryFile(delete=False, mode="w", encoding="utf-16") as f:
+            test_content = "Hello, 世界! UTF-16 encoding test."
+            f.write(test_content)
+            temp_path = f.name
+        try:
+            result = read_file_with_encoding(temp_path)
+            self.assertEqual(result, test_content)
+        finally:
+            os.unlink(temp_path)
+    def test_read_file_with_encoding_utf8_bom(self) -> None:
+        """Test reading UTF-8 with BOM encoded file."""
+        with tempfile.NamedTemporaryFile(delete=False, mode='w', encoding='utf-8-sig') as f:
+            test_content = "Hello, UTF-8 BOM test! 🚀"
+            f.write(test_content)
+            temp_path = f.name
+        try:
+            result = read_file_with_encoding(temp_path)
+            self.assertEqual(result, test_content)
+        finally:
+            os.unlink(temp_path)
+    # TODO: Support reading files with Latin-1 encoding
+    # def test_read_file_with_encoding_latin1(self) -> None:
+    #     """Test reading Latin-1 encoded file as fallback."""
+    #     with tempfile.NamedTemporaryFile(mode='w', encoding='latin-1', delete=False) as f:
+    #         test_content = "Hello, café! Latin-1 test."
+    #         f.write(test_content)
+    #         f.flush()
+    #         temp_path = f.name
+    #     try:
+    #         result = read_file_with_encoding(temp_path)
+    #         self.assertEqual(result, test_content)
+    #     finally:
+    #         os.unlink(temp_path)
+    def test_read_file_with_encoding_binary_file_fallback(self) -> None:
+        """Test reading binary file falls back to latin-1."""
+        with tempfile.NamedTemporaryFile(delete=False) as f:
+            binary_data = bytes([0xFF, 0xFE, 0x00, 0x48, 0x65, 0x6C, 0x6C, 0x6F])
+            f.write(binary_data)
+            f.flush()
+            temp_path = f.name
+        try:
+            result = read_file_with_encoding(temp_path)
+            self.assertIsInstance(result, str)
+            self.assertGreater(len(result), 0)
+        finally:
+            os.unlink(temp_path)
+    def test_read_file_with_encoding_special_characters(self) -> None:
+        """Test reading file with various special characters."""
+        with tempfile.NamedTemporaryFile(mode='w', encoding='utf-8', delete=False) as f:
+            test_content = (
+                "Special chars: åäö ñ ü ç é\n"
+                "Symbols: ©®™ §¶†‡•\n"
+                "Math: ∑∏∫√±≤≥≠\n"
+                "Arrows: ←→↑↓↔\n"
+                "Emoji: 😀😍🎉🔥💯\n"
+            )
+            f.write(test_content)
+            f.flush()
+            temp_path = f.name
+        try:
+            result = read_file_with_encoding(temp_path)
+            self.assertEqual(result, test_content)
+            self.assertIn("åäö", result)
+            self.assertIn("©®™", result)
+            self.assertIn("∑∏∫", result)
+            self.assertIn("😀😍", result)
+        finally:
+            os.unlink(temp_path)

jaclang/utils/lang_tools.py CHANGED Viewed

@@ -8,9 +8,11 @@ from typing import List, Optional, Type
 import jaclang.compiler.unitree as uni
 from jaclang.compiler.passes.main import PyastBuildPass
+from jaclang.compiler.passes.main.cfg_build_pass import cfg_dot_from_file
 from jaclang.compiler.passes.tool.doc_ir_gen_pass import DocIRGenPass
 from jaclang.compiler.program import JacProgram
 from jaclang.compiler.unitree import UniScopeNode
+from jaclang.runtimelib.utils import read_file_with_encoding
 from jaclang.utils.helpers import auto_generate_refs, pascal_to_snake
@@ -194,9 +196,8 @@ class AstTool:
             base = base if base else "./"
             if file_name.endswith(".py"):
-                with open(file_name, "r") as f:
-                    file_source = f.read()
-                    parsed_ast = py_ast.parse(file_source)
+                file_source = read_file_with_encoding(file_name)
+                parsed_ast = py_ast.parse(file_source)
                 if output == "pyast":
                     return f"\n{py_ast.dump(parsed_ast, indent=2)}"
                 try:
@@ -242,6 +243,8 @@ class AstTool:
                     return out
                 case "ast.":
                     return ir.printgraph()
+                case "cfg.":
+                    return cfg_dot_from_file(file_name)
                 case "unparse":
                     return ir.unparse()
                 case "pyast":

jaclang/utils/module_resolver.py CHANGED Viewed

@@ -57,6 +57,18 @@ def resolve_module(target: str, base_path: str) -> Tuple[str, str]:
         if res:
             return res
+    typeshed_paths = get_typeshed_paths()
+    for typeshed_dir in typeshed_paths:
+        res = _candidate_from_typeshed(typeshed_dir, actual_parts)
+        if res:
+            # print(f"Found '{target}' in typeshed: {res[0]}")
+            return res
+    # If not found in any typeshed directory, but typeshed is configured,
+    # return a stub .pyi path for type checking.
+    stub_pyi_path = os.path.join(typeshed_paths[0], *actual_parts) + ".pyi"
+    if os.path.isfile(stub_pyi_path):
+        return stub_pyi_path, "pyi"
     base_dir = base_path if os.path.isdir(base_path) else os.path.dirname(base_path)
     for _ in range(max(level - 1, 0)):
         base_dir = os.path.dirname(base_dir)
@@ -90,3 +102,81 @@ def resolve_relative_path(target: str, base_path: str) -> str:
     """Resolve only the path component for a target."""
     path, _ = resolve_module(target, base_path)
     return path
+def get_typeshed_paths() -> list[str]:
+    """Return the typeshed stubs and stdlib directories if available."""
+    # You may want to make this configurable or autodetect
+    # Corrected base path calculation: removed one ".."
+    base = os.path.join(
+        os.path.dirname(__file__),  # jaclang/utils
+        "..",  # jaclang
+        "vendor",
+        "typeshed",  # jaclang/vendor/typeshed
+    )
+    base = os.path.abspath(base)
+    stubs = os.path.join(base, "stubs")
+    stdlib = os.path.join(base, "stdlib")
+    paths = []
+    if os.path.isdir(stubs):
+        paths.append(stubs)
+    if os.path.isdir(stdlib):
+        paths.append(stdlib)
+    return paths
+def _candidate_from_typeshed(base: str, parts: list[str]) -> Optional[Tuple[str, str]]:
+    """Find .pyi files in typeshed, trying module.pyi then package/__init__.pyi."""
+    if not parts:  #
+        return None
+    # This is the path prefix for the module/package, e.g., os.path.join(base, "collections", "abc")
+    candidate_prefix = os.path.join(base, *parts)
+    # 1. Check for a direct module file (e.g., base/parts.pyi or base/package/module.pyi)
+    # Example: parts=["collections", "abc"] -> candidate_prefix = base/collections/abc
+    # module_file_pyi = base/collections/abc.pyi
+    # Example: parts=["sys"] -> candidate_prefix = base/sys
+    # module_file_pyi = base/sys.pyi
+    module_file_pyi = candidate_prefix + ".pyi"
+    if os.path.isfile(module_file_pyi):
+        return module_file_pyi, "pyi"
+    # 2. Check if the candidate_prefix itself is a directory (package)
+    #    and look for __init__.pyi inside it.
+    # Example: parts=["_typeshed"] -> candidate_prefix = base/_typeshed
+    # init_pyi = base/_typeshed/__init__.pyi
+    if os.path.isdir(candidate_prefix):
+        init_pyi = os.path.join(candidate_prefix, "__init__.pyi")
+        if os.path.isfile(init_pyi):
+            return init_pyi, "pyi"
+        # Heuristic for packages where stubs are in a subdirectory of the same name
+        # e.g., parts = ["requests"], candidate_prefix = base/requests
+        # checks base/requests/requests/__init__.pyi
+        # This part of the original heuristic is preserved.
+        if parts:  # Ensure parts is not empty for parts[-1]
+            inner_pkg_init_pyi = os.path.join(
+                candidate_prefix, parts[-1], "__init__.pyi"
+            )
+            if os.path.isfile(inner_pkg_init_pyi):
+                return inner_pkg_init_pyi, "pyi"
+    return None
+class PythonModuleResolver:
+    """Resolver for Python modules with enhanced import capabilities."""
+    def resolve_module_path(self, target: str, base_path: str) -> str:
+        """Resolve Python module path without importing."""
+        caller_dir = (
+            base_path if os.path.isdir(base_path) else os.path.dirname(base_path)
+        )
+        caller_dir = caller_dir if caller_dir else os.getcwd()
+        local_py_file = os.path.join(caller_dir, target.split(".")[-1] + ".py")
+        if os.path.exists(local_py_file):
+            return local_py_file
+        else:
+            raise ImportError(f"Module '{target}' not found in {caller_dir}")

jaclang/utils/symtable_test_helpers.py ADDED Viewed

@@ -0,0 +1,125 @@
+"""Symbol table testing helpers for Jaseci."""
+from typing import Optional
+from jaclang.compiler.unitree import Symbol, UniScopeNode
+from jaclang.utils.test import TestCase
+class SymTableTestMixin(TestCase):
+    """Mixin class providing assertion methods for symbol table testing."""
+    def assert_symbol_exists(
+        self,
+        sym_table: UniScopeNode,
+        symbol_name: str,
+        symbol_type: Optional[str] = None,
+    ) -> Symbol:
+        """Assert that a symbol exists in the symbol table."""
+        symbol = look_down(sym_table, symbol_name)
+        self.assertIsNotNone(
+            symbol, f"Symbol '{symbol_name}' not found in symbol table"
+        )
+        if symbol_type:
+            self.assertIn(
+                symbol_type,
+                str(symbol),
+                f"Symbol '{symbol_name}' is not of type '{symbol_type}'",
+            )
+        return symbol
+    def assert_symbol_decl_at(self, symbol: Symbol, line: int, col: int) -> None:
+        """Assert that a symbol is declared at specific line and column."""
+        decl_info = str(symbol)
+        expected_decl = f"{line}:{col}"
+        self.assertIn(
+            expected_decl,
+            decl_info,
+            f"Symbol declaration not found at {expected_decl}. Got: {decl_info}",
+        )
+    def assert_symbol_defns_at(
+        self, symbol: Symbol, expected_defns: list[tuple[int, int]]
+    ) -> None:
+        """Assert that a symbol has definitions at specific locations."""
+        symbol_str = str(symbol)
+        for line, col in expected_defns:
+            expected_defn = f"{line}:{col}"
+            self.assertIn(
+                expected_defn,
+                symbol_str,
+                f"Symbol definition not found at {expected_defn}. Got: {symbol_str}",
+            )
+    def assert_symbol_uses_at(
+        self, symbol: Symbol, expected_uses: list[tuple[int, int]]
+    ) -> None:
+        """Assert that a symbol has uses at specific locations."""
+        symbol_uses_str = str(symbol.uses)
+        for line, col in expected_uses:
+            expected_use = f"{line}:{col}"
+            self.assertIn(
+                expected_use,
+                symbol_uses_str,
+                f"Symbol use not found at {expected_use}. Got: {symbol_uses_str}",
+            )
+    def assert_symbol_complete(
+        self,
+        sym_table: UniScopeNode,
+        symbol_name: str,
+        symbol_type: str,
+        decl: tuple[int, int],
+        defns: Optional[list[tuple[int, int]]] = None,
+        uses: Optional[list[tuple[int, int]]] = None,
+    ) -> None:
+        """Assert complete symbol information (declaration, definitions, uses)."""
+        symbol = self.assert_symbol_exists(sym_table, symbol_name, symbol_type)
+        self.assert_symbol_decl_at(symbol, decl[0], decl[1])
+        if defns:
+            self.assert_symbol_defns_at(symbol, defns)
+        if uses:
+            self.assert_symbol_uses_at(symbol, uses)
+    def assert_sub_table_exists(
+        self, sym_table: UniScopeNode, table_name: str, tab_type: str
+    ) -> None:
+        """Assert that a sub-table exists in the symbol table."""
+        sub_tables = sym_table.kid_scope
+        table_names = [table.scope_name for table in sub_tables]
+        type_names = [table.get_type() for table in sub_tables]
+        matching_tables = [name for name in table_names if table_name in name]
+        matching_types = [
+            type_name for type_name in type_names if tab_type in str(type_name)
+        ]
+        self.assertTrue(
+            len(matching_tables) > 0,
+            f"Sub-table '{table_name}' not found. Available: {table_names}",
+        )
+        self.assertTrue(
+            len(matching_types) > 0,
+            f"Sub-table type '{tab_type}' not found in {table_names} of types {type_names}",
+        )
+        return sub_tables[table_names.index(matching_tables[0])]
+def look_down(tab: UniScopeNode, name: str, deep: bool = True) -> Optional[Symbol]:
+    """Lookup a variable in the symbol table."""
+    if name in tab.names_in_scope:
+        if not tab.names_in_scope[name].imported:
+            return tab.names_in_scope[name]
+        else:
+            sym = tab.names_in_scope[name]
+            return sym
+    for i in tab.inherited_scope:
+        found = i.lookup(name, deep=False)
+        if found:
+            return found
+    if deep and tab.kid_scope:
+        for kid in tab.kid_scope:
+            found = kid.lookup(name, deep=True)
+            if found:
+                return found
+    return None

jaclang/utils/test.py CHANGED Viewed

@@ -9,6 +9,7 @@ from _pytest.logging import LogCaptureFixture
 import jaclang
 from jaclang.compiler.passes import UniPass
+from jaclang.runtimelib.utils import read_file_with_encoding
 from jaclang.utils.helpers import get_uni_nodes_as_snake_case as ast_snakes
 import pytest
@@ -41,13 +42,11 @@ class TestCase(_TestCase):
             raise ValueError("Unable to determine the file of the module.")
         fixture_src = module.__file__
         fixture_path = os.path.join(os.path.dirname(fixture_src), "fixtures", fixture)
-        with open(fixture_path, "r", encoding="utf-8") as f:
-            return f.read()
+        return read_file_with_encoding(fixture_path)
     def file_to_str(self, file_path: str) -> str:
         """Load fixture from fixtures directory."""
-        with open(file_path, "r", encoding="utf-8") as f:
-            return f.read()
+        return read_file_with_encoding(file_path)
     def fixture_abs_path(self, fixture: str) -> str:
         """Get absolute path of a fixture from fixtures directory."""

jaclang/vendor/interegular/__init__.py ADDED Viewed

@@ -0,0 +1,34 @@
+"""
+A package to compare python-style regexes and test if they have intersections.
+Based on the `greenery`-package by @qntm, adapted and specialized for `lark-parser`
+"""
+from typing import Iterable, Tuple
+from interegular.fsm import FSM
+from interegular.patterns import Pattern, parse_pattern, REFlags, Unsupported, InvalidSyntax
+from interegular.comparator import Comparator
+from interegular.utils import logger
+__all__ = ['FSM', 'Pattern', 'Comparator', 'parse_pattern', 'compare_patterns', 'compare_regexes', '__version__', 'REFlags', 'Unsupported',
+           'InvalidSyntax']
+def compare_regexes(*regexes: str) -> Iterable[Tuple[str, str]]:
+    """
+    Checks the regexes for intersections. Returns all pairs it found
+    """
+    c = Comparator({r: parse_pattern(r) for r in regexes})
+    print(c._patterns)
+    return c.check(regexes)
+def compare_patterns(*ps: Pattern) -> Iterable[Tuple[Pattern, Pattern]]:
+    """
+    Checks the Patterns for intersections. Returns all pairs it found
+    """
+    c = Comparator({p: p for p in ps})
+    return c.check(ps)
+__version__ = "0.3.3"

jaclang/vendor/interegular/comparator.py ADDED Viewed

@@ -0,0 +1,163 @@
+from collections import namedtuple
+from dataclasses import dataclass
+from itertools import combinations
+from typing import List, Tuple, Any, Dict, Iterable, Set, FrozenSet, Optional
+from interegular import InvalidSyntax, REFlags
+from interegular.fsm import FSM, Alphabet, anything_else
+from interegular.patterns import Pattern, Unsupported, parse_pattern
+from interegular.utils import logger, soft_repr
+@dataclass
+class ExampleCollision:
+    """
+    Captures the full text of an example collision between two regex.
+    `main_text` is the part that actually gets captured by the two regex
+    `prefix` is the part that is potentially needed for lookbehinds
+    `postfix` is the part that is potentially needed for lookahead
+    """
+    prefix: str
+    main_text: str
+    postfix: str
+    def format_multiline(self, intro: str = "Example Collision: ", indent: str = "",
+                         force_pointer: bool = False) -> str:
+        """
+        Formats this example somewhat similar to a python syntax error.
+        - intro is added on the first line
+        - indent is added on the second line
+        The three parts of the example are concatenated and `^` is used to underline them.
+        ExampleCollision(prefix='a', main_text='cd', postfix='ef').format_multiline()
+        leads to
+        Example Collision: acdef
+                             ^^
+        This function will escape the character where necessary to stay readable.
+        if `force_pointer` is False, the function will not produce the second line if only main_text is set
+        """
+        if len(intro) < len(indent):
+            raise ValueError("Can't have intro be shorter than indent")
+        prefix = soft_repr(self.prefix)
+        main_text = soft_repr(self.main_text)
+        postfix = soft_repr(self.postfix)
+        text = f"{prefix}{main_text}{postfix}"
+        if len(text) != len(main_text):
+            whitespace = ' ' * (len(intro) - len(indent) + len(prefix))
+            pointers = '^' * len(main_text)
+            return f"{intro}{text}\n{indent}{whitespace}{pointers}"
+        else:
+            return f"{intro}{text}"
+    @property
+    def full_text(self):
+        return self.prefix + self.main_text + self.postfix
+class Comparator:
+    """
+    A class that represents the main interface for comparing a list of regex to each other.
+    It expects a dictionary of arbitrary labels mapped to `Pattern` instances,
+    but there is a utility function to create the instances `from_regex` strings.
+    The main interface function all expect the abitrary labels to be given, which
+    then get mapped to the correct `Pattern` and/or `FSM` instance.
+    There is a utility function `mark(a,b)` which allows to mark pairs that shouldn't
+    be checked again by `check`.
+    """
+    def __init__(self, patterns: Dict[Any, Pattern]):
+        self._patterns = patterns
+        self._marked_pairs: Set[FrozenSet[Any]] = set()
+        if not patterns:  # `isdisjoint` can not be called anyway, so we don't need to create a valid state
+            return
+        self._alphabet = Alphabet.union(*(p.get_alphabet(REFlags(0)) for p in patterns.values()))[0]
+        prefix_postfix_s = [p.prefix_postfix for p in patterns.values()]
+        self._prefix_postfix = max(p[0] for p in prefix_postfix_s), max(p[1] for p in prefix_postfix_s)
+        self._fsms: Dict[Any, FSM] = {}
+        self._know_pairs: Dict[Tuple[Any, Any], bool] = {}
+    def get_fsm(self, a: Any) -> FSM:
+        if a not in self._fsms:
+            try:
+                self._fsms[a] = self._patterns[a].to_fsm(self._alphabet, self._prefix_postfix)
+            except Unsupported as e:
+                self._fsms[a] = None
+                logger.warning(f"Can't compile Pattern to fsm for {a}\n     {repr(e)}")
+            except KeyError:
+                self._fsms[a] = None  # In case it was thrown away in `from_regexes`
+        return self._fsms[a]
+    def isdisjoint(self, a: Any, b: Any) -> bool:
+        if (a, b) not in self._know_pairs:
+            fa, fb = self.get_fsm(a), self.get_fsm(b)
+            if fa is None or fb is None:
+                self._know_pairs[a, b] = True  # We can't know. Assume they are disjoint
+            else:
+                self._know_pairs[a, b] = fa.isdisjoint(fb)
+        return self._know_pairs[a, b]
+    def check(self, keys: Iterable[Any] = None, skip_marked: bool = False) -> Iterable[Tuple[Any, Any]]:
+        if keys is None:
+            keys = self._patterns
+        for a, b in combinations(keys, 2):
+            if skip_marked and self.is_marked(a, b):
+                continue
+            if not self.isdisjoint(a, b):
+                yield a, b
+    def get_example_overlap(self, a: Any, b: Any, max_time: float = None) -> ExampleCollision:
+        pa, pb = self._patterns[a], self._patterns[b]
+        needed_pre = max(pa.prefix_postfix[0], pb.prefix_postfix[0])
+        needed_post = max(pa.prefix_postfix[1], pb.prefix_postfix[1])
+        # We use the optimal alphabet here instead of the general one since that
+        # massively improves performance by every metric.
+        alphabet = pa.get_alphabet(REFlags(0)).union(pb.get_alphabet(REFlags(0)))[0]
+        fa, fb = pa.to_fsm(alphabet, (needed_pre, needed_post)), pb.to_fsm(alphabet, (needed_pre, needed_post))
+        intersection = fa.intersection(fb)
+        if max_time is None:
+            max_iterations = None
+        else:
+            # We calculate an approximation for that value of max_iterations
+            # that makes sure for this function to finish in under max_time seconds
+            # This values will heavily depend on CPU, python version, exact patterns
+            # and probably more factors, but this should generally be in the correct
+            # ballpark.
+            max_iterations = int((max_time - 0.09)/(1.4e-6 * len(alphabet)))
+        try:
+            text = next(intersection.strings(max_iterations))
+        except StopIteration:
+            raise ValueError(f"No overlap between {a} and {b} exists")
+        text = ''.join(c if c != anything_else else '?' for c in text)
+        if needed_post > 0:
+            return ExampleCollision(text[:needed_pre], text[needed_pre:-needed_post], text[-needed_post:])
+        else:
+            return ExampleCollision(text[:needed_pre], text[needed_pre:], '')
+    def is_marked(self, a: Any, b: Any) -> bool:
+        return frozenset({a, b}) in self._marked_pairs
+    @property
+    def marked_pairs(self):
+        return self._marked_pairs
+    def count_marked_pairs(self):
+        return len(self._marked_pairs)
+    def mark(self, a: Any, b: Any):
+        self._marked_pairs.add(frozenset({a, b}))
+    @classmethod
+    def from_regexes(cls, regexes: Dict[Any, str]):
+        patterns = {}
+        for k, r in regexes.items():
+            try:
+                patterns[k] = parse_pattern(r)
+            except (Unsupported, InvalidSyntax) as e:
+                logger.warning(f"Can't compile regex to Pattern for {k}\n     {repr(e)}")
+        return cls(patterns)

jaclang 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl

Potentially problematic release.

jaclang 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl