modwire 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,177 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
+ from fnmatch import fnmatch
6
+ from pathlib import Path
7
+ from subprocess import run
8
+ from typing import Protocol
9
+
10
+ from ..definitions import SourceFile, SourceImport
11
+
12
+
13
@dataclass(frozen=True)
class SourceExtraction:
    """Immutable result of running an extractor over a source tree."""

    # Extracted files keyed by their normalized source id.
    files: dict[str, SourceFile]
    # Number of paths under the root whose suffix matched the extractor.
    files_found: int
    # Number of matched paths that were skipped because of an exclusion.
    files_excluded: int
18
+
19
+
20
@dataclass(frozen=True)
class ExtractionTarget:
    """A single source file selected for extraction."""

    # POSIX-style path relative to the sources root (extension included).
    source_id: str
    # Path of the file as found while walking the sources root.
    path: Path
24
+
25
+
26
class SourceExtractor(Protocol):
    """Base behaviour shared by the language-specific extractors.

    An extractor runs an external script once per source file, validates the
    JSON it prints into a ``SourceFile`` and then normalizes every import
    against the set of source ids found in the same run.
    """

    # Language identifier, e.g. "python".
    language: str
    # File suffixes that select source files and are stripped from source ids.
    file_extensions: tuple[str, ...]
    # Executable used to run the extractor script.
    command: str
    # Script file name inside the ``scripts`` directory next to this module.
    extractor_file: str

    def normalize_source_id(self, value: str) -> str:
        """Trim whitespace and slashes, then drop the first matching extension."""
        cleaned = value.strip().strip("/")
        matched_extension = next(
            (
                extension
                for extension in self.file_extensions
                if cleaned.endswith(extension)
            ),
            None,
        )
        if matched_extension is None:
            return cleaned
        return cleaned[: -len(matched_extension)]

    def normalize_import(
        self,
        source_id: str,
        source_import: SourceImport,
        known_source_ids: set[str],
    ) -> SourceImport:
        """Return the import unchanged when it targets a known source id.

        Otherwise a copy is returned with a trimmed ``normalized_path`` and
        ``file_barrier_crossed`` forced to ``False``.
        """
        if source_import.normalized_path in known_source_ids:
            return source_import

        trimmed_path = source_import.normalized_path.strip().strip("/")
        return SourceImport(
            path=source_import.path,
            is_relative=source_import.is_relative,
            normalized_path=trimmed_path,
            imported_name=source_import.imported_name,
            is_aliased=source_import.is_aliased,
            crossing_type=source_import.crossing_type,
            file_barrier_crossed=False,
            statement_id=source_import.statement_id,
            join_key=source_import.join_key,
            uses_joined_import=source_import.uses_joined_import,
        )

    def extract_files(
        self,
        sources_root: Path,
        exclusions: tuple[str, ...],
    ) -> SourceExtraction:
        """Run the extractor script on every selected file under *sources_root*.

        The script is invoked once per file with the resolved file path and
        the resolved root as arguments; its JSON output is validated into a
        ``SourceFile``. Imports are normalized afterwards, once all source ids
        are known.
        """
        script = Path(__file__).parent / "scripts" / self.extractor_file
        assert script.is_file(), f"Extractor script {script} not found"

        targets, files_found, files_excluded = _collect_extraction_targets(
            sources_root,
            self.file_extensions,
            exclusions,
        )

        extracted = {
            self.normalize_source_id(target.source_id): SourceFile.model_validate(
                _json_from_output(
                    [
                        self.command,
                        str(script),
                        str(target.path.resolve()),
                        str(sources_root.resolve()),
                    ]
                )
            )
            for target in targets
        }

        # Imports can only be resolved once every source id has been seen.
        known_source_ids = set(extracted)
        normalized_files = {
            file_id: self.normalize_source_file(file_id, extracted_file, known_source_ids)
            for file_id, extracted_file in extracted.items()
        }

        return SourceExtraction(
            files=normalized_files,
            files_found=files_found,
            files_excluded=files_excluded,
        )

    def normalize_source_file(
        self,
        source_id: str,
        source_file: SourceFile,
        known_source_ids: set[str],
    ) -> SourceFile:
        """Rebuild *source_file* with each import passed through ``normalize_import``."""
        normalized_imports = [
            self.normalize_import(source_id, candidate, known_source_ids)
            for candidate in source_file.imports
        ]
        return SourceFile(
            imports=normalized_imports,
            classes=source_file.classes,
            interfaces=source_file.interfaces,
            types=source_file.types,
            abstract_classes=source_file.abstract_classes,
            functions=source_file.functions,
            line_count=source_file.line_count,
            code_line_count=source_file.code_line_count,
            public_symbol_count=source_file.public_symbol_count,
        )
121
+
122
+
123
def _collect_extraction_targets(
    sources_root: Path,
    file_extensions: tuple[str, ...],
    exclusions: tuple[str, ...],
) -> tuple[tuple[ExtractionTarget, ...], int, int]:
    """Walk *sources_root* and pick the files an extractor should process.

    Args:
        sources_root: Directory that is searched recursively.
        file_extensions: Suffixes (including the dot) that select a file.
        exclusions: Patterns checked against each file's root-relative POSIX
            path; a matching file is counted but not returned.

    Returns:
        A tuple ``(targets, files_found, files_excluded)`` where ``targets``
        is sorted by path, ``files_found`` counts every file with a matching
        suffix and ``files_excluded`` counts those dropped by an exclusion.
    """
    targets: list[ExtractionTarget] = []
    files_found = 0
    files_excluded = 0
    for path in sorted(sources_root.rglob("*")):
        if path.suffix not in file_extensions:
            continue
        # rglob("*") yields directories too; a directory (or broken symlink)
        # named like "pkg.py" must not be counted or handed to the extractor.
        if not path.is_file():
            continue

        files_found += 1
        source_id = path.relative_to(sources_root).as_posix()
        if any(_matches_exclusion(source_id, exclusion) for exclusion in exclusions):
            files_excluded += 1
            continue

        targets.append(ExtractionTarget(source_id, path))

    return tuple(targets), files_found, files_excluded
144
+
145
+
146
def _json_from_output(cmd: list[str], input_json: str | None = None) -> dict:
    """Run *cmd* and return its standard output parsed as JSON.

    Args:
        cmd: Command and arguments to execute.
        input_json: Optional text fed to the command's standard input.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status.
        ValueError: if the captured output is not valid JSON.
    """
    completed = run(
        cmd,
        input=input_json,
        text=True,
        capture_output=True,
        check=True,
    )
    raw_output = completed.stdout

    try:
        parsed = json.loads(raw_output)
    except json.JSONDecodeError as e:
        raise ValueError(f"Failed to parse JSON from output: {raw_output}") from e
    return parsed
159
+
160
+
161
def _matches_exclusion(source_id: str, exclusion: str) -> bool:
    """Tell whether *source_id* is covered by *exclusion*.

    The exclusion matches either as an ``fnmatch`` pattern or, when it
    contains no glob characters, as a directory prefix of *source_id*.
    """
    if fnmatch(source_id, exclusion):
        return True

    directory = exclusion.strip("/")
    if not directory:
        return False
    # A glob that did not match above must not be retried as a plain prefix.
    if "*" in directory or "?" in directory or "[" in directory:
        return False

    return source_id.startswith(directory + "/")
171
+
172
+
173
# Names exported by ``from ... import *``; the underscore-prefixed helpers
# defined in this module are left out.
__all__ = [
    "ExtractionTarget",
    "SourceExtraction",
    "SourceExtractor",
]
@@ -0,0 +1,31 @@
1
+ from .base import SourceExtractor
2
+
3
+ from .php import PhpExtractor
4
+ from .python import PythonExtractor
5
+ from .typescript import TypeScriptExtractor
6
+
7
+
8
# Registry mapping each supported language name to its extractor class.
_map: dict[str, type[SourceExtractor]] = {
    "python": PythonExtractor,
    "typescript": TypeScriptExtractor,
    "php": PhpExtractor,
}

# Extractor instances created so far, keyed by language name; filled lazily
# by load_extractor().
_instances: dict[str, SourceExtractor] = {}
15
+
16
+
17
def supported_languages() -> tuple[str, ...]:
    """Return the registered language names in registration order."""
    return tuple(_map.keys())
19
+
20
+
21
def load_extractor(language: str) -> SourceExtractor:
    """Return the shared extractor for *language*, creating it on first use.

    Args:
        language: One of the names returned by ``supported_languages()``.

    Raises:
        AssertionError: if *language* is not registered.
    """
    if language not in _map:
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type and message are
        # kept for callers that rely on them.
        raise AssertionError(f"Unsupported language: {language}")

    if language not in _instances:
        _instances[language] = _map[language]()
    return _instances[language]
27
+
28
+
29
def normalize_source_id(language: str, value: str) -> str:
    """Normalize *value* into a source id using the extractor for *language*.

    Backslashes are converted to forward slashes and surrounding whitespace
    and slashes are removed before the extractor's own normalization runs.
    """
    posix_value = value.replace("\\", "/")
    trimmed = posix_value.strip().strip("/")
    extractor = load_extractor(language)
    return extractor.normalize_source_id(trimmed)
@@ -0,0 +1,170 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import re
5
+ from dataclasses import dataclass
6
+ from pathlib import Path, PurePosixPath
7
+
8
+ from ..definitions import SourceFile, SourceImport
9
+ from .base import (
10
+ SourceExtraction,
11
+ SourceExtractor,
12
+ _collect_extraction_targets,
13
+ _json_from_output,
14
+ )
15
+
16
+
17
@dataclass(frozen=True)
class PhpExtractor(SourceExtractor):
    """Extractor for PHP sources.

    All selected files are sent to the extractor script in a single
    ``--batch`` invocation, and imports are then mapped onto the source ids
    found in that run.
    """

    language = "php"
    file_extensions = (".php",)
    command = "php"
    extractor_file = "php_extractor.php"

    def extract_files(
        self,
        sources_root: Path,
        exclusions: tuple[str, ...],
    ) -> SourceExtraction:
        """Extract every selected PHP file under *sources_root* in one batch.

        The script receives a JSON object on standard input that maps each
        normalized source id to its resolved file path, and is expected to
        print a JSON object keyed by source id.
        """
        script = Path(__file__).parent / "scripts" / self.extractor_file
        assert script.is_file(), f"Extractor script {script} not found"

        targets, files_found, files_excluded = _collect_extraction_targets(
            sources_root,
            self.file_extensions,
            exclusions,
        )

        files: dict[str, SourceFile] = {}
        if targets:
            batch_input = {
                self.normalize_source_id(target.source_id): str(target.path.resolve())
                for target in targets
            }
            batch_cmd = [
                self.command,
                str(script),
                "--batch",
                str(sources_root.resolve()),
            ]
            raw_files = _json_from_output(batch_cmd, json.dumps(batch_input))
            validated = {
                file_id: SourceFile.model_validate(payload)
                for file_id, payload in raw_files.items()
            }

            # Imports can only be resolved once every source id is known.
            known_source_ids = set(validated)
            files = {
                file_id: self.normalize_source_file(file_id, parsed, known_source_ids)
                for file_id, parsed in validated.items()
            }

        return SourceExtraction(
            files=files,
            files_found=files_found,
            files_excluded=files_excluded,
        )

    def normalize_import(
        self,
        source_id: str,
        source_import: SourceImport,
        known_source_ids: set[str],
    ) -> SourceImport:
        """Return a copy of *source_import* resolved against *known_source_ids*.

        ``file_barrier_crossed`` is kept only when the resolved path is a
        known source id, and ``join_key`` is recomputed from the resolved path.
        """
        resolved_path = self._known_source_id(
            source_import.normalized_path,
            known_source_ids,
        )
        barrier_crossed = (
            source_import.file_barrier_crossed
            and resolved_path in known_source_ids
        )
        join_key = self._normalized_join_key(resolved_path, source_import)
        return SourceImport(
            path=source_import.path,
            is_relative=source_import.is_relative,
            normalized_path=resolved_path,
            imported_name=source_import.imported_name,
            is_aliased=source_import.is_aliased,
            crossing_type=source_import.crossing_type,
            file_barrier_crossed=barrier_crossed,
            statement_id=source_import.statement_id,
            join_key=join_key,
            uses_joined_import=source_import.uses_joined_import,
        )

    def _normalized_join_key(
        self,
        normalized_path: str,
        source_import: SourceImport,
    ) -> str:
        """Return everything before the last ``/`` of *normalized_path*.

        An empty string is returned when the import has no join key or the
        path is empty.
        """
        if not (source_import.join_key and normalized_path):
            return ""
        parent, _, _ = normalized_path.rpartition("/")
        return parent

    def _known_source_id(
        self,
        normalized_path: str,
        known_source_ids: set[str],
    ) -> str:
        """Map *normalized_path* onto a known source id when one can be found.

        The path itself and each snake-cased suffix of it are tried as an
        exact match and then as a unique path suffix; a namespace-level match
        is the last resort. The input is returned unchanged if nothing matches.
        """
        snake_candidates = self._php_source_id_candidates(normalized_path)
        for candidate in (normalized_path, *snake_candidates):
            if candidate in known_source_ids:
                return candidate
            unique = self._unique_suffix_match(candidate, known_source_ids)
            if unique is not None:
                return unique

        by_namespace = self._namespace_source_id(normalized_path, known_source_ids)
        return normalized_path if by_namespace is None else by_namespace

    def _php_source_id_candidates(self, normalized_path: str) -> tuple[str, ...]:
        """Return the snake-cased path and each of its trailing sub-paths."""
        snake_parts = [
            self._source_part(segment)
            for segment in normalized_path.split("/")
            if segment
        ]
        return tuple(
            "/".join(snake_parts[start:])
            for start, _ in enumerate(snake_parts)
        )

    def _namespace_source_id(
        self,
        normalized_path: str,
        known_source_ids: set[str],
    ) -> str | None:
        """Return the only known source id living in the import's namespace.

        A source id counts when its parent directory equals, or ends with,
        the parent directory of one of the candidate paths. ``None`` is
        returned unless exactly one distinct source id qualifies.
        """
        found: set[str] = set()
        for candidate in self._php_source_id_candidates(normalized_path):
            namespace_path = PurePosixPath(candidate).parent.as_posix()
            if namespace_path in ("", "."):
                continue
            namespace_suffix = f"/{namespace_path}"
            for known_id in known_source_ids:
                parent = self._source_parent(known_id)
                if parent == namespace_path or parent.endswith(namespace_suffix):
                    found.add(known_id)

        if len(found) != 1:
            return None
        return next(iter(found))

    def _unique_suffix_match(
        self,
        candidate: str,
        known_source_ids: set[str],
    ) -> str | None:
        """Return the single known id equal to, or ending in ``/`` + *candidate*."""
        suffix = "/" + candidate
        hits = [
            known_id
            for known_id in known_source_ids
            if known_id == candidate or known_id.endswith(suffix)
        ]
        return hits[0] if len(hits) == 1 else None

    def _source_part(self, value: str) -> str:
        """Lower-case *value*, inserting ``_`` before each non-leading capital."""
        with_underscores = re.sub(r"(?<!^)(?=[A-Z])", "_", value)
        return with_underscores.lower()

    def _source_parent(self, source_id: str) -> str:
        """Return the parent directory of *source_id* as a POSIX string."""
        parent = PurePosixPath(source_id).parent
        return parent.as_posix()
168
+
169
+
170
# Only the extractor class is exported by ``from ... import *``.
__all__ = ["PhpExtractor"]
@@ -0,0 +1,113 @@
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+
5
+ from dataclasses import dataclass
6
+ from posixpath import normpath
7
+ from pathlib import PurePosixPath
8
+
9
+ from ..definitions import SourceImport
10
+ from .base import SourceExtractor
11
+
12
+
13
@dataclass(frozen=True)
class PythonExtractor(SourceExtractor):
    """Extractor for Python sources.

    Relative imports are resolved against the importing file's package, and
    every import is then mapped onto the source ids found in the same run
    (a module, or a package's ``__init__``).
    """

    language = "python"
    file_extensions = (".py",)
    # The extractor script is run with the interpreter running this process.
    command = sys.executable
    extractor_file = "python_extractor.py"

    def normalize_import(
        self,
        source_id: str,
        source_import: SourceImport,
        known_source_ids: set[str],
    ) -> SourceImport:
        """Return a copy of *source_import* resolved against *known_source_ids*.

        When the imported name is itself a known module (``from pkg import
        mod``), the import is pointed at that module and its crossing type
        becomes ``"module"``. Otherwise the import path alone is resolved.
        ``file_barrier_crossed`` is kept only when the resolved path is a
        known source id.
        """
        base_path = self._normalize_relative_import(source_id, source_import)
        crossing_type = source_import.crossing_type
        imported_name = source_import.imported_name

        resolved_path: str | None = None
        if imported_name and imported_name != "*":
            # Empty parts are dropped so a root-package import yields "name",
            # not "/name".
            module_import_path = "/".join(
                part for part in (base_path, imported_name) if part
            )
            module_source_id = self._known_source_id(
                module_import_path,
                known_source_ids,
            )
            if module_source_id in known_source_ids:
                resolved_path = module_source_id
                crossing_type = "module"
        if resolved_path is None:
            resolved_path = self._known_source_id(base_path, known_source_ids)

        return SourceImport(
            path=source_import.path,
            is_relative=source_import.is_relative,
            normalized_path=resolved_path,
            imported_name=source_import.imported_name,
            is_aliased=source_import.is_aliased,
            crossing_type=crossing_type,
            file_barrier_crossed=(
                source_import.file_barrier_crossed
                and resolved_path in known_source_ids
            ),
            statement_id=source_import.statement_id,
            join_key=self._normalized_join_key(resolved_path, source_import),
            uses_joined_import=source_import.uses_joined_import,
        )

    def _normalize_relative_import(
        self,
        source_id: str,
        source_import: SourceImport,
    ) -> str:
        """Turn a relative import into a path relative to the sources root.

        Non-relative imports are returned as their existing
        ``normalized_path``. For relative imports the number of leading dots
        in ``path`` selects the package: one dot is the importing file's own
        directory, each extra dot goes one directory up.

        Returns:
            The slash-separated path, or ``""`` when the import refers to the
            root of the source tree itself.
        """
        if not source_import.is_relative:
            return source_import.normalized_path

        level = len(source_import.path) - len(source_import.path.lstrip("."))
        module_path = source_import.path[level:].replace(".", "/").strip("/")
        package_path = PurePosixPath(source_id).parent
        for _ in range(max(level - 1, 0)):
            package_path = package_path.parent

        resolved = normpath(
            "/".join(part for part in (package_path.as_posix(), module_path) if part)
        )
        # normpath() reports the root package as "."; callers join this value
        # with further segments, and "./name" never matches a source id.
        return "" if resolved == "." else resolved

    def _known_source_id(
        self,
        normalized_path: str,
        known_source_ids: set[str],
    ) -> str:
        """Map *normalized_path* onto a known source id when one can be found.

        The path and its ``__init__`` are tried as an exact match and then as
        a unique path suffix. The input is returned unchanged if nothing
        matches.
        """
        if not normalized_path:
            # The root package can only be represented by a top-level
            # ``__init__``. Suffix matching is skipped here because it would
            # pick up an unrelated package's ``__init__``.
            return "__init__" if "__init__" in known_source_ids else normalized_path

        candidates = (normalized_path, f"{normalized_path}/__init__")
        for candidate in candidates:
            if candidate in known_source_ids:
                return candidate
            match = self._unique_suffix_match(candidate, known_source_ids)
            if match is not None:
                return match
        return normalized_path

    def _normalized_join_key(
        self,
        normalized_path: str,
        source_import: SourceImport,
    ) -> str:
        """Return the parent directory of *normalized_path* as the join key.

        An empty string is returned when the import has no join key, the path
        is empty, or the path has no parent directory.
        """
        if not source_import.join_key or not normalized_path:
            return ""
        parent = PurePosixPath(normalized_path).parent.as_posix()
        return "" if parent == "." else parent

    def _unique_suffix_match(
        self,
        candidate: str,
        known_source_ids: set[str],
    ) -> str | None:
        """Return the single known id equal to, or ending in ``/`` + *candidate*.

        ``None`` is returned when no id or more than one id qualifies.
        """
        suffix = f"/{candidate}"
        matches = sorted(
            source_id
            for source_id in known_source_ids
            if source_id == candidate or source_id.endswith(suffix)
        )
        return matches[0] if len(matches) == 1 else None
111
+
112
+
113
# Only the extractor class is exported by ``from ... import *``.
__all__ = ["PythonExtractor"]