modwire 1.0.0__tar.gz → 1.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {modwire-1.0.0 → modwire-1.1.0}/.github/workflows/ci.yml +1 -0
- {modwire-1.0.0 → modwire-1.1.0}/PKG-INFO +2 -1
- {modwire-1.0.0 → modwire-1.1.0}/README.md +1 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/__init__.py +3 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/_version.py +3 -3
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/definitions.py +35 -0
- modwire-1.1.0/src/modwire/exports.py +188 -0
- modwire-1.1.0/src/modwire/extractors/base.py +340 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/extractors/php.py +26 -1
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/extractors/python.py +38 -1
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/extractors/scripts/php_extractor.php +50 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/extractors/scripts/python_extractor.py +211 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/extractors/scripts/typescript_extractor.js +297 -4
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/extractors/typescript.py +29 -1
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire.egg-info/PKG-INFO +2 -1
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire.egg-info/SOURCES.txt +1 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/test_api.py +338 -0
- modwire-1.0.0/src/modwire/extractors/base.py +0 -177
- {modwire-1.0.0 → modwire-1.1.0}/.github/workflows/release.yml +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/.gitignore +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/LICENSE +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/pyproject.toml +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/setup.cfg +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/show_test_source_files.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/architecture/__init__.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/architecture/analyzers.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/architecture/matching.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/architecture/policy.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/architecture/render.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/architecture/violations.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/extraction.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/extractors/__init__.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/extractors/loader.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire/graph.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire.egg-info/dependency_links.txt +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire.egg-info/requires.txt +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/src/modwire.egg-info/top_level.txt +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/php/ignored/generated.php +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/php/src/application/use_cases/activate.php +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/php/src/domain/model/user.php +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/php/src/domain/services/policy.php +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/php/src/interfaces/http/controller.php +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/python/ignored/generated.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/python/src/application/use_cases/activate.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/python/src/domain/model/user.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/python/src/domain/services/policy.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/python/src/interfaces/http/controller.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/typescript/ignored/generated.ts +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/typescript/src/application/use_cases/activate.ts +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/typescript/src/domain/model/profile.tsx +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/typescript/src/domain/model/user.ts +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/typescript/src/domain/services/audit.js +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/typescript/src/domain/services/policy.ts +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/typescript/src/interfaces/http/controller.ts +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/apps/typescript/src/interfaces/http/view.jsx +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/test_architecture_api.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/tests/test_standalone.py +0 -0
- {modwire-1.0.0 → modwire-1.1.0}/uv.lock +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modwire
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Summary: Extract source-code dependencies and build dependency graphs.
|
|
5
5
|
Author: Tomasz Szpak
|
|
6
6
|
License-Expression: MIT
|
|
@@ -70,6 +70,7 @@ TypeScript, and PHP projects can be compared through the same graph shape.
|
|
|
70
70
|
|
|
71
71
|
## Supported Languages
|
|
72
72
|
|
|
73
|
+
|
|
73
74
|
```python
|
|
74
75
|
from modwire import supported_languages
|
|
75
76
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from .extraction import CodeMap, extract_code
|
|
2
2
|
from .extractors.loader import normalize_source_id, supported_languages
|
|
3
|
+
from .exports import UnusedExport, find_unused_exports
|
|
3
4
|
from .graph import DependencyGraph, Edge, Node, build_dependency_graph
|
|
4
5
|
|
|
5
6
|
|
|
@@ -8,8 +9,10 @@ __all__ = [
|
|
|
8
9
|
"DependencyGraph",
|
|
9
10
|
"Edge",
|
|
10
11
|
"Node",
|
|
12
|
+
"UnusedExport",
|
|
11
13
|
"build_dependency_graph",
|
|
12
14
|
"extract_code",
|
|
15
|
+
"find_unused_exports",
|
|
13
16
|
"normalize_source_id",
|
|
14
17
|
"supported_languages",
|
|
15
18
|
]
|
|
@@ -18,7 +18,7 @@ version_tuple: tuple[int | str, ...]
|
|
|
18
18
|
commit_id: str | None
|
|
19
19
|
__commit_id__: str | None
|
|
20
20
|
|
|
21
|
-
__version__ = version = '1.0.0'
|
|
22
|
-
__version_tuple__ = version_tuple = (1, 0, 0)
|
|
21
|
+
__version__ = version = '1.1.0'
|
|
22
|
+
__version_tuple__ = version_tuple = (1, 1, 0)
|
|
23
23
|
|
|
24
|
-
__commit_id__ = commit_id = '
|
|
24
|
+
__commit_id__ = commit_id = 'g371c8ff6e'
|
|
@@ -6,6 +6,25 @@ from pydantic import BaseModel, Field
|
|
|
6
6
|
ImportCrossingType = Literal["module", "symbol"]
|
|
7
7
|
SourceVisibility = Literal["public", "protected", "private"]
|
|
8
8
|
SourceSignatureKind = Literal["call", "construct", "index"]
|
|
9
|
+
SourceExportKind = Literal[
|
|
10
|
+
"module",
|
|
11
|
+
"class",
|
|
12
|
+
"interface",
|
|
13
|
+
"type",
|
|
14
|
+
"abstract_class",
|
|
15
|
+
"function",
|
|
16
|
+
"value",
|
|
17
|
+
"unknown",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class SourceImportedSymbol(BaseModel):
    """One symbol named by an import statement (see ``SourceImport.imported_symbols``)."""

    # Symbol name; find_unused_exports looks this name up among the
    # exports of the imported file.
    name: str
    # Local alias. exports.py synthesizes alias="" for un-aliased symbols;
    # presumably extractors do the same -- TODO confirm.
    alias: str
    is_aliased: bool
    is_default: bool
    # Namespace symbols are skipped when find_unused_exports marks
    # individual symbol exports as used.
    is_namespace: bool
    # A star symbol (or name == "*") marks every symbol export of the
    # imported file as used in find_unused_exports.
    is_star: bool
|
|
9
28
|
|
|
10
29
|
|
|
11
30
|
class SourceImport(BaseModel):
|
|
@@ -19,6 +38,21 @@ class SourceImport(BaseModel):
|
|
|
19
38
|
statement_id: int
|
|
20
39
|
join_key: str
|
|
21
40
|
uses_joined_import: bool
|
|
41
|
+
imported_symbols: list[SourceImportedSymbol] = Field(default_factory=list)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class SourceExport(BaseModel):
    """A single export record of a source file (stored in ``SourceFile.exports``)."""

    # Exported name; "*" together with is_reexport and a "module" crossing
    # type is treated as a star re-export by find_unused_exports.
    name: str
    # Name that find_unused_exports looks up in the re-export target when it
    # propagates usage through a symbol re-export.
    local_name: str
    kind: SourceExportKind
    crossing_type: ImportCrossingType
    path: str
    is_relative: bool
    # For re-exports this is compared against the extracted source ids to
    # locate the target file.
    normalized_path: str
    is_reexport: bool
    is_default: bool
    is_aliased: bool
    # The synthesized per-file module export is created with statement_id=0.
    statement_id: int
|
|
22
56
|
|
|
23
57
|
|
|
24
58
|
class SourceFunction(BaseModel):
|
|
@@ -91,6 +125,7 @@ class SourceAbstractClass(BaseModel):
|
|
|
91
125
|
|
|
92
126
|
class SourceFile(BaseModel):
|
|
93
127
|
imports: list[SourceImport]
|
|
128
|
+
exports: list[SourceExport] = Field(default_factory=list)
|
|
94
129
|
classes: list[SourceClass]
|
|
95
130
|
interfaces: list[SourceInterface] = Field(default_factory=list)
|
|
96
131
|
types: list[SourceType] = Field(default_factory=list)
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from .definitions import SourceExport, SourceFile, SourceImportedSymbol, SourceImport
|
|
6
|
+
from .extraction import ExtractionResult
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True)
class UnusedExport:
    """Immutable record describing one export reported by ``find_unused_exports``."""

    # Source id of the file that declares the export.
    source_id: str
    # Exported name, copied from SourceExport.name.
    name: str
    # Copied from SourceExport.kind.
    kind: str
    # Copied from SourceExport.crossing_type.
    crossing_type: str
    # Human-readable explanation of why the export is considered unused.
    reason: str
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def find_unused_exports(
    extraction_result: ExtractionResult | dict[str, SourceFile],
) -> tuple[UnusedExport, ...]:
    """Report every export that no other extracted source file imports.

    Usage is derived purely from the extracted imports and exports:

    * any import of another extracted file marks that file's module export;
    * a symbol import additionally marks the matching symbol exports of the
      target (a star import marks all of them, a namespace symbol marks none);
    * imports that only feed a re-export of the importing file ("re-export
      plumbing") are ignored, so a re-export does not keep its origin alive
      by itself;
    * when a re-export is marked as used, the usage is propagated to the
      file it re-exports from.

    Args:
        extraction_result: Either an extraction result exposing ``.files`` or
            a plain mapping of source id to ``SourceFile``.

    Returns:
        One ``UnusedExport`` per export that was never marked, in the
        iteration order of the files and of each file's export list.
    """
    files = _source_files(extraction_result)
    exports_by_source = {
        source_id: tuple(source_file.exports)
        for source_id, source_file in files.items()
    }
    export_index: dict[tuple[str, str, str], list[SourceExport]] = {}
    module_exports: dict[str, list[SourceExport]] = {}
    star_reexports: dict[str, list[SourceExport]] = {}

    for source_id, source_exports in exports_by_source.items():
        for source_export in source_exports:
            export_index.setdefault(
                (source_id, source_export.name, source_export.crossing_type),
                [],
            ).append(source_export)
            if source_export.kind == "module" and source_export.crossing_type == "module":
                module_exports.setdefault(source_id, []).append(source_export)
            if (
                source_export.is_reexport
                and source_export.crossing_type == "module"
                and source_export.name == "*"
            ):
                star_reexports.setdefault(source_id, []).append(source_export)

    used: set[tuple[str, str, str, str, str, bool]] = set()
    # (source_id, name) lookups that already walked the star re-exports of
    # source_id. Without this guard a cycle of star re-exports combined with
    # a name that none of the files defines would recurse without bound.
    star_lookups: set[tuple[str, str]] = set()

    def mark_export(source_id: str, source_export: SourceExport) -> None:
        key = _export_key(source_id, source_export)
        if key in used:
            return
        used.add(key)
        if not source_export.is_reexport:
            return
        if source_export.normalized_path not in files:
            return
        # A used re-export keeps the file it forwards from alive as well.
        mark_module(source_export.normalized_path)
        if source_export.crossing_type == "symbol":
            mark_symbol(
                source_export.normalized_path,
                source_export.local_name,
            )

    def mark_module(source_id: str) -> None:
        for source_export in module_exports.get(source_id, ()):
            mark_export(source_id, source_export)

    def mark_symbol(source_id: str, name: str) -> None:
        matched_exports = export_index.get((source_id, name, "symbol"), [])
        if matched_exports:
            for source_export in matched_exports:
                mark_export(source_id, source_export)
            return

        lookup = (source_id, name)
        if lookup in star_lookups:
            # Already walked (or currently walking) these star re-exports for
            # this name; marking is idempotent, so repeating adds nothing.
            return
        star_lookups.add(lookup)

        # The name is not declared here; it may be forwarded by `export *`.
        for source_export in star_reexports.get(source_id, ()):
            mark_export(source_id, source_export)
            if source_export.normalized_path in files:
                mark_module(source_export.normalized_path)
                mark_symbol(source_export.normalized_path, name)

    def mark_all_symbols(source_id: str) -> None:
        for source_export in exports_by_source.get(source_id, ()):
            if source_export.crossing_type == "symbol":
                mark_export(source_id, source_export)

    for source_id, source_file in files.items():
        for source_import in source_file.imports:
            target_id = source_import.normalized_path
            # Only imports of *other* extracted files count as usage.
            if target_id not in files or target_id == source_id:
                continue

            symbols = _imported_symbols(source_import)
            if source_import.crossing_type == "symbol" and symbols:
                usable_symbols = [
                    symbol
                    for symbol in symbols
                    if not _is_reexport_plumbing(
                        exports_by_source.get(source_id, ()),
                        target_id,
                        symbol,
                    )
                ]
                if not usable_symbols:
                    continue
                mark_module(target_id)
                for symbol in usable_symbols:
                    if symbol.is_namespace:
                        continue
                    if symbol.is_star or symbol.name == "*":
                        mark_all_symbols(target_id)
                    else:
                        mark_symbol(target_id, symbol.name)
                continue

            mark_module(target_id)

    unused: list[UnusedExport] = []
    for source_id, source_exports in exports_by_source.items():
        for source_export in source_exports:
            if _export_key(source_id, source_export) in used:
                continue
            unused.append(
                UnusedExport(
                    source_id=source_id,
                    name=source_export.name,
                    kind=source_export.kind,
                    crossing_type=source_export.crossing_type,
                    reason="export is not imported by another source file",
                )
            )

    return tuple(unused)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _source_files(
|
|
135
|
+
extraction_result: ExtractionResult | dict[str, SourceFile],
|
|
136
|
+
) -> dict[str, SourceFile]:
|
|
137
|
+
if isinstance(extraction_result, dict):
|
|
138
|
+
return extraction_result
|
|
139
|
+
return extraction_result.files
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _imported_symbols(source_import: SourceImport) -> tuple[SourceImportedSymbol, ...]:
|
|
143
|
+
if source_import.imported_symbols:
|
|
144
|
+
return tuple(source_import.imported_symbols)
|
|
145
|
+
if not source_import.imported_name:
|
|
146
|
+
return ()
|
|
147
|
+
return (
|
|
148
|
+
SourceImportedSymbol(
|
|
149
|
+
name=source_import.imported_name,
|
|
150
|
+
alias="",
|
|
151
|
+
is_aliased=False,
|
|
152
|
+
is_default=False,
|
|
153
|
+
is_namespace=False,
|
|
154
|
+
is_star=source_import.imported_name == "*",
|
|
155
|
+
),
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _is_reexport_plumbing(
|
|
160
|
+
source_exports: tuple[SourceExport, ...],
|
|
161
|
+
target_id: str,
|
|
162
|
+
symbol: SourceImportedSymbol,
|
|
163
|
+
) -> bool:
|
|
164
|
+
for source_export in source_exports:
|
|
165
|
+
if not source_export.is_reexport or source_export.normalized_path != target_id:
|
|
166
|
+
continue
|
|
167
|
+
if source_export.name == "*":
|
|
168
|
+
return True
|
|
169
|
+
if source_export.local_name == symbol.name or source_export.name == symbol.name:
|
|
170
|
+
return True
|
|
171
|
+
return False
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _export_key(
|
|
175
|
+
source_id: str,
|
|
176
|
+
source_export: SourceExport,
|
|
177
|
+
) -> tuple[str, str, str, str, str, bool]:
|
|
178
|
+
return (
|
|
179
|
+
source_id,
|
|
180
|
+
source_export.name,
|
|
181
|
+
source_export.crossing_type,
|
|
182
|
+
source_export.normalized_path,
|
|
183
|
+
source_export.local_name,
|
|
184
|
+
source_export.is_reexport,
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
__all__ = ["UnusedExport", "find_unused_exports"]
|
|
@@ -0,0 +1,340 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from fnmatch import fnmatch
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from subprocess import run
|
|
8
|
+
from typing import Protocol
|
|
9
|
+
|
|
10
|
+
from ..definitions import SourceExport, SourceFile, SourceImport
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class SourceExtraction:
    """Immutable result of one extractor run over a source tree."""

    # Extracted files keyed by source id.
    files: dict[str, SourceFile]
    # Number of candidate files counted during discovery. An excluded
    # directory is counted as a single entry, so this is approximate when
    # directories are pruned.
    files_found: int
    # Number of counted entries that were dropped by an exclusion.
    files_excluded: int
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(frozen=True)
class ExtractionTarget:
    """A file selected for extraction."""

    # "/"-joined path of the file relative to the sources root, built from
    # the entry names during the directory walk (extension still attached).
    source_id: str
    # Filesystem path of the file as yielded by the directory walk.
    path: Path
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class SourceExtractor(Protocol):
    """Contract plus shared default behaviour for per-language extractors.

    Implementers supply the four attributes below. The methods run the
    bundled extractor script once for all discovered files and normalize
    the imports and exports it reports.
    """

    language: str
    file_extensions: tuple[str, ...]
    command: str
    extractor_file: str

    def normalize_source_id(self, value: str) -> str:
        """Trim whitespace and surrounding slashes, then drop a known extension.

        Only the first entry of ``file_extensions`` that the value ends with
        is removed.
        """
        trimmed = value.strip().strip("/")
        matching_extension = next(
            (
                extension
                for extension in self.file_extensions
                if trimmed.endswith(extension)
            ),
            None,
        )
        if matching_extension is None:
            return trimmed
        return trimmed[: -len(matching_extension)]

    def normalize_import(
        self,
        source_id: str,
        source_import: SourceImport,
        known_source_ids: set[str],
    ) -> SourceImport:
        """Return the import unchanged when it targets a known source id.

        Otherwise a copy is built whose normalized path is trimmed of
        whitespace and surrounding slashes and whose
        ``file_barrier_crossed`` flag is cleared.
        """
        if source_import.normalized_path not in known_source_ids:
            trimmed_path = source_import.normalized_path.strip().strip("/")
            return SourceImport(
                path=source_import.path,
                is_relative=source_import.is_relative,
                normalized_path=trimmed_path,
                imported_name=source_import.imported_name,
                is_aliased=source_import.is_aliased,
                crossing_type=source_import.crossing_type,
                file_barrier_crossed=False,
                statement_id=source_import.statement_id,
                join_key=source_import.join_key,
                uses_joined_import=source_import.uses_joined_import,
                imported_symbols=source_import.imported_symbols,
            )
        return source_import

    def normalize_export(
        self,
        source_id: str,
        source_export: SourceExport,
        known_source_ids: set[str],
    ) -> SourceExport:
        """Return a copy of the export with its normalized path trimmed."""
        trimmed_path = source_export.normalized_path.strip().strip("/")
        return SourceExport(
            name=source_export.name,
            local_name=source_export.local_name,
            kind=source_export.kind,
            crossing_type=source_export.crossing_type,
            path=source_export.path,
            is_relative=source_export.is_relative,
            normalized_path=trimmed_path,
            is_reexport=source_export.is_reexport,
            is_default=source_export.is_default,
            is_aliased=source_export.is_aliased,
            statement_id=source_export.statement_id,
        )

    def extract_files(
        self,
        sources_root: Path,
        exclusions: tuple[str, ...],
    ) -> SourceExtraction:
        """Discover matching files under ``sources_root`` and extract them in one batch.

        The extractor script is invoked as
        ``<command> <script> --batch <resolved root>`` and receives, as its
        input, a JSON object mapping normalized source ids to resolved file
        paths. Its JSON answer is validated into ``SourceFile`` objects and
        then normalized against the full set of returned source ids. When no
        file survives the exclusions the script is not run at all.
        """
        script = Path(__file__).parent.joinpath("scripts", self.extractor_file)
        assert script.is_file(), f"Extractor script {script} not found"

        targets, files_found, files_excluded = _collect_extraction_targets(
            sources_root,
            self.file_extensions,
            exclusions,
        )

        files: dict[str, SourceFile] = {}
        if targets:
            batch_input: dict[str, str] = {}
            for target in targets:
                normalized_id = self.normalize_source_id(target.source_id)
                batch_input[normalized_id] = str(target.path.resolve())

            cmd = [self.command, str(script), "--batch", str(sources_root.resolve())]
            raw_files = _json_from_output(cmd, json.dumps(batch_input))

            validated = {
                source_id: SourceFile.model_validate(raw_file)
                for source_id, raw_file in raw_files.items()
            }
            # Normalization needs to know every id the script reported.
            known_source_ids = set(validated)
            files = {
                source_id: self.normalize_source_file(
                    source_id,
                    validated_file,
                    known_source_ids,
                )
                for source_id, validated_file in validated.items()
            }

        return SourceExtraction(
            files=files,
            files_found=files_found,
            files_excluded=files_excluded,
        )

    def normalize_source_file(
        self,
        source_id: str,
        source_file: SourceFile,
        known_source_ids: set[str],
    ) -> SourceFile:
        """Rebuild a source file with normalized imports and exports.

        Exports are normalized first and completed with the file's module
        export; every other field is carried over as is.
        """
        normalized_exports = self._with_module_export(
            source_id,
            [
                self.normalize_export(source_id, source_export, known_source_ids)
                for source_export in source_file.exports
            ],
        )
        normalized_imports = [
            self.normalize_import(source_id, source_import, known_source_ids)
            for source_import in source_file.imports
        ]

        return SourceFile(
            imports=normalized_imports,
            exports=normalized_exports,
            classes=source_file.classes,
            interfaces=source_file.interfaces,
            types=source_file.types,
            abstract_classes=source_file.abstract_classes,
            functions=source_file.functions,
            line_count=source_file.line_count,
            code_line_count=source_file.code_line_count,
            public_symbol_count=source_file.public_symbol_count,
        )

    def _with_module_export(
        self,
        source_id: str,
        exports: list[SourceExport],
    ) -> list[SourceExport]:
        """Prepend the file's own module export unless an equivalent one exists.

        Two exports are considered equivalent when name, kind, crossing type
        and normalized path all agree; in that case the given list is
        returned unchanged.
        """

        def identity(export: SourceExport) -> tuple[str, str, str, str]:
            return (
                export.name,
                export.kind,
                export.crossing_type,
                export.normalized_path,
            )

        module_export = SourceExport(
            name=source_id,
            local_name=source_id,
            kind="module",
            crossing_type="module",
            path=source_id,
            is_relative=False,
            normalized_path=source_id,
            is_reexport=False,
            is_default=False,
            is_aliased=False,
            statement_id=0,
        )
        module_identity = identity(module_export)
        if any(identity(existing) == module_identity for existing in exports):
            return exports
        return [module_export, *exports]
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _collect_extraction_targets(
    sources_root: Path,
    file_extensions: tuple[str, ...],
    exclusions: tuple[str, ...],
) -> tuple[tuple[ExtractionTarget, ...], int, int]:
    """Walk ``sources_root`` and pick the files that should be extracted.

    Entries of each directory are visited in name order; a directory's own
    files come before the contents of its subdirectories. Symlinked
    directories are not descended into. Excluded directories are pruned
    without being read and therefore count as a single found-and-excluded
    entry each.

    Returns:
        ``(targets, files_found, files_excluded)``.
    """
    directory_exclusions, file_exclusions = _partition_exclusions(
        exclusions,
        file_extensions,
    )
    selected: list[ExtractionTarget] = []
    found = 0
    excluded = 0

    # Explicit stack of (directory, source-id prefix); subdirectories are
    # pushed in reverse so they are popped in name order, depth first.
    pending: list[tuple[Path, str]] = [(sources_root, "")]
    while pending:
        directory, prefix = pending.pop()
        subdirectories: list[tuple[Path, str]] = []

        for entry in sorted(directory.iterdir(), key=lambda path: path.name):
            source_id = f"{prefix}/{entry.name}" if prefix else entry.name

            if entry.is_dir() and not entry.is_symlink():
                if _matches_directory_exclusion(source_id, directory_exclusions):
                    # Exact file counts would require descending into the pruned tree.
                    found += 1
                    excluded += 1
                else:
                    subdirectories.append((entry, source_id))
            elif entry.is_file() and entry.suffix in file_extensions:
                found += 1
                if any(
                    _matches_exclusion(source_id, exclusion)
                    for exclusion in file_exclusions
                ):
                    excluded += 1
                else:
                    selected.append(ExtractionTarget(source_id, entry))

        pending.extend(reversed(subdirectories))

    return tuple(selected), found, excluded
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _json_from_output(cmd: list[str], input_json: str | None = None) -> dict:
|
|
248
|
+
output_json = run(
|
|
249
|
+
cmd,
|
|
250
|
+
capture_output=True,
|
|
251
|
+
text=True,
|
|
252
|
+
input=input_json,
|
|
253
|
+
check=True,
|
|
254
|
+
).stdout
|
|
255
|
+
|
|
256
|
+
try:
|
|
257
|
+
return json.loads(output_json)
|
|
258
|
+
except json.JSONDecodeError as e:
|
|
259
|
+
raise ValueError(f"Failed to parse JSON from output: {output_json}") from e
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _matches_exclusion(source_id: str, exclusion: str) -> bool:
|
|
263
|
+
normalized = exclusion.replace("\\", "/").strip().strip("/")
|
|
264
|
+
if not normalized:
|
|
265
|
+
return False
|
|
266
|
+
|
|
267
|
+
if _matches_path_pattern(source_id, normalized):
|
|
268
|
+
return True
|
|
269
|
+
|
|
270
|
+
has_glob = any(char in normalized for char in "*?[")
|
|
271
|
+
if not normalized or has_glob:
|
|
272
|
+
return False
|
|
273
|
+
|
|
274
|
+
return source_id.startswith(f"{normalized}/")
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _partition_exclusions(
|
|
278
|
+
exclusions: tuple[str, ...],
|
|
279
|
+
file_extensions: tuple[str, ...],
|
|
280
|
+
) -> tuple[tuple[str, ...], tuple[str, ...]]:
|
|
281
|
+
directory_exclusions = []
|
|
282
|
+
file_exclusions = []
|
|
283
|
+
for exclusion in exclusions:
|
|
284
|
+
normalized = exclusion.replace("\\", "/").strip()
|
|
285
|
+
if not normalized:
|
|
286
|
+
continue
|
|
287
|
+
if _is_recursive_directory_exclusion(normalized, file_extensions):
|
|
288
|
+
directory_exclusions.append(_directory_exclusion_pattern(normalized))
|
|
289
|
+
else:
|
|
290
|
+
file_exclusions.append(normalized.strip("/"))
|
|
291
|
+
return tuple(directory_exclusions), tuple(file_exclusions)
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def _is_recursive_directory_exclusion(
|
|
295
|
+
exclusion: str,
|
|
296
|
+
file_extensions: tuple[str, ...],
|
|
297
|
+
) -> bool:
|
|
298
|
+
normalized = exclusion.strip("/")
|
|
299
|
+
if not normalized:
|
|
300
|
+
return False
|
|
301
|
+
if exclusion.endswith("/") or normalized.endswith("/**"):
|
|
302
|
+
return True
|
|
303
|
+
if any(char in normalized for char in "*?["):
|
|
304
|
+
return False
|
|
305
|
+
return not normalized.endswith(file_extensions)
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def _directory_exclusion_pattern(exclusion: str) -> str:
|
|
309
|
+
normalized = exclusion.strip("/")
|
|
310
|
+
if normalized.endswith("/**"):
|
|
311
|
+
return normalized[:-3].rstrip("/")
|
|
312
|
+
return normalized
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def _matches_directory_exclusion(
|
|
316
|
+
source_id: str,
|
|
317
|
+
directory_exclusions: tuple[str, ...],
|
|
318
|
+
) -> bool:
|
|
319
|
+
for exclusion in directory_exclusions:
|
|
320
|
+
if _matches_path_pattern(source_id, exclusion):
|
|
321
|
+
return True
|
|
322
|
+
if "/" not in exclusion and not any(char in exclusion for char in "*?["):
|
|
323
|
+
if Path(source_id).name == exclusion:
|
|
324
|
+
return True
|
|
325
|
+
return False
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def _matches_path_pattern(source_id: str, pattern: str) -> bool:
|
|
329
|
+
if fnmatch(source_id, pattern):
|
|
330
|
+
return True
|
|
331
|
+
if pattern.startswith("**/") and fnmatch(source_id, pattern[3:]):
|
|
332
|
+
return True
|
|
333
|
+
return False
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
__all__ = [
|
|
337
|
+
"ExtractionTarget",
|
|
338
|
+
"SourceExtraction",
|
|
339
|
+
"SourceExtractor",
|
|
340
|
+
]
|
|
@@ -5,7 +5,7 @@ import re
|
|
|
5
5
|
from dataclasses import dataclass
|
|
6
6
|
from pathlib import Path, PurePosixPath
|
|
7
7
|
|
|
8
|
-
from ..definitions import SourceFile, SourceImport
|
|
8
|
+
from ..definitions import SourceExport, SourceFile, SourceImport
|
|
9
9
|
from .base import (
|
|
10
10
|
SourceExtraction,
|
|
11
11
|
SourceExtractor,
|
|
@@ -92,6 +92,31 @@ class PhpExtractor(SourceExtractor):
|
|
|
92
92
|
statement_id=source_import.statement_id,
|
|
93
93
|
join_key=self._normalized_join_key(normalized_path, source_import),
|
|
94
94
|
uses_joined_import=source_import.uses_joined_import,
|
|
95
|
+
imported_symbols=source_import.imported_symbols,
|
|
96
|
+
)
|
|
97
|
+
|
|
98
|
+
def normalize_export(
    self,
    source_id: str,
    source_export: SourceExport,
    known_source_ids: set[str],
) -> SourceExport:
    """Return a copy of the export whose normalized path is resolved.

    A non-empty normalized path is passed through ``_known_source_id``
    together with the known source ids; an empty one is kept as is. All
    other fields are copied unchanged.
    """
    original_path = source_export.normalized_path
    resolved_path = (
        self._known_source_id(original_path, known_source_ids)
        if original_path
        else original_path
    )

    return SourceExport(
        name=source_export.name,
        local_name=source_export.local_name,
        kind=source_export.kind,
        crossing_type=source_export.crossing_type,
        path=source_export.path,
        is_relative=source_export.is_relative,
        normalized_path=resolved_path,
        is_reexport=source_export.is_reexport,
        is_default=source_export.is_default,
        is_aliased=source_export.is_aliased,
        statement_id=source_export.statement_id,
    )
|
|
96
121
|
|
|
97
122
|
def _normalized_join_key(
|