mafia-graph 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. mafia/__init__.py +19 -0
  2. mafia/api.py +240 -0
  3. mafia/compat.py +8 -0
  4. mafia/config.py +16 -0
  5. mafia/graph.py +67 -0
  6. mafia/ids.py +60 -0
  7. mafia/migrate.py +92 -0
  8. mafia/plugins.py +64 -0
  9. mafia/skill.py +183 -0
  10. mafia_cli.py +1156 -0
  11. mafia_graph-0.9.0.dist-info/METADATA +711 -0
  12. mafia_graph-0.9.0.dist-info/RECORD +134 -0
  13. mafia_graph-0.9.0.dist-info/WHEEL +5 -0
  14. mafia_graph-0.9.0.dist-info/entry_points.txt +26 -0
  15. mafia_graph-0.9.0.dist-info/licenses/LICENSE +21 -0
  16. mafia_graph-0.9.0.dist-info/top_level.txt +19 -0
  17. phase10_ast/__init__.py +1 -0
  18. phase10_ast/run_ast_summaries.py +655 -0
  19. phase10_intelligence/__init__.py +1 -0
  20. phase10_intelligence/component_intelligence.py +647 -0
  21. phase10_intelligence/flow_intelligence.py +399 -0
  22. phase10_intelligence/issue_diagnosis.py +337 -0
  23. phase10_intelligence/provenance.py +84 -0
  24. phase11_search/__init__.py +1 -0
  25. phase11_search/bigquery_store.py +168 -0
  26. phase11_search/chat_engine.py +381 -0
  27. phase11_search/code_reader.py +264 -0
  28. phase11_search/document_builder.py +433 -0
  29. phase11_search/graph_retriever.py +317 -0
  30. phase11_search/ingestion/__init__.py +1 -0
  31. phase11_search/ingestion/doc_extractor.py +252 -0
  32. phase11_search/ingestion/doc_store.py +309 -0
  33. phase11_search/pipeline_runner.py +410 -0
  34. phase11_search/search_api.py +1292 -0
  35. phase11_search/search_api1.py +2404 -0
  36. phase11_search/vector_index.py +425 -0
  37. phase12_ui/__init__.py +0 -0
  38. phase12_ui/index.html +175 -0
  39. phase12_ui/landing.html +343 -0
  40. phase12_ui/static/app.js +3431 -0
  41. phase12_ui/static/chat.js +543 -0
  42. phase12_ui/static/landing.css +1422 -0
  43. phase12_ui/static/landing.js +812 -0
  44. phase12_ui/static/styles.css +2137 -0
  45. phase12_ui/static/v2/app.js +3110 -0
  46. phase12_ui/static/v2/styles.css +1514 -0
  47. phase12_ui/v2/index.html +152 -0
  48. phase13_evaluation/__init__.py +1 -0
  49. phase13_evaluation/gold_dataset_builder.py +171 -0
  50. phase13_evaluation/metric_calculators.py +179 -0
  51. phase14_orchestration/__init__.py +1 -0
  52. phase14_orchestration/artifact_versioning.py +134 -0
  53. phase14_orchestration/orchestrator.py +17 -0
  54. phase14_orchestration/run_state.py +103 -0
  55. phase2_repo_intake/__init__.py +1 -0
  56. phase2_repo_intake/arch_detector.py +526 -0
  57. phase2_repo_intake/manifest_writer.py +83 -0
  58. phase2_repo_intake/repo_scanner.py +667 -0
  59. phase2_repo_intake/scope_discovery.py +142 -0
  60. phase2_repo_intake/source_adapters.py +153 -0
  61. phase2_repo_intake/stack_detector.py +140 -0
  62. phase3_extraction_framework/__init__.py +1 -0
  63. phase3_extraction_framework/base_extractor.py +407 -0
  64. phase3_extraction_framework/cross_scope/__init__.py +97 -0
  65. phase3_extraction_framework/cross_scope/artifact_stitcher.py +54 -0
  66. phase3_extraction_framework/cross_scope/cross_workspace_imports.py +104 -0
  67. phase3_extraction_framework/cross_scope/shared_env_stitcher.py +50 -0
  68. phase3_extraction_framework/cross_scope/shared_lib_stitcher.py +61 -0
  69. phase3_extraction_framework/cross_scope/shared_table_stitcher.py +70 -0
  70. phase3_extraction_framework/cross_scope/workspace_dep_stitcher.py +102 -0
  71. phase3_extraction_framework/dispatcher.py +311 -0
  72. phase3_extraction_framework/run_extraction.py +428 -0
  73. phase3_extraction_framework/skeleton_extractors.py +224 -0
  74. phase3_extraction_framework/structural_relations.py +1686 -0
  75. phase3_extraction_framework/symbol_resolver.py +375 -0
  76. phase4_extractors/__init__.py +1 -0
  77. phase4_extractors/api/__init__.py +1 -0
  78. phase4_extractors/api/api_extractor.py +595 -0
  79. phase4_extractors/ast_helpers.py +268 -0
  80. phase4_extractors/bff/__init__.py +1 -0
  81. phase4_extractors/bff/bff_extractor.py +377 -0
  82. phase4_extractors/config/__init__.py +1 -0
  83. phase4_extractors/config/config_extractor.py +132 -0
  84. phase4_extractors/db/__init__.py +1 -0
  85. phase4_extractors/db/bigquery_extractor.py +285 -0
  86. phase4_extractors/db/databricks_extractor.py +274 -0
  87. phase4_extractors/db/pg_sql_extractor.py +529 -0
  88. phase4_extractors/generic/__init__.py +0 -0
  89. phase4_extractors/generic/ast_extractor.py +293 -0
  90. phase4_extractors/ts_extractor.py +551 -0
  91. phase4_extractors/ui/__init__.py +1 -0
  92. phase4_extractors/ui/ui_extractor.py +643 -0
  93. phase5_compatibility/__init__.py +1 -0
  94. phase5_compatibility/legacy_generators.py +271 -0
  95. phase6_coverage/__init__.py +1 -0
  96. phase6_coverage/coverage_builder.py +509 -0
  97. phase7_genai/__init__.py +1 -0
  98. phase7_genai/gemini_client.py +212 -0
  99. phase7_genai/pattern_discovery.py +559 -0
  100. phase7_genai/prompt_templates.py +181 -0
  101. phase7_mapping/__init__.py +1 -0
  102. phase7_mapping/mapping_evaluator.py +246 -0
  103. phase8_hardening/__init__.py +1 -0
  104. phase8_hardening/graph_diff.py +156 -0
  105. phase8_hardening/quality_gate.py +231 -0
  106. phase8_hardening/regression_runner.py +201 -0
  107. phase9_graph/__init__.py +1 -0
  108. phase9_graph/annotation_store.py +165 -0
  109. phase9_graph/blast_radius_engine.py +451 -0
  110. phase9_graph/change_simulator.py +247 -0
  111. phase9_graph/confidence_propagation.py +175 -0
  112. phase9_graph/graph_builder.py +740 -0
  113. phase9_graph/graph_query.py +565 -0
  114. phase9_graph/graph_validator.py +215 -0
  115. phase9_graph/graph_visualizer.py +331 -0
  116. phase9_graph/temporal_snapshots.py +304 -0
  117. phase9_graph/topology_analyzer.py +429 -0
  118. phase9_graph/weak_point_detector.py +288 -0
  119. schemas/__init__.py +1 -0
  120. schemas/artifact_schemas.json +314 -0
  121. schemas/emitter.py +425 -0
  122. schemas/id_format.py +228 -0
  123. schemas/samples/repo_manifest_sample.json +71 -0
  124. schemas/scope.py +75 -0
  125. schemas/validator.py +230 -0
  126. utils/__init__.py +1 -0
  127. utils/ai_config.py +192 -0
  128. utils/ast_summarizer.py +579 -0
  129. utils/file_cache.py +100 -0
  130. utils/file_classifier.py +195 -0
  131. utils/fuzzy.py +84 -0
  132. utils/ignore_loader.py +65 -0
  133. utils/security.py +51 -0
  134. utils/ts_parser.py +575 -0
mafia/__init__.py ADDED
@@ -0,0 +1,19 @@
1
+ """
2
+ MAFIA — Multi-Artifact Flow Intelligence Architecture
3
+ ======================================================
4
+ Public Python API. This is the only stable surface.
5
+
6
+ Usage:
7
+ from mafia import MAFIA
8
+ m = MAFIA.from_path("/path/to/project").run_all()
9
+ results = m.search("getUserProfile")
10
+ skill = m.as_skill()
11
+ """
12
+
13
+ from mafia.api import MAFIA
14
+ from mafia.ids import EntityID
15
+ from mafia.graph import Graph
16
+ from mafia.skill import Skill
17
+ from schemas.scope import Scope
18
+
19
+ __all__ = ["MAFIA", "EntityID", "Graph", "Skill", "Scope"]
mafia/api.py ADDED
@@ -0,0 +1,240 @@
1
+ """
2
+ MAFIA Public API — Façade
3
+ ==========================
4
+ Single entry point for all MAFIA operations.
5
+ """
6
+
7
+ import json
8
+ from pathlib import Path
9
+ from typing import Optional
10
+
11
+ from mafia.ids import EntityID
12
+ from mafia.graph import Graph
13
+
14
+
15
+ class MAFIA:
16
+ """Main façade for MAFIA codebase intelligence."""
17
+
18
+ def __init__(self, codebase_root: Path, mafia_dir: Path):
19
+ self._root = codebase_root
20
+ self._mafia_dir = mafia_dir
21
+ self._output_dir = mafia_dir / "output"
22
+ self._graph: Optional[Graph] = None
23
+ self._config: dict = {}
24
+
25
+ # ── Constructors ─────────────────────────────────────────────────────────
26
+
27
+ @classmethod
28
+ def from_path(cls, path: str | Path, *, name: str | None = None) -> "MAFIA":
29
+ from phase4_source_adapters import resolve_source
30
+ root = resolve_source(str(path), Path(".mafia"))
31
+ mafia_dir = root / ".mafia"
32
+ mafia_dir.mkdir(parents=True, exist_ok=True)
33
+ (mafia_dir / "output").mkdir(parents=True, exist_ok=True)
34
+ inst = cls(root, mafia_dir)
35
+ # Apply mafia.json overrides if present
36
+ from mafia.config import load_config
37
+ inst._config = load_config(str(mafia_dir))
38
+ return inst
39
+
40
+ @classmethod
41
+ def from_archive(cls, archive: str | Path, *, name: str | None = None) -> "MAFIA":
42
+ from phase4_source_adapters import resolve_source
43
+ work_dir = Path(".mafia")
44
+ root = resolve_source(str(archive), work_dir)
45
+ mafia_dir = root / ".mafia"
46
+ mafia_dir.mkdir(parents=True, exist_ok=True)
47
+ (mafia_dir / "output").mkdir(parents=True, exist_ok=True)
48
+ return cls(root, mafia_dir)
49
+
50
+ @classmethod
51
+ def from_git(cls, url: str, *, ref: str = "HEAD", name: str | None = None) -> "MAFIA":
52
+ from phase4_source_adapters.git_adapter import GitAdapter
53
+ root = GitAdapter().resolve(url, Path(".mafia"), ref=ref)
54
+ mafia_dir = root / ".mafia"
55
+ mafia_dir.mkdir(parents=True, exist_ok=True)
56
+ (mafia_dir / "output").mkdir(parents=True, exist_ok=True)
57
+ return cls(root, mafia_dir)
58
+
59
+ @classmethod
60
+ def open(cls, mafia_dir: str | Path) -> "MAFIA":
61
+ """Open an already-initialized .mafia/ directory."""
62
+ md = Path(mafia_dir)
63
+ if not md.exists():
64
+ raise FileNotFoundError(f".mafia directory not found: {md}")
65
+ root = md.parent
66
+ return cls(root, md)
67
+
68
+ # ── Pipeline methods ─────────────────────────────────────────────────────
69
+
70
+ def scan(self) -> "MAFIA":
71
+ from phase2_repo_intake.scope_discovery import discover
72
+ from phase2_repo_intake.manifest_writer import write_workspace_manifest
73
+ result = discover(str(self._root))
74
+ write_workspace_manifest(result, str(self._output_dir))
75
+ return self
76
+
77
+ def extract(self, *, full: bool = False, workers: int = 8) -> "MAFIA":
78
+ # Build a minimal manifest for the dispatcher
79
+ manifest_path = self._output_dir / "repo_manifest.json"
80
+ if not manifest_path.exists():
81
+ self.scan()
82
+ from phase3_extraction_framework.dispatcher import ExtractionDispatcher
83
+ from phase4_extractors.generic.ast_extractor import GenericAstExtractor
84
+ dispatcher = ExtractionDispatcher(str(manifest_path), str(self._output_dir))
85
+ dispatcher.register("*", GenericAstExtractor)
86
+ # Try to register framework extractors if available
87
+ try:
88
+ from phase4_extractors.ui.ui_extractor import UiExtractor
89
+ dispatcher.register("ui", UiExtractor)
90
+ except Exception:
91
+ pass
92
+ try:
93
+ from phase4_extractors.bff.bff_extractor import BffExtractor
94
+ dispatcher.register("bff", BffExtractor)
95
+ except Exception:
96
+ pass
97
+ try:
98
+ from phase4_extractors.api.api_extractor import ApiExtractor
99
+ dispatcher.register("api", ApiExtractor)
100
+ except Exception:
101
+ pass
102
+ dispatcher.run(repo_workers=1, file_workers=workers)
103
+ return self
104
+
105
+ def stitch(self) -> "MAFIA":
106
+ from phase3_extraction_framework.cross_scope import run_all_stitchers
107
+ run_all_stitchers(str(self._output_dir))
108
+ return self
109
+
110
+ def build_graph(self) -> "MAFIA":
111
+ from phase9_graph.graph_builder import build_graph
112
+ build_graph(str(self._output_dir))
113
+ self._graph = None # reset lazy cache
114
+ return self
115
+
116
+ def run_all(self, *, full: bool = False) -> "MAFIA":
117
+ return self.scan().extract(full=full).stitch().build_graph()
118
+
119
+ def update(self) -> "MAFIA":
120
+ return self.scan().extract().stitch().build_graph()
121
+
122
+ # ── Scope inspection ─────────────────────────────────────────────────────
123
+
124
+ @property
125
+ def codebase(self):
126
+ manifest = self._load_workspace_manifest()
127
+ if manifest:
128
+ return manifest.get("codebase", {})
129
+ return {"id": self._root.name, "root": str(self._root)}
130
+
131
+ @property
132
+ def repos(self) -> list:
133
+ manifest = self._load_workspace_manifest()
134
+ return manifest.get("repos", []) if manifest else []
135
+
136
+ @property
137
+ def workspaces(self) -> list:
138
+ manifest = self._load_workspace_manifest()
139
+ if not manifest:
140
+ return []
141
+ result = []
142
+ for repo in manifest.get("repos", []):
143
+ result.extend(repo.get("workspaces", []))
144
+ return result
145
+
146
+ # ── Graph queries ────────────────────────────────────────────────────────
147
+
148
+ def _get_graph(self) -> Graph:
149
+ if self._graph is None:
150
+ self._graph = Graph(str(self._output_dir))
151
+ return self._graph
152
+
153
+ def search(self, query: str, *, layer: str | None = None, limit: int = 10) -> list:
154
+ return self._get_graph().search(query, layer=layer, limit=limit)
155
+
156
+ def neighbors(self, eid: str | EntityID, *, direction: str = "both", depth: int = 1) -> dict:
157
+ return self._get_graph().neighbors(str(eid), direction=direction, depth=depth)
158
+
159
+ def trace(self, eid: str | EntityID, *, depth: int = 10) -> dict:
160
+ return self._get_graph().trace(str(eid), depth=depth)
161
+
162
+ def blast(self, eid: str | EntityID, *, direction: str = "downstream", depth: int = 4) -> dict:
163
+ return self._get_graph().neighbors(str(eid), direction=direction, depth=depth)
164
+
165
+ def flows(self, *, node: str | EntityID | None = None) -> list:
166
+ return self._get_graph().flows(node=str(node) if node else None)
167
+
168
+ def path(self, src: str | EntityID, dst: str | EntityID) -> Optional[dict]:
169
+ return self._get_graph().path(str(src), str(dst))
170
+
171
+ def orphans(self) -> list:
172
+ return self._get_graph().orphans()
173
+
174
+ # ── Cross-scope queries ──────────────────────────────────────────────────
175
+
176
+ def shared_libs(self) -> list:
177
+ return self._load_shared_resources().get("shared_libs", [])
178
+
179
+ def shared_tables(self) -> list:
180
+ return self._load_shared_resources().get("shared_tables", [])
181
+
182
+ def shared_envs(self) -> list:
183
+ return self._load_shared_resources().get("shared_envs", [])
184
+
185
+ def cross_repo_edges(self) -> list:
186
+ return self._load_cross_edges("repo")
187
+
188
+ def cross_workspace_edges(self) -> list:
189
+ return self._load_cross_edges("workspace")
190
+
191
+ # ── Export ────────────────────────────────────────────────────────────────
192
+
193
+ def export_graph(self, path: str | Path) -> None:
194
+ import shutil
195
+ sg = self._output_dir / "system_graph.json"
196
+ if sg.exists():
197
+ shutil.copy2(sg, path)
198
+
199
+ def networkx_graph(self):
200
+ import networkx as nx
201
+ g = self._get_graph()
202
+ G = nx.DiGraph()
203
+ for nid, node in g.nodes().items():
204
+ G.add_node(nid, **node)
205
+ for (s, t), edge in g.edges().items():
206
+ G.add_edge(s, t, **edge)
207
+ return G
208
+
209
+ # ── Skill adapter ────────────────────────────────────────────────────────
210
+
211
+ def as_skill(self) -> "Skill":
212
+ from mafia.skill import Skill
213
+ return Skill(self)
214
+
215
+ # ── Internal helpers ─────────────────────────────────────────────────────
216
+
217
+ def _load_workspace_manifest(self) -> Optional[dict]:
218
+ p = self._output_dir / "workspace_manifest.json"
219
+ if p.exists():
220
+ return json.loads(p.read_text(encoding="utf-8"))
221
+ return None
222
+
223
+ def _load_shared_resources(self) -> dict:
224
+ p = self._output_dir / "shared_resources.json"
225
+ if p.exists():
226
+ return json.loads(p.read_text(encoding="utf-8"))
227
+ return {}
228
+
229
+ def _load_cross_edges(self, boundary: str) -> list:
230
+ p = self._output_dir / "relations.jsonl"
231
+ if not p.exists():
232
+ return []
233
+ edges = []
234
+ for line in p.read_text(encoding="utf-8").splitlines():
235
+ if not line.strip():
236
+ continue
237
+ r = json.loads(line)
238
+ if r.get("cross_scope") and r.get("boundary") == boundary:
239
+ edges.append(r)
240
+ return edges
mafia/compat.py ADDED
@@ -0,0 +1,8 @@
1
+ """MAFIA public API — backward compatibility helpers."""
2
+
3
+ from schemas.id_format import migrate_entity_id_v1_to_v2
4
+
5
+
6
def migrate_id(old_id: str) -> str:
    """Return the v2 form of the v1 entity ID *old_id*.

    Thin public alias for ``schemas.id_format.migrate_entity_id_v1_to_v2``
    (noted in the original docstring as currently an identity mapping).
    """
    new_id = migrate_entity_id_v1_to_v2(old_id)
    return new_id
mafia/config.py ADDED
@@ -0,0 +1,16 @@
1
+ """MAFIA config loader — reads .mafia/mafia.json."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+
8
+ def load_config(mafia_dir: str | Path) -> dict:
9
+ """Load .mafia/mafia.json if present, else return empty dict."""
10
+ p = Path(mafia_dir) / "mafia.json"
11
+ if not p.exists():
12
+ return {}
13
+ try:
14
+ return json.loads(p.read_text(encoding="utf-8"))
15
+ except Exception:
16
+ return {}
mafia/graph.py ADDED
@@ -0,0 +1,67 @@
1
+ """MAFIA public API — Graph wrapper."""
2
+
3
+ from pathlib import Path
4
+ from typing import Optional
5
+
6
+
7
class Graph:
    """Public wrapper that builds a ``GraphQuery`` on first use.

    The query engine is constructed from ``system_graph.json`` and
    ``e2e_flows.json`` in the output directory; a missing file is passed to
    it as ``None``.
    """

    def __init__(self, output_dir: str):
        self._output_dir = Path(output_dir)
        self._gq = None  # GraphQuery instance, created by _ensure_loaded()

    def _ensure_loaded(self):
        """Create the underlying GraphQuery once; later calls do nothing."""
        if self._gq is None:
            from phase9_graph.graph_query import GraphQuery

            def existing_or_none(filename):
                candidate = self._output_dir / filename
                return str(candidate) if candidate.exists() else None

            graph_file = existing_or_none("system_graph.json")
            flows_file = existing_or_none("e2e_flows.json")
            self._gq = GraphQuery(graph_file, flows_file)

    def _engine(self):
        """Return the query engine, loading it first if necessary."""
        self._ensure_loaded()
        return self._gq

    def nodes(self) -> dict:
        """Return a shallow copy of the engine's ``nodes`` mapping."""
        return dict(self._engine().nodes)

    def edges(self) -> dict:
        """Return a shallow copy of the engine's ``_edges`` mapping."""
        return dict(self._engine()._edges)

    def node(self, eid: str) -> Optional[dict]:
        """Look up a single node by ID; ``None`` when it is unknown."""
        return self._engine().nodes.get(eid)

    def neighbors(self, eid: str, direction: str = "both", depth: int = 2) -> dict:
        """Forward to ``GraphQuery.neighborhood``."""
        engine = self._engine()
        return engine.neighborhood(eid, direction=direction, depth=depth)

    def search(self, query: str, layer: str = None, limit: int = 10) -> list:
        """Forward to ``GraphQuery.find_by_name``."""
        engine = self._engine()
        return engine.find_by_name(query, layer=layer, limit=limit)

    def trace(self, eid: str, depth: int = 10) -> dict:
        """Forward to ``GraphQuery.trace_flow`` with *depth* as ``max_depth``."""
        engine = self._engine()
        return engine.trace_flow(eid, max_depth=depth)

    def flows(self, node: str = None, limit: int = 10) -> list:
        """Flows touching *node*, or the complete flows when *node* is falsy."""
        engine = self._engine()
        if not node:
            return engine.complete_flows(limit=limit)
        return engine.flows_for_node(node, limit=limit)

    def path(self, src: str, dst: str) -> Optional[dict]:
        """Forward to ``GraphQuery.find_path``."""
        engine = self._engine()
        return engine.find_path(src, dst)

    def orphans(self) -> list:
        """Forward to ``GraphQuery.orphan_nodes``."""
        engine = self._engine()
        return engine.orphan_nodes()

    @property
    def query(self):
        """Access the underlying GraphQuery for advanced operations."""
        return self._engine()
mafia/ids.py ADDED
@@ -0,0 +1,60 @@
1
+ """MAFIA public API — EntityID type."""
2
+
3
+ from schemas.id_format import parse_entity_id, ParsedEntityId
4
+
5
+
6
class EntityID:
    """Entity ID string paired with its parsed form.

    The raw string is kept for ``str()``, equality and hashing; the parsed
    object returned by ``parse_entity_id`` backs the component properties.
    """

    def __init__(self, eid: str):
        self._raw = eid
        self._parsed = parse_entity_id(eid)

    @classmethod
    def from_string(cls, s: str) -> "EntityID":
        """Alternate constructor; identical to calling the class directly."""
        return cls(s)

    # ── Parsed components ────────────────────────────────────────────────────

    @property
    def layer(self) -> str:
        return self._parsed.layer

    @property
    def entity_type(self) -> str:
        return self._parsed.entity_type

    @property
    def scope_path(self) -> str:
        return self._parsed.scope_path

    @property
    def qualifier(self) -> str:
        return self._parsed.qualifier

    # ── Scope path segments ("/"-separated) ──────────────────────────────────

    def _scope_segments(self) -> list:
        """Split the parsed scope path on ``/``."""
        return self._parsed.scope_path.split("/")

    @property
    def codebase(self) -> str:
        """First segment of the scope path."""
        return self._scope_segments()[0]

    @property
    def repo(self) -> str:
        """Second segment of the scope path, or the first if there is only one."""
        segments = self._scope_segments()
        if len(segments) > 1:
            return segments[1]
        return segments[0]

    @property
    def workspace(self) -> str:
        """Last segment of the scope path."""
        return self._scope_segments()[-1]

    # ── String / identity protocol ───────────────────────────────────────────

    def __str__(self) -> str:
        return self._raw

    def __repr__(self) -> str:
        return f"EntityID({self._raw!r})"

    def __eq__(self, other) -> bool:
        # NOTE(review): any non-EntityID operand is compared via str(other),
        # so objects that merely stringify to the same text compare equal.
        other_text = other._raw if isinstance(other, EntityID) else str(other)
        return self._raw == other_text

    def __hash__(self) -> int:
        # Hashes like the raw string, so it matches an equal ``str`` as a key.
        return hash(self._raw)
mafia/migrate.py ADDED
@@ -0,0 +1,92 @@
1
+ """
2
+ MAFIA Migrate
3
+ ==============
4
+ Rewrites v1 artifacts to v2 format in place. Backs up originals.
5
+ """
6
+
7
+ import json
8
+ import shutil
9
+ from pathlib import Path
10
+
11
+ from schemas.id_format import migrate_entity_id_v1_to_v2, ID_FORMAT_VERSION
12
+
13
+
14
def _recorded_format_version(sidecar: Path):
    """Return the ID format version stored in *sidecar*, defaulting to 1.

    A missing, unreadable, malformed or non-object sidecar is treated as
    "not migrated yet" (version 1) instead of aborting the migration.
    """
    try:
        data = json.loads(sidecar.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        return 1
    if not isinstance(data, dict):
        return 1
    version = data.get("id_format_version", 1)
    return version if isinstance(version, (int, float)) else 1


def _rewrite_jsonl(path: Path, upgrade) -> int:
    """Apply *upgrade* to every record of the JSONL file *path*, in place.

    Blank lines are dropped. The file is read one physical line at a time:
    ``str.splitlines()`` would also split on U+2028/U+2029/U+0085, which
    ``ensure_ascii=False`` output leaves unescaped inside strings.

    Returns:
        The number of records written, or 0 when *path* does not exist.
    """
    if not path.exists():
        return 0
    rewritten = []
    with path.open(encoding="utf-8") as fh:
        for line in fh:
            if not line.strip():
                continue
            record = json.loads(line)
            upgrade(record)
            rewritten.append(json.dumps(record, ensure_ascii=False))
    path.write_text("\n".join(rewritten) + "\n", encoding="utf-8")
    return len(rewritten)


def _upgrade_entity(entity: dict) -> None:
    """Migrate an entity's ID and add the v2 scope fields if they are missing."""
    try:
        entity["id"] = migrate_entity_id_v1_to_v2(entity.get("id", ""))
    except Exception as exc:
        # Best-effort: keep the original ID, but leave a trace of the failure.
        import logging
        logging.getLogger(__name__).warning(
            "Could not migrate entity id %r: %s", entity.get("id"), exc
        )
    legacy_scope = entity.get("repo", "")
    for field in ("scope_path", "codebase", "workspace"):
        entity.setdefault(field, legacy_scope)


def _upgrade_relation(relation: dict) -> None:
    """Add the v2 cross-scope fields to a relation if they are missing."""
    relation.setdefault("cross_scope", False)
    relation.setdefault("boundary", None)


def migrate(output_dir: str) -> dict:
    """Migrate v1 artifacts to v2 in place. Returns migration stats.

    Originals are copied to ``<output_dir>/backup-v1`` first. A backup that
    already exists is never overwritten, so re-running after an interrupted
    migration cannot replace the pristine v1 copy with a half-migrated file.

    Args:
        output_dir: directory holding entities.jsonl, relations.jsonl,
            system_graph.json and e2e_flows.json (each optional).

    Returns:
        ``{"entities_migrated": int, "relations_migrated": int,
        "already_v2": bool}``. When the sidecar already records version 2
        or later, nothing is touched and ``already_v2`` is ``True``.
    """
    out = Path(output_dir)
    backup = out / "backup-v1"
    sidecar = out / "id_format.json"

    stats = {"entities_migrated": 0, "relations_migrated": 0, "already_v2": False}

    # Check if already v2
    if sidecar.exists() and _recorded_format_version(sidecar) >= 2:
        stats["already_v2"] = True
        return stats

    # Backup (first copy wins)
    backup.mkdir(parents=True, exist_ok=True)
    for fname in ("entities.jsonl", "relations.jsonl", "system_graph.json", "e2e_flows.json"):
        src = out / fname
        dest = backup / fname
        if src.exists() and not dest.exists():
            shutil.copy2(src, dest)

    stats["entities_migrated"] = _rewrite_jsonl(out / "entities.jsonl", _upgrade_entity)
    stats["relations_migrated"] = _rewrite_jsonl(out / "relations.jsonl", _upgrade_relation)

    # Migrate system_graph.json
    sg_path = out / "system_graph.json"
    if sg_path.exists():
        sg = json.loads(sg_path.read_text(encoding="utf-8"))
        sg.setdefault("id_format_version", ID_FORMAT_VERSION)
        for node in sg.get("nodes", []):
            node.setdefault("scope_path", node.get("repo", ""))
        sg_path.write_text(json.dumps(sg, indent=2, ensure_ascii=False), encoding="utf-8")

    # Write sidecar last so an interrupted run is retried next time.
    sidecar.write_text(json.dumps({"id_format_version": ID_FORMAT_VERSION}), encoding="utf-8")

    return stats
mafia/plugins.py ADDED
@@ -0,0 +1,64 @@
1
+ """
2
+ MAFIA Plugin Loader
3
+ ====================
4
+ Discovers plugins via importlib.metadata entry points.
5
+ Groups: mafia.extractors, mafia.workspace_detectors, mafia.source_adapters
6
+ """
7
+
8
+ from typing import Optional
9
+
10
+
11
def load_plugins(group: str, config: Optional[dict] = None) -> dict[str, type]:
    """Load plugins from entry points for the given group.

    Args:
        group: entry point group name (e.g. 'mafia.extractors')
        config: optional plugins config from mafia.json with the keys
            ``enable`` (allow-list), ``disable`` (deny-list) and ``order``
            (list of names). In ``order`` the literal ``"..."`` stands for
            "every plugin not named explicitly, in discovery order"; plugins
            missing from ``order`` altogether are appended at the end.
            ``None`` values for ``disable`` and ``order`` are treated as empty.

    Returns:
        dict of name → loaded object, filtered and ordered per config.
        Entry points that fail to load are skipped; if entry points cannot
        be queried at all, an empty dict is returned.
    """
    try:
        from importlib.metadata import entry_points
        eps = entry_points(group=group)
    except Exception:
        return {}

    plugins = {}
    for ep in eps:
        try:
            plugins[ep.name] = ep.load()
        except Exception:
            # One broken plugin must not prevent the others from loading.
            continue

    if not config:
        return plugins

    # Apply enable/disable/order from config
    enable = config.get("enable")
    disable = set(config.get("disable") or ())
    order = list(config.get("order") or ())

    if enable is not None:
        enable_set = set(enable)
        plugins = {k: v for k, v in plugins.items() if k in enable_set}

    plugins = {k: v for k, v in plugins.items() if k not in disable}

    if not order:
        return plugins

    # Names listed explicitly keep their own slot: "..." must not absorb
    # them, otherwise order=["a", "...", "z"] could never put "z" last.
    explicit = {name for name in order if name != "..."}
    ordered = {}
    for name in order:
        if name == "...":
            for k, v in plugins.items():
                if k not in explicit:
                    ordered.setdefault(k, v)
        elif name in plugins:
            ordered.setdefault(name, plugins[name])
    # Add any remaining not in order list
    for k, v in plugins.items():
        ordered.setdefault(k, v)
    return ordered