mafia-graph 0.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mafia/__init__.py +19 -0
- mafia/api.py +240 -0
- mafia/compat.py +8 -0
- mafia/config.py +16 -0
- mafia/graph.py +67 -0
- mafia/ids.py +60 -0
- mafia/migrate.py +92 -0
- mafia/plugins.py +64 -0
- mafia/skill.py +183 -0
- mafia_cli.py +1156 -0
- mafia_graph-0.9.0.dist-info/METADATA +711 -0
- mafia_graph-0.9.0.dist-info/RECORD +134 -0
- mafia_graph-0.9.0.dist-info/WHEEL +5 -0
- mafia_graph-0.9.0.dist-info/entry_points.txt +26 -0
- mafia_graph-0.9.0.dist-info/licenses/LICENSE +21 -0
- mafia_graph-0.9.0.dist-info/top_level.txt +19 -0
- phase10_ast/__init__.py +1 -0
- phase10_ast/run_ast_summaries.py +655 -0
- phase10_intelligence/__init__.py +1 -0
- phase10_intelligence/component_intelligence.py +647 -0
- phase10_intelligence/flow_intelligence.py +399 -0
- phase10_intelligence/issue_diagnosis.py +337 -0
- phase10_intelligence/provenance.py +84 -0
- phase11_search/__init__.py +1 -0
- phase11_search/bigquery_store.py +168 -0
- phase11_search/chat_engine.py +381 -0
- phase11_search/code_reader.py +264 -0
- phase11_search/document_builder.py +433 -0
- phase11_search/graph_retriever.py +317 -0
- phase11_search/ingestion/__init__.py +1 -0
- phase11_search/ingestion/doc_extractor.py +252 -0
- phase11_search/ingestion/doc_store.py +309 -0
- phase11_search/pipeline_runner.py +410 -0
- phase11_search/search_api.py +1292 -0
- phase11_search/search_api1.py +2404 -0
- phase11_search/vector_index.py +425 -0
- phase12_ui/__init__.py +0 -0
- phase12_ui/index.html +175 -0
- phase12_ui/landing.html +343 -0
- phase12_ui/static/app.js +3431 -0
- phase12_ui/static/chat.js +543 -0
- phase12_ui/static/landing.css +1422 -0
- phase12_ui/static/landing.js +812 -0
- phase12_ui/static/styles.css +2137 -0
- phase12_ui/static/v2/app.js +3110 -0
- phase12_ui/static/v2/styles.css +1514 -0
- phase12_ui/v2/index.html +152 -0
- phase13_evaluation/__init__.py +1 -0
- phase13_evaluation/gold_dataset_builder.py +171 -0
- phase13_evaluation/metric_calculators.py +179 -0
- phase14_orchestration/__init__.py +1 -0
- phase14_orchestration/artifact_versioning.py +134 -0
- phase14_orchestration/orchestrator.py +17 -0
- phase14_orchestration/run_state.py +103 -0
- phase2_repo_intake/__init__.py +1 -0
- phase2_repo_intake/arch_detector.py +526 -0
- phase2_repo_intake/manifest_writer.py +83 -0
- phase2_repo_intake/repo_scanner.py +667 -0
- phase2_repo_intake/scope_discovery.py +142 -0
- phase2_repo_intake/source_adapters.py +153 -0
- phase2_repo_intake/stack_detector.py +140 -0
- phase3_extraction_framework/__init__.py +1 -0
- phase3_extraction_framework/base_extractor.py +407 -0
- phase3_extraction_framework/cross_scope/__init__.py +97 -0
- phase3_extraction_framework/cross_scope/artifact_stitcher.py +54 -0
- phase3_extraction_framework/cross_scope/cross_workspace_imports.py +104 -0
- phase3_extraction_framework/cross_scope/shared_env_stitcher.py +50 -0
- phase3_extraction_framework/cross_scope/shared_lib_stitcher.py +61 -0
- phase3_extraction_framework/cross_scope/shared_table_stitcher.py +70 -0
- phase3_extraction_framework/cross_scope/workspace_dep_stitcher.py +102 -0
- phase3_extraction_framework/dispatcher.py +311 -0
- phase3_extraction_framework/run_extraction.py +428 -0
- phase3_extraction_framework/skeleton_extractors.py +224 -0
- phase3_extraction_framework/structural_relations.py +1686 -0
- phase3_extraction_framework/symbol_resolver.py +375 -0
- phase4_extractors/__init__.py +1 -0
- phase4_extractors/api/__init__.py +1 -0
- phase4_extractors/api/api_extractor.py +595 -0
- phase4_extractors/ast_helpers.py +268 -0
- phase4_extractors/bff/__init__.py +1 -0
- phase4_extractors/bff/bff_extractor.py +377 -0
- phase4_extractors/config/__init__.py +1 -0
- phase4_extractors/config/config_extractor.py +132 -0
- phase4_extractors/db/__init__.py +1 -0
- phase4_extractors/db/bigquery_extractor.py +285 -0
- phase4_extractors/db/databricks_extractor.py +274 -0
- phase4_extractors/db/pg_sql_extractor.py +529 -0
- phase4_extractors/generic/__init__.py +0 -0
- phase4_extractors/generic/ast_extractor.py +293 -0
- phase4_extractors/ts_extractor.py +551 -0
- phase4_extractors/ui/__init__.py +1 -0
- phase4_extractors/ui/ui_extractor.py +643 -0
- phase5_compatibility/__init__.py +1 -0
- phase5_compatibility/legacy_generators.py +271 -0
- phase6_coverage/__init__.py +1 -0
- phase6_coverage/coverage_builder.py +509 -0
- phase7_genai/__init__.py +1 -0
- phase7_genai/gemini_client.py +212 -0
- phase7_genai/pattern_discovery.py +559 -0
- phase7_genai/prompt_templates.py +181 -0
- phase7_mapping/__init__.py +1 -0
- phase7_mapping/mapping_evaluator.py +246 -0
- phase8_hardening/__init__.py +1 -0
- phase8_hardening/graph_diff.py +156 -0
- phase8_hardening/quality_gate.py +231 -0
- phase8_hardening/regression_runner.py +201 -0
- phase9_graph/__init__.py +1 -0
- phase9_graph/annotation_store.py +165 -0
- phase9_graph/blast_radius_engine.py +451 -0
- phase9_graph/change_simulator.py +247 -0
- phase9_graph/confidence_propagation.py +175 -0
- phase9_graph/graph_builder.py +740 -0
- phase9_graph/graph_query.py +565 -0
- phase9_graph/graph_validator.py +215 -0
- phase9_graph/graph_visualizer.py +331 -0
- phase9_graph/temporal_snapshots.py +304 -0
- phase9_graph/topology_analyzer.py +429 -0
- phase9_graph/weak_point_detector.py +288 -0
- schemas/__init__.py +1 -0
- schemas/artifact_schemas.json +314 -0
- schemas/emitter.py +425 -0
- schemas/id_format.py +228 -0
- schemas/samples/repo_manifest_sample.json +71 -0
- schemas/scope.py +75 -0
- schemas/validator.py +230 -0
- utils/__init__.py +1 -0
- utils/ai_config.py +192 -0
- utils/ast_summarizer.py +579 -0
- utils/file_cache.py +100 -0
- utils/file_classifier.py +195 -0
- utils/fuzzy.py +84 -0
- utils/ignore_loader.py +65 -0
- utils/security.py +51 -0
- utils/ts_parser.py +575 -0
mafia/__init__.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MAFIA — Multi-Artifact Flow Intelligence Architecture
|
|
3
|
+
======================================================
|
|
4
|
+
Public Python API. This is the only stable surface.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from mafia import MAFIA
|
|
8
|
+
m = MAFIA.from_path("/path/to/project").run_all()
|
|
9
|
+
results = m.search("getUserProfile")
|
|
10
|
+
skill = m.as_skill()
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from mafia.api import MAFIA
|
|
14
|
+
from mafia.ids import EntityID
|
|
15
|
+
from mafia.graph import Graph
|
|
16
|
+
from mafia.skill import Skill
|
|
17
|
+
from schemas.scope import Scope
|
|
18
|
+
|
|
19
|
+
__all__ = ["MAFIA", "EntityID", "Graph", "Skill", "Scope"]
|
mafia/api.py
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MAFIA Public API — Façade
|
|
3
|
+
==========================
|
|
4
|
+
Single entry point for all MAFIA operations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Optional
|
|
10
|
+
|
|
11
|
+
from mafia.ids import EntityID
|
|
12
|
+
from mafia.graph import Graph
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class MAFIA:
    """Main façade for MAFIA codebase intelligence.

    An instance is bound to a codebase root and to its ``.mafia`` working
    directory; every artifact this class reads or writes lives under
    ``<mafia_dir>/output``.  Pipeline methods return ``self`` so calls can be
    chained, e.g. ``MAFIA.from_path(p).scan().extract().stitch()``.
    """

    def __init__(self, codebase_root: Path, mafia_dir: Path):
        """Bind the façade to *codebase_root* and *mafia_dir* (no I/O)."""
        self._root = codebase_root
        self._mafia_dir = mafia_dir
        self._output_dir = mafia_dir / "output"
        # Graph wrapper is created on first query; see _get_graph().
        self._graph: Optional[Graph] = None
        # Contents of <mafia_dir>/mafia.json, or {} when absent/unreadable.
        self._config: dict = {}

    # ── Constructors ─────────────────────────────────────────────────────────

    @staticmethod
    def _import_adapter(module_name: str, attr: str):
        """Return *attr* from *module_name*, with one fallback location.

        NOTE(review): the code imports from ``phase4_source_adapters``, but
        the package ships ``phase2_repo_intake/source_adapters.py`` instead.
        The fallback assumes that module exposes the same names - confirm.
        If neither location works, the error from the primary location is
        the one reported.
        """
        import importlib

        first_error = None
        for candidate in (module_name, "phase2_repo_intake.source_adapters"):
            try:
                return getattr(importlib.import_module(candidate), attr)
            except (ImportError, AttributeError) as exc:
                if first_error is None:
                    first_error = exc
        raise ImportError(
            f"cannot import {attr!r} from {module_name!r}"
        ) from first_error

    def _with_config(self) -> "MAFIA":
        """Load ``mafia.json`` overrides from the .mafia directory; return self."""
        from mafia.config import load_config

        self._config = load_config(str(self._mafia_dir))
        return self

    @classmethod
    def _bootstrap(cls, root: Path) -> "MAFIA":
        """Create ``<root>/.mafia/output`` and return a configured instance."""
        mafia_dir = root / ".mafia"
        # parents=True also creates .mafia itself.
        (mafia_dir / "output").mkdir(parents=True, exist_ok=True)
        return cls(root, mafia_dir)._with_config()

    @classmethod
    def from_path(cls, path: str | Path, *, name: str | None = None) -> "MAFIA":
        """Create an instance for the source at *path*.

        *name* is accepted for API compatibility but is not used here.
        """
        resolve_source = cls._import_adapter("phase4_source_adapters", "resolve_source")
        root = resolve_source(str(path), Path(".mafia"))
        return cls._bootstrap(root)

    @classmethod
    def from_archive(cls, archive: str | Path, *, name: str | None = None) -> "MAFIA":
        """Create an instance for the source resolved from *archive*.

        *name* is accepted for API compatibility but is not used here.
        """
        resolve_source = cls._import_adapter("phase4_source_adapters", "resolve_source")
        work_dir = Path(".mafia")
        root = resolve_source(str(archive), work_dir)
        return cls._bootstrap(root)

    @classmethod
    def from_git(cls, url: str, *, ref: str = "HEAD", name: str | None = None) -> "MAFIA":
        """Create an instance for the repository at *url*, resolved at *ref*.

        *name* is accepted for API compatibility but is not used here.
        """
        git_adapter_cls = cls._import_adapter(
            "phase4_source_adapters.git_adapter", "GitAdapter"
        )
        root = git_adapter_cls().resolve(url, Path(".mafia"), ref=ref)
        return cls._bootstrap(root)

    @classmethod
    def open(cls, mafia_dir: str | Path) -> "MAFIA":
        """Open an already-initialized .mafia/ directory.

        The codebase root is taken to be the directory's parent.

        Raises:
            FileNotFoundError: if *mafia_dir* does not exist.
        """
        md = Path(mafia_dir)
        if not md.exists():
            raise FileNotFoundError(f".mafia directory not found: {md}")
        return cls(md.parent, md)._with_config()

    # ── Pipeline methods ─────────────────────────────────────────────────────

    def scan(self) -> "MAFIA":
        """Run scope discovery on the root and write the workspace manifest."""
        from phase2_repo_intake.scope_discovery import discover
        from phase2_repo_intake.manifest_writer import write_workspace_manifest

        result = discover(str(self._root))
        write_workspace_manifest(result, str(self._output_dir))
        return self

    def extract(self, *, full: bool = False, workers: int = 8) -> "MAFIA":
        """Run the extraction dispatcher over the scanned codebase.

        Args:
            full: accepted for API compatibility; not consulted by this method.
            workers: passed to the dispatcher as ``file_workers``.
        """
        import importlib
        import logging

        log = logging.getLogger(__name__)

        manifest_path = self._output_dir / "repo_manifest.json"
        if not manifest_path.exists():
            # NOTE(review): scan() calls write_workspace_manifest(); whether
            # that also produces repo_manifest.json is not visible here.
            self.scan()

        from phase3_extraction_framework.dispatcher import ExtractionDispatcher
        from phase4_extractors.generic.ast_extractor import GenericAstExtractor

        dispatcher = ExtractionDispatcher(str(manifest_path), str(self._output_dir))
        dispatcher.register("*", GenericAstExtractor)

        # Framework extractors are optional: a failure to import or register
        # one must not abort extraction, but it is recorded for debugging.
        optional_extractors = (
            ("ui", "phase4_extractors.ui.ui_extractor", "UiExtractor"),
            ("bff", "phase4_extractors.bff.bff_extractor", "BffExtractor"),
            ("api", "phase4_extractors.api.api_extractor", "ApiExtractor"),
        )
        for layer, module_name, class_name in optional_extractors:
            try:
                extractor = getattr(importlib.import_module(module_name), class_name)
                dispatcher.register(layer, extractor)
            except Exception:
                log.debug(
                    "Optional extractor %s.%s unavailable; skipping",
                    module_name,
                    class_name,
                    exc_info=True,
                )

        dispatcher.run(repo_workers=1, file_workers=workers)
        return self

    def stitch(self) -> "MAFIA":
        """Run all cross-scope stitchers over the output directory."""
        from phase3_extraction_framework.cross_scope import run_all_stitchers

        run_all_stitchers(str(self._output_dir))
        return self

    def build_graph(self) -> "MAFIA":
        """Build the graph artifacts and drop the cached Graph wrapper."""
        from phase9_graph.graph_builder import build_graph

        build_graph(str(self._output_dir))
        self._graph = None  # reset lazy cache
        return self

    def run_all(self, *, full: bool = False) -> "MAFIA":
        """Run scan → extract → stitch → build_graph in order."""
        return self.scan().extract(full=full).stitch().build_graph()

    def update(self) -> "MAFIA":
        """Re-run the whole pipeline with default extract options."""
        return self.scan().extract().stitch().build_graph()

    # ── Scope inspection ─────────────────────────────────────────────────────

    @property
    def codebase(self):
        """The manifest's ``codebase`` entry, or a minimal id/root fallback."""
        manifest = self._load_workspace_manifest()
        if manifest:
            return manifest.get("codebase", {})
        return {"id": self._root.name, "root": str(self._root)}

    @property
    def repos(self) -> list:
        """The manifest's ``repos`` list (empty when there is no manifest)."""
        manifest = self._load_workspace_manifest()
        return manifest.get("repos", []) if manifest else []

    @property
    def workspaces(self) -> list:
        """All ``workspaces`` entries of every repo, flattened into one list."""
        manifest = self._load_workspace_manifest()
        if not manifest:
            return []
        result = []
        for repo in manifest.get("repos", []):
            result.extend(repo.get("workspaces", []))
        return result

    # ── Graph queries ────────────────────────────────────────────────────────

    def _get_graph(self) -> Graph:
        """Return the Graph wrapper, creating it on first use."""
        if self._graph is None:
            self._graph = Graph(str(self._output_dir))
        return self._graph

    def search(self, query: str, *, layer: str | None = None, limit: int = 10) -> list:
        """Delegate to ``Graph.search``."""
        return self._get_graph().search(query, layer=layer, limit=limit)

    def neighbors(self, eid: str | EntityID, *, direction: str = "both", depth: int = 1) -> dict:
        """Delegate to ``Graph.neighbors`` for the stringified *eid*."""
        return self._get_graph().neighbors(str(eid), direction=direction, depth=depth)

    def trace(self, eid: str | EntityID, *, depth: int = 10) -> dict:
        """Delegate to ``Graph.trace`` for the stringified *eid*."""
        return self._get_graph().trace(str(eid), depth=depth)

    def blast(self, eid: str | EntityID, *, direction: str = "downstream", depth: int = 4) -> dict:
        """Neighborhood query with blast-style defaults (downstream, depth 4).

        This is the same ``Graph.neighbors`` call that :meth:`neighbors`
        makes; only the default arguments differ.
        """
        return self._get_graph().neighbors(str(eid), direction=direction, depth=depth)

    def flows(self, *, node: str | EntityID | None = None) -> list:
        """Delegate to ``Graph.flows``; a falsy *node* is passed as ``None``."""
        return self._get_graph().flows(node=str(node) if node else None)

    def path(self, src: str | EntityID, dst: str | EntityID) -> Optional[dict]:
        """Delegate to ``Graph.path`` for the stringified endpoints."""
        return self._get_graph().path(str(src), str(dst))

    def orphans(self) -> list:
        """Delegate to ``Graph.orphans``."""
        return self._get_graph().orphans()

    # ── Cross-scope queries ──────────────────────────────────────────────────

    def shared_libs(self) -> list:
        """The ``shared_libs`` list from shared_resources.json (or [])."""
        return self._load_shared_resources().get("shared_libs", [])

    def shared_tables(self) -> list:
        """The ``shared_tables`` list from shared_resources.json (or [])."""
        return self._load_shared_resources().get("shared_tables", [])

    def shared_envs(self) -> list:
        """The ``shared_envs`` list from shared_resources.json (or [])."""
        return self._load_shared_resources().get("shared_envs", [])

    def cross_repo_edges(self) -> list:
        """Cross-scope relations whose ``boundary`` is ``"repo"``."""
        return self._load_cross_edges("repo")

    def cross_workspace_edges(self) -> list:
        """Cross-scope relations whose ``boundary`` is ``"workspace"``."""
        return self._load_cross_edges("workspace")

    # ── Export ────────────────────────────────────────────────────────────────

    def export_graph(self, path: str | Path) -> None:
        """Copy system_graph.json to *path*.

        Does nothing when the graph file has not been built yet.
        """
        import shutil

        sg = self._output_dir / "system_graph.json"
        if sg.exists():
            shutil.copy2(sg, path)

    def networkx_graph(self):
        """Return the graph as a ``networkx.DiGraph``.

        Each node and edge dict is passed as keyword attributes.  Edge keys
        are unpacked as ``(source, target)`` pairs.
        """
        import networkx as nx

        g = self._get_graph()
        G = nx.DiGraph()
        for nid, node in g.nodes().items():
            G.add_node(nid, **node)
        for (s, t), edge in g.edges().items():
            G.add_edge(s, t, **edge)
        return G

    # ── Skill adapter ────────────────────────────────────────────────────────

    def as_skill(self) -> "Skill":
        """Wrap this instance in a ``mafia.skill.Skill``."""
        from mafia.skill import Skill

        return Skill(self)

    # ── Internal helpers ─────────────────────────────────────────────────────

    def _load_workspace_manifest(self) -> Optional[dict]:
        """Parse output/workspace_manifest.json, or return None if absent."""
        p = self._output_dir / "workspace_manifest.json"
        if p.exists():
            return json.loads(p.read_text(encoding="utf-8"))
        return None

    def _load_shared_resources(self) -> dict:
        """Parse output/shared_resources.json, or return {} if absent."""
        p = self._output_dir / "shared_resources.json"
        if p.exists():
            return json.loads(p.read_text(encoding="utf-8"))
        return {}

    def _load_cross_edges(self, boundary: str) -> list:
        """Return relations.jsonl records flagged cross_scope at *boundary*.

        Blank lines are skipped; a missing file yields an empty list.
        """
        p = self._output_dir / "relations.jsonl"
        if not p.exists():
            return []
        edges = []
        for line in p.read_text(encoding="utf-8").splitlines():
            if not line.strip():
                continue
            r = json.loads(line)
            if r.get("cross_scope") and r.get("boundary") == boundary:
                edges.append(r)
        return edges
|
mafia/compat.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""MAFIA public API — backward compatibility helpers."""
|
|
2
|
+
|
|
3
|
+
from schemas.id_format import migrate_entity_id_v1_to_v2
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def migrate_id(old_id: str) -> str:
    """Return the v2 form of the v1 entity ID *old_id*.

    Thin public alias for ``schemas.id_format.migrate_entity_id_v1_to_v2``;
    the original note on this helper describes the conversion as currently
    an identity mapping.
    """
    new_id = migrate_entity_id_v1_to_v2(old_id)
    return new_id
|
mafia/config.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""MAFIA config loader — reads .mafia/mafia.json."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def load_config(mafia_dir: str | Path) -> dict:
|
|
9
|
+
"""Load .mafia/mafia.json if present, else return empty dict."""
|
|
10
|
+
p = Path(mafia_dir) / "mafia.json"
|
|
11
|
+
if not p.exists():
|
|
12
|
+
return {}
|
|
13
|
+
try:
|
|
14
|
+
return json.loads(p.read_text(encoding="utf-8"))
|
|
15
|
+
except Exception:
|
|
16
|
+
return {}
|
mafia/graph.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""MAFIA public API — Graph wrapper."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Graph:
    """Lazy-loading wrapper around system_graph.json + e2e_flows.json.

    Nothing is read at construction time; the underlying ``GraphQuery`` is
    created the first time any accessor is used and then reused.
    """

    def __init__(self, output_dir: str):
        self._output_dir = Path(output_dir)
        self._gq = None

    def _ensure_loaded(self):
        """Create the GraphQuery on first use; later calls are no-ops."""
        if self._gq is None:
            from phase9_graph.graph_query import GraphQuery

            graph_file = self._output_dir / "system_graph.json"
            flows_file = self._output_dir / "e2e_flows.json"
            # A file that does not exist is passed to GraphQuery as None.
            graph_arg = str(graph_file) if graph_file.exists() else None
            flows_arg = str(flows_file) if flows_file.exists() else None
            self._gq = GraphQuery(graph_arg, flows_arg)

    def nodes(self) -> dict:
        """Return a shallow copy of the node mapping."""
        return dict(self.query.nodes)

    def edges(self) -> dict:
        """Return a shallow copy of the edge mapping."""
        return dict(self.query._edges)

    def node(self, eid: str) -> Optional[dict]:
        """Return the node stored under *eid*, or None if there is none."""
        return self.query.nodes.get(eid)

    def neighbors(self, eid: str, direction: str = "both", depth: int = 2) -> dict:
        """Forward to ``GraphQuery.neighborhood``."""
        return self.query.neighborhood(eid, direction=direction, depth=depth)

    def search(self, query: str, layer: str = None, limit: int = 10) -> list:
        """Forward to ``GraphQuery.find_by_name``."""
        return self.query.find_by_name(query, layer=layer, limit=limit)

    def trace(self, eid: str, depth: int = 10) -> dict:
        """Forward to ``GraphQuery.trace_flow`` with *depth* as ``max_depth``."""
        return self.query.trace_flow(eid, max_depth=depth)

    def flows(self, node: str = None, limit: int = 10) -> list:
        """Return flows for *node* when one is given, else complete flows."""
        backend = self.query
        if not node:
            return backend.complete_flows(limit=limit)
        return backend.flows_for_node(node, limit=limit)

    def path(self, src: str, dst: str) -> Optional[dict]:
        """Forward to ``GraphQuery.find_path``."""
        return self.query.find_path(src, dst)

    def orphans(self) -> list:
        """Forward to ``GraphQuery.orphan_nodes``."""
        return self.query.orphan_nodes()

    @property
    def query(self):
        """Access the underlying GraphQuery for advanced operations."""
        self._ensure_loaded()
        return self._gq
|
mafia/ids.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""MAFIA public API — EntityID type."""
|
|
2
|
+
|
|
3
|
+
from schemas.id_format import parse_entity_id, ParsedEntityId
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class EntityID:
    """Thin wrapper around a parsed entity ID string.

    The raw string is kept verbatim (it is what ``str()`` returns and what
    equality and hashing use); the structured fields come from
    ``schemas.id_format.parse_entity_id``.
    """

    def __init__(self, eid: str):
        """Parse *eid*; any error raised by ``parse_entity_id`` propagates."""
        self._raw = eid
        self._parsed = parse_entity_id(eid)

    @classmethod
    def from_string(cls, s: str) -> "EntityID":
        """Alternate constructor; equivalent to ``EntityID(s)``."""
        return cls(s)

    @property
    def layer(self) -> str:
        """The ``layer`` field of the parsed ID."""
        return self._parsed.layer

    @property
    def entity_type(self) -> str:
        """The ``entity_type`` field of the parsed ID."""
        return self._parsed.entity_type

    @property
    def scope_path(self) -> str:
        """The ``scope_path`` field of the parsed ID."""
        return self._parsed.scope_path

    @property
    def qualifier(self) -> str:
        """The ``qualifier`` field of the parsed ID."""
        return self._parsed.qualifier

    def _scope_parts(self) -> list:
        """Split the scope path on ``/``."""
        return self._parsed.scope_path.split("/")

    @property
    def codebase(self) -> str:
        """First segment of the scope path."""
        return self._scope_parts()[0]

    @property
    def repo(self) -> str:
        """Second segment of the scope path, or the first if there is only one."""
        parts = self._scope_parts()
        return parts[1] if len(parts) > 1 else parts[0]

    @property
    def workspace(self) -> str:
        """Last segment of the scope path."""
        return self._scope_parts()[-1]

    def __str__(self) -> str:
        return self._raw

    def __repr__(self) -> str:
        return f"EntityID({self._raw!r})"

    def __eq__(self, other) -> bool:
        """Compare by raw ID string against another EntityID or a plain str.

        Other types return ``NotImplemented`` instead of being stringified.
        Stringifying made e.g. ``EntityID("5") == 5`` true even though the
        two hash differently, which breaks the rule that equal objects must
        have equal hashes.
        """
        if isinstance(other, EntityID):
            return self._raw == other._raw
        if isinstance(other, str):
            return self._raw == other
        return NotImplemented

    def __hash__(self) -> int:
        # Same hash as the raw string, so an EntityID and its str form are
        # interchangeable as dict keys and set members.
        return hash(self._raw)
|
mafia/migrate.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MAFIA Migrate
|
|
3
|
+
==============
|
|
4
|
+
Rewrites v1 artifacts to v2 format in place. Backs up originals.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import shutil
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from schemas.id_format import migrate_entity_id_v1_to_v2, ID_FORMAT_VERSION
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def migrate(output_dir: str) -> dict:
    """Migrate v1 artifacts to v2 in place. Returns migration stats.

    Steps, in order: back up the v1 files into ``backup-v1/``, rewrite
    ``entities.jsonl`` and ``relations.jsonl``, patch ``system_graph.json``,
    then write the ``id_format.json`` sidecar.  The sidecar is written last,
    so its presence with version >= 2 marks a completed migration and makes
    later calls return immediately.

    Args:
        output_dir: directory holding the artifacts.

    Returns:
        ``{"entities_migrated": int, "relations_migrated": int,
        "already_v2": bool}``.
    """
    out = Path(output_dir)
    stats = {"entities_migrated": 0, "relations_migrated": 0, "already_v2": False}

    # Check if already v2
    sidecar = out / "id_format.json"
    if sidecar.exists():
        data = json.loads(sidecar.read_text(encoding="utf-8"))
        if data.get("id_format_version", 1) >= 2:
            stats["already_v2"] = True
            return stats

    _backup_v1_artifacts(out, out / "backup-v1")
    stats["entities_migrated"] = _rewrite_jsonl(out / "entities.jsonl", _upgrade_entity)
    stats["relations_migrated"] = _rewrite_jsonl(out / "relations.jsonl", _upgrade_relation)
    _upgrade_system_graph(out / "system_graph.json")

    # Write sidecar
    sidecar.write_text(json.dumps({"id_format_version": ID_FORMAT_VERSION}), encoding="utf-8")

    return stats


def _backup_v1_artifacts(out: Path, backup: Path) -> None:
    """Copy the v1 artifact files into *backup*, never overwriting a copy.

    If an earlier run was interrupted before the sidecar was written, the
    files in *out* may already be partly migrated.  Copying them again would
    replace the only pristine v1 copies, so existing backups are kept.
    """
    backup.mkdir(parents=True, exist_ok=True)
    for fname in ("entities.jsonl", "relations.jsonl", "system_graph.json", "e2e_flows.json"):
        src = out / fname
        dst = backup / fname
        if src.exists() and not dst.exists():
            shutil.copy2(src, dst)


def _rewrite_jsonl(path: Path, upgrade) -> int:
    """Apply *upgrade* to every record of a JSONL file in place; return the count.

    Blank lines are dropped.  A missing file is left alone and counts as 0.
    """
    if not path.exists():
        return 0
    records = []
    for line in path.read_text(encoding="utf-8").splitlines():
        if not line.strip():
            continue
        record = json.loads(line)
        upgrade(record)
        records.append(json.dumps(record, ensure_ascii=False))
    # One record per line; an empty input stays empty, not a lone newline.
    path.write_text("".join(f"{rec}\n" for rec in records), encoding="utf-8")
    return len(records)


def _upgrade_entity(entity: dict) -> None:
    """Convert the entity's ID and add the v2 scope fields if missing."""
    try:
        entity["id"] = migrate_entity_id_v1_to_v2(entity.get("id", ""))
    except Exception:
        # Best effort: an ID the converter rejects is kept unchanged so one
        # bad record does not abort the whole migration.
        pass
    # Add v2 fields if missing; all three default to the repo name.
    repo = entity.get("repo", "")
    for field in ("scope_path", "codebase", "workspace"):
        entity.setdefault(field, repo)


def _upgrade_relation(relation: dict) -> None:
    """Add the v2 cross-scope fields to a relation when missing."""
    relation.setdefault("cross_scope", False)
    relation.setdefault("boundary", None)


def _upgrade_system_graph(sg_path: Path) -> None:
    """Stamp the ID format version and add ``scope_path`` to graph nodes."""
    if not sg_path.exists():
        return
    sg = json.loads(sg_path.read_text(encoding="utf-8"))
    sg.setdefault("id_format_version", ID_FORMAT_VERSION)
    nodes = sg.get("nodes", [])
    # NOTE(review): the nodes schema is not visible here.  A list of node
    # dicts is handled as before; a mapping of id -> node dict is tolerated
    # too.  Confirm which shape graph_builder actually writes.
    node_iter = nodes.values() if isinstance(nodes, dict) else nodes
    for node in node_iter:
        if isinstance(node, dict):
            node.setdefault("scope_path", node.get("repo", ""))
    sg_path.write_text(json.dumps(sg, indent=2, ensure_ascii=False), encoding="utf-8")
|
mafia/plugins.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""
|
|
2
|
+
MAFIA Plugin Loader
|
|
3
|
+
====================
|
|
4
|
+
Discovers plugins via importlib.metadata entry points.
|
|
5
|
+
Groups: mafia.extractors, mafia.workspace_detectors, mafia.source_adapters
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def load_plugins(group: str, config: Optional[dict] = None) -> dict[str, type]:
|
|
12
|
+
"""Load plugins from entry points for the given group.
|
|
13
|
+
|
|
14
|
+
Args:
|
|
15
|
+
group: entry point group name (e.g. 'mafia.extractors')
|
|
16
|
+
config: optional plugins config from mafia.json with enable/disable/order
|
|
17
|
+
|
|
18
|
+
Returns:
|
|
19
|
+
dict of name → class, filtered and ordered per config.
|
|
20
|
+
"""
|
|
21
|
+
try:
|
|
22
|
+
from importlib.metadata import entry_points
|
|
23
|
+
eps = entry_points(group=group)
|
|
24
|
+
except Exception:
|
|
25
|
+
return {}
|
|
26
|
+
|
|
27
|
+
plugins = {}
|
|
28
|
+
for ep in eps:
|
|
29
|
+
try:
|
|
30
|
+
plugins[ep.name] = ep.load()
|
|
31
|
+
except Exception:
|
|
32
|
+
continue
|
|
33
|
+
|
|
34
|
+
if not config:
|
|
35
|
+
return plugins
|
|
36
|
+
|
|
37
|
+
# Apply enable/disable/order from config
|
|
38
|
+
enable = config.get("enable")
|
|
39
|
+
disable = set(config.get("disable", []))
|
|
40
|
+
order = config.get("order", [])
|
|
41
|
+
|
|
42
|
+
if enable is not None:
|
|
43
|
+
enable_set = set(enable)
|
|
44
|
+
plugins = {k: v for k, v in plugins.items() if k in enable_set}
|
|
45
|
+
|
|
46
|
+
plugins = {k: v for k, v in plugins.items() if k not in disable}
|
|
47
|
+
|
|
48
|
+
if order:
|
|
49
|
+
ordered = {}
|
|
50
|
+
for name in order:
|
|
51
|
+
if name == "..." :
|
|
52
|
+
# "..." means "all remaining in original order"
|
|
53
|
+
for k, v in plugins.items():
|
|
54
|
+
if k not in ordered:
|
|
55
|
+
ordered[k] = v
|
|
56
|
+
elif name in plugins:
|
|
57
|
+
ordered[name] = plugins[name]
|
|
58
|
+
# Add any remaining not in order list
|
|
59
|
+
for k, v in plugins.items():
|
|
60
|
+
if k not in ordered:
|
|
61
|
+
ordered[k] = v
|
|
62
|
+
plugins = ordered
|
|
63
|
+
|
|
64
|
+
return plugins
|