code2llm 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code2flow/__init__.py +47 -0
- code2flow/__main__.py +6 -0
- code2flow/analysis/__init__.py +23 -0
- code2flow/analysis/call_graph.py +210 -0
- code2flow/analysis/cfg.py +293 -0
- code2flow/analysis/coupling.py +77 -0
- code2flow/analysis/data_analysis.py +249 -0
- code2flow/analysis/dfg.py +224 -0
- code2flow/analysis/pipeline_detector.py +445 -0
- code2flow/analysis/side_effects.py +313 -0
- code2flow/analysis/smells.py +192 -0
- code2flow/analysis/type_inference.py +306 -0
- code2flow/cli.py +493 -0
- code2flow/core/__init__.py +36 -0
- code2flow/core/analyzer.py +765 -0
- code2flow/core/config.py +177 -0
- code2flow/core/models.py +194 -0
- code2flow/core/streaming_analyzer.py +666 -0
- code2flow/exporters/__init__.py +35 -0
- code2flow/exporters/base.py +13 -0
- code2flow/exporters/context_exporter.py +207 -0
- code2flow/exporters/flow_exporter.py +570 -0
- code2flow/exporters/json_exporter.py +17 -0
- code2flow/exporters/llm_exporter.py +12 -0
- code2flow/exporters/map_exporter.py +218 -0
- code2flow/exporters/mermaid_exporter.py +67 -0
- code2flow/exporters/toon.py +982 -0
- code2flow/exporters/yaml_exporter.py +108 -0
- code2flow/llm_flow_generator.py +451 -0
- code2flow/llm_task_generator.py +263 -0
- code2flow/mermaid_generator.py +481 -0
- code2flow/nlp/__init__.py +23 -0
- code2flow/nlp/config.py +174 -0
- code2flow/nlp/entity_resolution.py +326 -0
- code2flow/nlp/intent_matching.py +297 -0
- code2flow/nlp/normalization.py +122 -0
- code2flow/nlp/pipeline.py +388 -0
- code2flow/patterns/__init__.py +0 -0
- code2flow/patterns/detector.py +168 -0
- code2flow/refactor/__init__.py +0 -0
- code2flow/refactor/prompt_engine.py +150 -0
- code2flow/visualizers/__init__.py +0 -0
- code2flow/visualizers/graph.py +196 -0
- code2llm-0.3.7.dist-info/METADATA +604 -0
- code2llm-0.3.7.dist-info/RECORD +49 -0
- code2llm-0.3.7.dist-info/WHEEL +5 -0
- code2llm-0.3.7.dist-info/entry_points.txt +2 -0
- code2llm-0.3.7.dist-info/licenses/LICENSE +201 -0
- code2llm-0.3.7.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,570 @@
|
|
|
"""Flow Exporter — generates flow.toon (data-flow format).

Produces a data-flow-focused format with PIPELINES, TRANSFORMS, CONTRACTS,
DATA_TYPES, and SIDE_EFFECTS sections.

Purpose: "how data flows through the system"
Format: pipeline stages, transform fan-out, contracts, hub-type detection

Sprint 2 (v0.3.1): AST-based type inference and side-effect detection.
Sprint 3 (v0.3.2): networkx-based pipeline detection with domain grouping.
"""

import ast
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple

from .base import Exporter
from ..core.models import (
    AnalysisResult, FunctionInfo, ClassInfo, ModuleInfo, FlowNode
)
from ..analysis.type_inference import TypeInferenceEngine
from ..analysis.side_effects import SideEffectDetector, SideEffectInfo
from ..analysis.pipeline_detector import PipelineDetector, Pipeline

# Thresholds
CC_HIGH = 15             # cyclomatic complexity at/above which a stage is flagged
FAN_OUT_THRESHOLD = 10   # distinct callees required to list a function as a transform
HUB_TYPE_THRESHOLD = 10  # consumer count at/above which a type is a "hub type"

# Patterns to exclude (virtualenvs, caches, build artifacts, VCS metadata)
EXCLUDE_PATTERNS = {
    'venv', '.venv', 'env', '.env', 'publish-env', 'test-env',
    'site-packages', 'node_modules', '__pycache__', '.git',
    'dist', 'build', 'egg-info', '.tox', '.mypy_cache',
}


# Hub-type split recommendations: type -> suggested sub-interfaces
HUB_SPLIT_RECOMMENDATIONS: Dict[str, List[str]] = {
    "AnalysisResult": ["StructureResult (modules, classes, functions)",
                       "MetricsResult (complexity, coupling)",
                       "FlowResult (call_graph, cfg, dfg)"],
    "dict": ["replace with typed alternatives (dataclass/TypedDict)"],
    "str": [],  # primitive, expected to be ubiquitous
    "list": [],
    "Any": [],
}


class FlowExporter(Exporter):
    """Export to flow.toon — data-flow focused format.

    Sections: PIPELINES, TRANSFORMS, CONTRACTS, DATA_TYPES, SIDE_EFFECTS

    Sprint 2: TypeInferenceEngine + SideEffectDetector for AST-based analysis.
    Sprint 3: PipelineDetector with networkx for graph-based pipeline detection.
    """

    def __init__(self):
        # Analysis engines are created once and shared; the pipeline detector
        # reuses both so the three stay consistent within one export.
        self._type_engine = TypeInferenceEngine()
        self._side_effect_detector = SideEffectDetector()
        self._pipeline_detector = PipelineDetector(
            type_engine=self._type_engine,
            side_effect_detector=self._side_effect_detector,
        )

    def export(self, result: AnalysisResult, output_path: str, **kwargs) -> None:
        """Export analysis result to flow.toon format.

        Args:
            result: Completed project analysis to render.
            output_path: Destination file; parent directories are created.
            **kwargs: Accepted for exporter-interface compatibility; unused here.
        """
        ctx = self._build_context(result)

        # Render the five sections, separated by single blank lines.
        sections: List[str] = []
        sections.extend(self._render_header(ctx))
        sections.append("")
        sections.extend(self._render_pipelines(ctx))
        sections.append("")
        sections.extend(self._render_transforms(ctx))
        sections.append("")
        sections.extend(self._render_contracts(ctx))
        sections.append("")
        sections.extend(self._render_data_types(ctx))
        sections.append("")
        sections.extend(self._render_side_effects(ctx))

        Path(output_path).parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            f.write("\n".join(sections) + "\n")

    # ------------------------------------------------------------------
    # context builder
    # ------------------------------------------------------------------
    def _build_context(self, result: AnalysisResult) -> Dict[str, Any]:
        """Run all analyses once and collect their outputs into one dict.

        The returned context is the single input every ``_render_*`` method
        reads from, so each analysis runs exactly once per export.
        """
        ctx: Dict[str, Any] = {}
        ctx["result"] = result
        ctx["timestamp"] = datetime.now().strftime("%Y-%m-%d")

        # Build function lookup excluding venv etc.
        funcs = {
            qname: fi for qname, fi in result.functions.items()
            if not self._is_excluded(fi.file)
        }
        ctx["funcs"] = funcs

        # AST-based type inference (Sprint 2)
        ctx["type_info"] = self._type_engine.extract_all_types(funcs)

        # AST-based side-effect detection (Sprint 2)
        ctx["se_info"] = self._side_effect_detector.analyze_all(funcs)

        # Detect pipelines using networkx-based PipelineDetector (Sprint 3)
        raw_pipelines = self._pipeline_detector.detect(funcs, se_info=ctx["se_info"])
        ctx["raw_pipelines"] = raw_pipelines
        ctx["pipelines"] = [self._pipeline_to_dict(p) for p in raw_pipelines]

        # Compute transforms (high fan-out functions)
        ctx["transforms"] = self._compute_transforms(funcs)

        # Compute type usage across functions (now AST-based)
        ctx["type_usage"] = self._compute_type_usage(funcs, ctx["type_info"])

        # Classify side effects (now AST-based)
        ctx["side_effects"] = self._classify_side_effects(funcs, ctx["se_info"])

        # Compute contracts per pipeline (now with IN/OUT/SIDE-EFFECT)
        ctx["contracts"] = self._compute_contracts(
            ctx["pipelines"], funcs, ctx["type_info"], ctx["se_info"]
        )

        return ctx

    # ------------------------------------------------------------------
    # pipeline conversion (Sprint 3)
    # ------------------------------------------------------------------
    def _pipeline_to_dict(self, pipeline: Pipeline) -> Dict[str, Any]:
        """Convert Pipeline dataclass to dict for rendering."""
        stages = []
        for s in pipeline.stages:
            stages.append({
                "name": s.name,
                "qualified": s.qualified_name,
                "signature": s.signature,
                "cc": s.cc,
                "purity": s.purity,
                "is_entry": s.is_entry,
                "is_exit": s.is_exit,
            })

        bn = pipeline.bottleneck
        return {
            "name": pipeline.name,
            "domain": pipeline.domain,
            "stages": stages,
            "entry_point": pipeline.entry_point,
            "exit_point": pipeline.exit_point,
            "entry_type": pipeline.entry_type,
            "exit_type": pipeline.exit_type,
            "pure_count": pipeline.pure_count,
            "total_stages": pipeline.total_stages,
            # bottleneck may be absent; rendering treats None as "OK"
            "bottleneck": {"name": bn.name, "cc": bn.cc} if bn else None,
        }

    # ------------------------------------------------------------------
    # transforms — high fan-out functions
    # ------------------------------------------------------------------
    def _compute_transforms(
        self, funcs: Dict[str, FunctionInfo]
    ) -> List[Dict[str, Any]]:
        """Find functions with fan-out >= threshold.

        Fan-out counts *distinct* callees (duplicates collapsed via set).
        Returns at most 15 entries, highest fan-out first.
        """
        transforms = []
        for qname, fi in funcs.items():
            fan_out = len(set(fi.calls))
            if fan_out >= FAN_OUT_THRESHOLD:
                transforms.append({
                    "name": fi.name,
                    "qualified": qname,
                    "fan_out": fan_out,
                    "signature": self._type_engine.get_typed_signature(fi),
                    "label": self._transform_label(fi, fan_out),
                })
        transforms.sort(key=lambda x: x["fan_out"], reverse=True)
        return transforms[:15]

    def _transform_label(self, fi: FunctionInfo, fan_out: int) -> str:
        """Pick a short annotation for a high-fan-out function.

        NOTE: these labels are name/threshold heuristics, not measured
        behavior — e.g. fan_out >= 30 is *labelled* "mutation-heavy"
        without inspecting the callees.
        """
        if fi.name == "main" or fi.name == "__main__":
            return "!! script-in-disguise"
        if fan_out >= 30:
            return "!! mutation-heavy"
        if fan_out >= 20:
            return "!! side-effects"
        if fi.class_name:
            return f"PIPELINE:{fi.class_name}.entry"
        return f"fan={fan_out}"

    # ------------------------------------------------------------------
    # type usage — consumed/produced counts (AST-based, Sprint 2)
    # ------------------------------------------------------------------
    def _compute_type_usage(
        self, funcs: Dict[str, FunctionInfo],
        type_info: Dict[str, Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Count how many functions consume/produce each type using AST data.

        "Consumed" = appears as a (non-self) parameter type; "produced" =
        appears as a return type. Returns the top 20 by combined count.
        """
        consumed: Dict[str, int] = defaultdict(int)
        produced: Dict[str, int] = defaultdict(int)

        for qname, fi in funcs.items():
            ti = type_info.get(qname, {})
            # Types from AST-extracted args (consumed)
            for arg in ti.get("args", []):
                if arg["name"] == "self":
                    continue
                type_name = arg.get("type")
                if type_name:
                    normalized = self._normalize_type(type_name)
                    if normalized:
                        consumed[normalized] += 1

            # Types from AST-extracted return (produced)
            ret = ti.get("returns")
            if ret:
                normalized = self._normalize_type(ret)
                if normalized:
                    produced[normalized] += 1

        # Merge into ranked list
        all_types = set(consumed.keys()) | set(produced.keys())
        type_list = []
        for t in all_types:
            c = consumed.get(t, 0)
            p = produced.get(t, 0)
            total = c + p
            label = self._type_label(t, c, p)
            type_list.append({
                "type": t,
                "consumed": c,
                "produced": p,
                "total": total,
                "label": label,
            })

        type_list.sort(key=lambda x: x["total"], reverse=True)
        return type_list[:20]

    def _normalize_type(self, t: str) -> str:
        """Reduce an annotation string to a base type name.

        Returns "" for None/Any/empty so callers can skip uninformative
        types. NOTE(review): only one wrapper layer is peeled and the loop
        does not recurse, so e.g. ``Dict[str, int]`` yields ``"str, int"``
        — confirm whether nested generics should be split further.
        """
        t = t.strip().strip("'\"")
        # Remove Optional[], List[], Dict[] wrappers for base type
        for wrapper in ["Optional[", "List[", "Dict[", "Set[", "Tuple["]:
            if t.startswith(wrapper) and t.endswith("]"):
                t = t[len(wrapper):-1]
        return t if t and t not in ("None", "Any") else ""

    def _type_label(self, t: str, consumed: int, produced: int) -> str:
        """Label a type by its usage profile; "" when nothing noteworthy.

        Checks are ordered: the hub-type warning wins over every other
        label, so a type with consumed >= 10 never reads "ubiquitous".
        """
        if consumed >= HUB_TYPE_THRESHOLD:
            return "!! HUB-TYPE \u2192 split interface"
        if consumed >= 5 and produced <= 1:
            return "input-heavy (read-only flow)"
        if produced >= 5 and consumed <= 1:
            return "output-heavy"
        if consumed >= 10 and produced >= 10:
            return "ubiquitous"
        if consumed + produced <= 4:
            return "narrow scope"
        return ""

    # ------------------------------------------------------------------
    # side effect classification (AST-based, Sprint 2)
    # ------------------------------------------------------------------
    def _classify_side_effects(
        self, funcs: Dict[str, FunctionInfo],
        se_info: Dict[str, SideEffectInfo]
    ) -> Dict[str, List[str]]:
        """Classify functions by side-effect type using AST analysis.

        Functions without detector results default to "pure". Each bucket
        is truncated to keep the rendered section short.
        """
        io_funcs: List[str] = []
        cache_funcs: List[str] = []
        mutation_funcs: List[str] = []
        pure_funcs: List[str] = []

        for qname, fi in funcs.items():
            se = se_info.get(qname)
            classification = se.classification if se else "pure"
            # Display name: Class.method for methods, bare name otherwise.
            short = fi.name
            if fi.class_name:
                short = f"{fi.class_name}.{fi.name}"

            if classification == "IO":
                io_funcs.append(short)
            elif classification == "cache":
                cache_funcs.append(short)
            elif classification == "mutation":
                mutation_funcs.append(short)
            else:
                pure_funcs.append(short)

        return {
            "IO": io_funcs[:15],
            "Cache": cache_funcs[:10],
            "Mutation": mutation_funcs[:15],
            "Pure": pure_funcs[:20],
        }

    # ------------------------------------------------------------------
    # contracts per pipeline (enhanced, Sprint 2)
    # ------------------------------------------------------------------
    def _compute_contracts(
        self, pipelines: List[Dict[str, Any]],
        funcs: Dict[str, FunctionInfo],
        type_info: Dict[str, Dict[str, Any]],
        se_info: Dict[str, SideEffectInfo]
    ) -> List[Dict[str, Any]]:
        """Build rich contracts for each pipeline stage with IN/OUT/SIDE-EFFECT."""
        contracts = []
        for pipeline in pipelines:
            stages_contracts = []
            for stage in pipeline["stages"]:
                fi = funcs.get(stage["qualified"])
                if not fi:
                    # Stage refers to an excluded/unknown function; skip it.
                    continue
                ti = type_info.get(stage["qualified"], {})
                se = se_info.get(stage["qualified"])

                contract = self._build_stage_contract(fi, ti, se, stage)
                stages_contracts.append(contract)

            contracts.append({
                "pipeline": pipeline["name"],
                "stages": stages_contracts,
            })
        return contracts

    def _build_stage_contract(
        self, fi: FunctionInfo,
        ti: Dict[str, Any],
        se: Optional[SideEffectInfo],
        stage: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Build a rich contract for a single pipeline stage."""
        # IN types: "name:type" per arg, "()" when there are no (non-self) args
        in_types = []
        for arg in ti.get("args", []):
            if arg["name"] == "self":
                continue
            t = arg.get("type", "")
            in_types.append(f"{arg['name']}:{t}" if t else arg["name"])
        in_str = ", ".join(in_types) if in_types else "()"

        # OUT type
        out_str = ti.get("returns") or "None"

        # Side-effect info (only shown for impure stages)
        side_effect = ""
        if se and not se.is_pure:
            side_effect = se.side_effect_summary

        # Smell note
        cc = stage["cc"]
        smell = ""
        if cc >= CC_HIGH:
            smell = f"CC={cc:.0f} \u2192 split"

        # Invariant (heuristic)
        invariant = self._infer_invariant(fi, ti)

        return {
            "name": fi.name,
            "signature": stage["signature"],
            "in": in_str,
            "out": out_str,
            "cc": cc,
            "purity": stage["purity"],
            "side_effect": side_effect,
            "smell": smell,
            "invariant": invariant,
            "source": ti.get("source", "none"),
        }

    def _infer_invariant(self, fi: FunctionInfo, ti: Dict[str, Any]) -> str:
        """Infer a contract invariant from function semantics.

        NOTE: purely name-based heuristics — first matching keyword wins;
        "" means no guess.
        """
        name_lower = fi.name.lower()
        ret = ti.get("returns", "")

        if "normalize" in name_lower:
            return "len(output) <= len(input)"
        if "match" in name_lower and ret and "Match" in ret:
            return "confidence \u2208 [0.0, 1.0]"
        if "resolve" in name_lower:
            return "all entities exist in analysis"
        if "validate" in name_lower or "check" in name_lower:
            return "raises on invalid input"
        if "sort" in name_lower:
            return "output is sorted"
        if "filter" in name_lower:
            return "len(output) <= len(input)"
        return ""

    # ------------------------------------------------------------------
    # render sections
    # ------------------------------------------------------------------
    def _render_header(self, ctx: Dict[str, Any]) -> List[str]:
        """One-line summary header: project, counts, date."""
        result: AnalysisResult = ctx["result"]
        nfuncs = len(ctx["funcs"])
        npipelines = len(ctx["pipelines"])
        nhubs = sum(1 for t in ctx["type_usage"]
                    if t["consumed"] >= HUB_TYPE_THRESHOLD)
        return [
            f"# {Path(result.project_path).name if result.project_path else 'project'}/flow"
            f" | {nfuncs} func | {npipelines} pipelines"
            f" | {nhubs} hub-types | {ctx['timestamp']}",
        ]

    def _render_pipelines(self, ctx: Dict[str, Any]) -> List[str]:
        """Render PIPELINES: per-pipeline stage list, purity and bottleneck."""
        pipelines = ctx["pipelines"]
        if not pipelines:
            return ["PIPELINES[0]: none detected"]

        # Count domains
        domains = defaultdict(int)
        for pl in pipelines:
            domains[pl.get("domain", "Unknown")] += 1
        domain_summary = ", ".join(f"{d}:{n}" for d, n in sorted(domains.items()))

        lines = [f"PIPELINES[{len(pipelines)}] ({domain_summary}):"]
        for pl in pipelines:
            domain_tag = f"[{pl.get('domain', '?')}]"
            entry_type = pl.get("entry_type", "?")
            exit_type = pl.get("exit_type", "?")
            lines.append(
                f" {pl['name']} {domain_tag}:"
                f" {entry_type} \u2192 {exit_type}"
            )
            for stage in pl["stages"]:
                # Markers: !! high CC, ▶ entry stage, ■ exit stage
                cc_marker = " !!" if stage["cc"] >= CC_HIGH else ""
                entry_lbl = " \u25b6" if stage.get("is_entry") else ""
                exit_lbl = " \u25a0" if stage.get("is_exit") else ""
                lines.append(
                    f" \u2192 {stage['signature']}"
                    f"{'':>{max(1, 40 - len(stage['signature']))}}"
                    f"CC={stage['cc']:<4.0f} {stage['purity']}"
                    f"{cc_marker}{entry_lbl}{exit_lbl}"
                )
            bn = pl.get("bottleneck")
            bn_str = f"BOTTLENECK: {bn['name']}(CC={bn['cc']:.0f})" if bn else "OK"
            lines.append(
                f" PURITY: {pl['pure_count']}/{pl['total_stages']} pure"
                f" {bn_str}"
            )
            lines.append("")

        return lines

    def _render_transforms(self, ctx: Dict[str, Any]) -> List[str]:
        """Render TRANSFORMS: high-fan-out functions with heuristic labels."""
        transforms = ctx["transforms"]
        if not transforms:
            return ["TRANSFORMS: none (fan-out < 10)"]

        lines = [f"TRANSFORMS (fan-out \u2265{FAN_OUT_THRESHOLD}):"]
        for t in transforms:
            lines.append(
                f" {t['signature']:<55s} fan={t['fan_out']:<3}"
                f" {t['label']}"
            )
        return lines

    def _render_contracts(self, ctx: Dict[str, Any]) -> List[str]:
        """Render CONTRACTS: IN/OUT plus optional SIDE-EFFECT/INVARIANT/SMELL."""
        contracts = ctx["contracts"]
        if not contracts:
            return ["CONTRACTS: none (no pipelines detected)"]

        lines = ["CONTRACTS:"]
        for contract in contracts:
            lines.append(f" Pipeline: {contract['pipeline']}")
            for stage in contract["stages"]:
                lines.append(f" {stage['signature']}")
                lines.append(f" IN: {stage['in']}")
                lines.append(f" OUT: {stage['out']}")
                # Optional annotations are omitted when empty.
                if stage.get("side_effect"):
                    lines.append(f" SIDE-EFFECT: {stage['side_effect']}")
                if stage.get("invariant"):
                    lines.append(f" INVARIANT: {stage['invariant']}")
                if stage.get("smell"):
                    lines.append(f" SMELL: {stage['smell']}")
            lines.append("")
        return lines

    def _render_data_types(self, ctx: Dict[str, Any]) -> List[str]:
        """Render DATA_TYPES: consumed/produced counts plus hub-type advice."""
        types = ctx["type_usage"]
        if not types:
            return ["DATA_TYPES: no type information available"]

        # Count type sources (annotation vs inferred) for the section header
        type_info = ctx.get("type_info", {})
        n_annotated = sum(
            1 for ti in type_info.values()
            if ti.get("source") == "annotation"
        )
        n_inferred = sum(
            1 for ti in type_info.values()
            if ti.get("source") == "inferred"
        )
        n_total = len(type_info)

        lines = [
            f"DATA_TYPES (by cross-function usage)"
            f" [{n_annotated} annotated, {n_inferred} inferred"
            f" / {n_total} functions]:"
        ]
        for t in types:
            label = f" {t['label']}" if t["label"] else ""
            lines.append(
                f" {t['type']:<20s} consumed:{t['consumed']:<3}"
                f" produced:{t['produced']:<3}{label}"
            )

        # Hub types summary with split recommendations
        hubs = [t for t in types if t["consumed"] >= HUB_TYPE_THRESHOLD]
        if hubs:
            lines.append("")
            lines.append(" HUB TYPES (consumed \u226510):")
            for h in hubs:
                lines.append(
                    f" {h['type']} \u2192 {h['consumed']} consumers"
                    f" \u2192 split into:"
                )
                recs = HUB_SPLIT_RECOMMENDATIONS.get(h["type"], [])
                if recs:
                    for rec in recs:
                        lines.append(f" - {rec}")
                else:
                    # Unknown hub type: no canned recommendation available.
                    lines.append(" - (analyze consumers to suggest sub-interfaces)")

        return lines

    def _render_side_effects(self, ctx: Dict[str, Any]) -> List[str]:
        """Render SIDE_EFFECTS buckets plus a per-pipeline purity bar."""
        se = ctx["side_effects"]
        lines = ["SIDE_EFFECTS:"]

        for category, funcs in se.items():
            if funcs:
                lines.append(
                    f" {category + ':':<10s} {', '.join(funcs[:10])}"
                )

        # Pipeline purity summary
        pipelines = ctx["pipelines"]
        if pipelines:
            lines.append("")
            lines.append(" PIPELINE PURITY:")
            for pl in pipelines:
                ratio = pl["pure_count"] / pl["total_stages"] if pl["total_stages"] else 0
                # 4-character bar: █ per 25% pure, ░ for the remainder.
                bar_len = int(ratio * 4)
                bar = "\u2588" * bar_len + "\u2591" * (4 - bar_len)
                pct = int(ratio * 100)
                lines.append(
                    f" {pl['name']:<15s} {bar} {pct}% pure"
                )

        return lines

    # ------------------------------------------------------------------
    # utility helpers
    # ------------------------------------------------------------------
    def _is_excluded(self, path: str) -> bool:
        """True when *path* falls under any EXCLUDE_PATTERNS directory.

        Comparison is case-insensitive with separators normalized to '/'.
        """
        if not path:
            return False
        path_lower = path.lower().replace('\\', '/')
        for pattern in EXCLUDE_PATTERNS:
            if f'/{pattern}/' in path_lower or path_lower.startswith(f'{pattern}/'):
                return True
            if pattern in path_lower.split('/'):
                return True
        return False
|
@@ -0,0 +1,17 @@
|
|
|
"""JSON Exporter for code2flow."""

import json
from pathlib import Path
from .base import Exporter
from ..core.models import AnalysisResult


class JSONExporter(Exporter):
    """Serialize an analysis result to a JSON file."""

    def export(self, result: AnalysisResult, output_path: str, compact: bool = True, include_defaults: bool = False) -> None:
        """Write *result* as JSON to *output_path*.

        Args:
            result: Analysis result; serialized via its ``to_dict``.
            output_path: Destination file path; parent directories are created.
            compact: When True, emit single-line JSON; otherwise indent by 2.
            include_defaults: When True, ask ``to_dict`` for the non-compact
                (default-including) representation even if ``compact`` is set.
        """
        payload = result.to_dict(compact=compact and not include_defaults)
        target = Path(output_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        # Pretty-printing only applies to the textual layout, not the payload.
        indent = None if compact else 2
        rendered = json.dumps(payload, indent=indent, ensure_ascii=False)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(rendered)
|
|
@@ -0,0 +1,12 @@
|
|
|
"""Backward-compatibility shim: LLMPromptExporter → ContextExporter.

The canonical implementation is now in context_exporter.py.
This module re-exports the old name for backward compatibility.
"""

from .context_exporter import ContextExporter

# Backward-compat alias: existing callers importing LLMPromptExporter from
# this module keep working; new code should use ContextExporter directly.
LLMPromptExporter = ContextExporter

# Both names are public so `from ... import *` keeps exposing the old one.
__all__ = ["LLMPromptExporter", "ContextExporter"]
|