code2flow-toon 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code2flow/__init__.py +47 -0
- code2flow/__main__.py +6 -0
- code2flow/analysis/__init__.py +17 -0
- code2flow/analysis/call_graph.py +210 -0
- code2flow/analysis/cfg.py +293 -0
- code2flow/analysis/coupling.py +77 -0
- code2flow/analysis/data_analysis.py +249 -0
- code2flow/analysis/dfg.py +224 -0
- code2flow/analysis/smells.py +192 -0
- code2flow/cli.py +464 -0
- code2flow/core/__init__.py +36 -0
- code2flow/core/analyzer.py +765 -0
- code2flow/core/config.py +177 -0
- code2flow/core/models.py +194 -0
- code2flow/core/streaming_analyzer.py +666 -0
- code2flow/exporters/__init__.py +17 -0
- code2flow/exporters/base.py +13 -0
- code2flow/exporters/json_exporter.py +17 -0
- code2flow/exporters/llm_exporter.py +199 -0
- code2flow/exporters/mermaid_exporter.py +67 -0
- code2flow/exporters/toon.py +401 -0
- code2flow/exporters/yaml_exporter.py +108 -0
- code2flow/llm_flow_generator.py +451 -0
- code2flow/llm_task_generator.py +263 -0
- code2flow/mermaid_generator.py +481 -0
- code2flow/nlp/__init__.py +23 -0
- code2flow/nlp/config.py +174 -0
- code2flow/nlp/entity_resolution.py +326 -0
- code2flow/nlp/intent_matching.py +297 -0
- code2flow/nlp/normalization.py +122 -0
- code2flow/nlp/pipeline.py +388 -0
- code2flow/patterns/__init__.py +0 -0
- code2flow/patterns/detector.py +168 -0
- code2flow/refactor/__init__.py +0 -0
- code2flow/refactor/prompt_engine.py +150 -0
- code2flow/visualizers/__init__.py +0 -0
- code2flow/visualizers/graph.py +196 -0
- code2flow_toon-0.2.4.dist-info/METADATA +599 -0
- code2flow_toon-0.2.4.dist-info/RECORD +43 -0
- code2flow_toon-0.2.4.dist-info/WHEEL +5 -0
- code2flow_toon-0.2.4.dist-info/entry_points.txt +2 -0
- code2flow_toon-0.2.4.dist-info/licenses/LICENSE +201 -0
- code2flow_toon-0.2.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,451 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import re
|
|
3
|
+
import sys
|
|
4
|
+
from collections import Counter, defaultdict, deque
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Dict, Iterable, List, Optional, Set, Tuple
|
|
8
|
+
|
|
9
|
+
import yaml
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Label prefixes emitted by the code2flow analyzer on graph nodes.
# "FUNC:<name>" marks a function-definition node; "CALL <expr>" marks a
# call-site node.  The _parse_*_label helpers below strip these prefixes
# to recover function / callee names.
_FUNC_LABEL_PREFIX = "FUNC:"
_CALL_LABEL_PREFIX = "CALL "
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _strip_bom(text: str) -> str:
|
|
17
|
+
return text[1:] if text.startswith("\ufeff") else text
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _safe_read_yaml(path: Path) -> Dict[str, Any]:
    """Read *path* as YAML and require a mapping at the document root.

    An empty (or falsy) document yields {}; any other non-mapping root
    raises ValueError.
    """
    content = _strip_bom(path.read_text(encoding="utf-8"))
    document = yaml.safe_load(content) or {}
    if not isinstance(document, dict):
        raise ValueError("analysis.yaml must be a mapping at top-level")
    return document
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _as_dict(d: Any) -> Dict[str, Any]:
|
|
29
|
+
return d if isinstance(d, dict) else {}
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _as_list(v: Any) -> List[Any]:
|
|
33
|
+
return v if isinstance(v, list) else []
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _shorten(s: str, max_len: int) -> str:
|
|
37
|
+
s = (s or "").strip()
|
|
38
|
+
if len(s) <= max_len:
|
|
39
|
+
return s
|
|
40
|
+
return s[: max(0, max_len - 1)].rstrip() + "…"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _parse_call_label(label: str) -> Optional[str]:
|
|
44
|
+
label = (label or "").strip()
|
|
45
|
+
if not label.startswith(_CALL_LABEL_PREFIX):
|
|
46
|
+
return None
|
|
47
|
+
rest = label[len(_CALL_LABEL_PREFIX) :].strip()
|
|
48
|
+
rest = rest.replace("<", "").replace(">", "")
|
|
49
|
+
|
|
50
|
+
m = re.match(r"([A-Za-z_][A-Za-z0-9_\.]+)\s*\(", rest)
|
|
51
|
+
if m:
|
|
52
|
+
return m.group(1)
|
|
53
|
+
|
|
54
|
+
m = re.match(r"([A-Za-z_][A-Za-z0-9_\.]+)$", rest)
|
|
55
|
+
if m:
|
|
56
|
+
return m.group(1)
|
|
57
|
+
|
|
58
|
+
return None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _parse_func_label(label: str) -> Optional[str]:
    """Return the function name from a "FUNC:<name>" node label, or None."""
    text = (label or "").strip()
    if not text.startswith(_FUNC_LABEL_PREFIX):
        return None
    name = text[len(_FUNC_LABEL_PREFIX):].strip()
    return name if name else None
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclass(frozen=True)
class FuncSummary:
    """Compact per-function summary extracted from the CFG nodes.

    Frozen so instances are immutable and safe to share.
    """

    name: str  # fully-qualified function name
    file: Optional[str]  # source file of the first node seen for this function
    line: Optional[int]  # line number of that first node, when known
    decisions: Tuple[str, ...]  # most frequent IF-node labels (shortened)
    calls: Tuple[str, ...]  # most frequent callee names parsed from CALL nodes
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _collect_nodes(analysis: Dict[str, Any]) -> Dict[int, Dict[str, Any]]:
|
|
78
|
+
nodes = analysis.get("nodes")
|
|
79
|
+
if not isinstance(nodes, dict):
|
|
80
|
+
return {}
|
|
81
|
+
|
|
82
|
+
parsed: Dict[int, Dict[str, Any]] = {}
|
|
83
|
+
for k, v in nodes.items():
|
|
84
|
+
try:
|
|
85
|
+
node_id = int(k)
|
|
86
|
+
except Exception:
|
|
87
|
+
continue
|
|
88
|
+
if isinstance(v, dict):
|
|
89
|
+
parsed[node_id] = v
|
|
90
|
+
return parsed
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _collect_entrypoints(nodes: Dict[int, Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
94
|
+
by_file: Dict[str, List[Dict[str, Any]]] = defaultdict(list)
|
|
95
|
+
for n in nodes.values():
|
|
96
|
+
f = n.get("file")
|
|
97
|
+
if isinstance(f, str):
|
|
98
|
+
by_file[f].append(n)
|
|
99
|
+
|
|
100
|
+
entrypoints: List[Dict[str, Any]] = []
|
|
101
|
+
for f, ns in by_file.items():
|
|
102
|
+
if not (f.endswith("__main__.py") or f.endswith("cli.py")):
|
|
103
|
+
continue
|
|
104
|
+
|
|
105
|
+
main_funcs = [n for n in ns if n.get("type") == "FUNC" and isinstance(n.get("function"), str)]
|
|
106
|
+
for n in main_funcs:
|
|
107
|
+
entrypoints.append(
|
|
108
|
+
{
|
|
109
|
+
"kind": "cli" if f.endswith("cli.py") else "module_main",
|
|
110
|
+
"file": f,
|
|
111
|
+
"function": n.get("function"),
|
|
112
|
+
"line": n.get("line"),
|
|
113
|
+
}
|
|
114
|
+
)
|
|
115
|
+
|
|
116
|
+
uniq: Dict[str, Dict[str, Any]] = {}
|
|
117
|
+
for ep in entrypoints:
|
|
118
|
+
key = str(ep.get("function") or "")
|
|
119
|
+
if key and key not in uniq:
|
|
120
|
+
uniq[key] = ep
|
|
121
|
+
|
|
122
|
+
return list(uniq.values())
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _collect_functions(nodes: Dict[int, Dict[str, Any]]) -> Set[str]:
|
|
126
|
+
out: Set[str] = set()
|
|
127
|
+
for n in nodes.values():
|
|
128
|
+
if n.get("type") != "FUNC":
|
|
129
|
+
continue
|
|
130
|
+
fn = n.get("function")
|
|
131
|
+
if isinstance(fn, str) and fn:
|
|
132
|
+
out.add(fn)
|
|
133
|
+
else:
|
|
134
|
+
parsed = _parse_func_label(str(n.get("label") or ""))
|
|
135
|
+
if parsed:
|
|
136
|
+
out.add(parsed)
|
|
137
|
+
return out
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _node_counts_by_function(nodes: Dict[int, Dict[str, Any]]) -> Counter[str]:
|
|
141
|
+
counts: Counter[str] = Counter()
|
|
142
|
+
for n in nodes.values():
|
|
143
|
+
fn = n.get("function")
|
|
144
|
+
if isinstance(fn, str) and fn:
|
|
145
|
+
counts[fn] += 1
|
|
146
|
+
return counts
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def _pick_relevant_functions(
|
|
150
|
+
*,
|
|
151
|
+
entrypoints: List[Dict[str, Any]],
|
|
152
|
+
known_functions: Set[str],
|
|
153
|
+
func_summaries: Dict[str, FuncSummary],
|
|
154
|
+
nodes: Dict[int, Dict[str, Any]],
|
|
155
|
+
max_functions: int,
|
|
156
|
+
) -> List[str]:
|
|
157
|
+
"""Pick a compact but meaningful subset of functions.
|
|
158
|
+
|
|
159
|
+
In many real projects, the CFG "CALL" labels often point to external
|
|
160
|
+
functions (e.g. click.echo), so a pure call-graph reachability may select
|
|
161
|
+
almost nothing. Here we fall back to a scoring heuristic:
|
|
162
|
+
- start with entrypoints
|
|
163
|
+
- boost functions that have many nodes (more logic)
|
|
164
|
+
- boost functions with important keywords (extract, schema, openapi, dom, cli)
|
|
165
|
+
"""
|
|
166
|
+
|
|
167
|
+
roots = [str(ep.get("function") or "") for ep in entrypoints]
|
|
168
|
+
roots = [r for r in roots if r in known_functions]
|
|
169
|
+
|
|
170
|
+
counts = _node_counts_by_function(nodes)
|
|
171
|
+
|
|
172
|
+
keyword_boosts = [
|
|
173
|
+
(".cli.", 50),
|
|
174
|
+
(".extract.", 80),
|
|
175
|
+
("extract_schema", 120),
|
|
176
|
+
("extract_schema_to_file", 120),
|
|
177
|
+
("extract_appspec_to_file", 120),
|
|
178
|
+
("openapi", 60),
|
|
179
|
+
("dom", 40),
|
|
180
|
+
("makefile", 40),
|
|
181
|
+
("shell", 40),
|
|
182
|
+
("python", 40),
|
|
183
|
+
("validate", 20),
|
|
184
|
+
("discover", 20),
|
|
185
|
+
]
|
|
186
|
+
|
|
187
|
+
def score(fn: str) -> int:
|
|
188
|
+
s = 0
|
|
189
|
+
s += min(500, counts.get(fn, 0)) # node count baseline
|
|
190
|
+
for needle, boost in keyword_boosts:
|
|
191
|
+
if needle in fn:
|
|
192
|
+
s += boost
|
|
193
|
+
if fn in roots:
|
|
194
|
+
s += 1000
|
|
195
|
+
if fn in func_summaries and func_summaries[fn].decisions:
|
|
196
|
+
s += min(200, 10 * len(func_summaries[fn].decisions))
|
|
197
|
+
return s
|
|
198
|
+
|
|
199
|
+
scored = [(fn, score(fn)) for fn in known_functions]
|
|
200
|
+
scored.sort(key=lambda x: x[1], reverse=True)
|
|
201
|
+
|
|
202
|
+
picked: List[str] = []
|
|
203
|
+
for fn, _ in scored:
|
|
204
|
+
if len(picked) >= max_functions:
|
|
205
|
+
break
|
|
206
|
+
picked.append(fn)
|
|
207
|
+
|
|
208
|
+
return picked
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _summarize_functions(nodes: Dict[int, Dict[str, Any]], limit_decisions: int, limit_calls: int) -> Dict[str, FuncSummary]:
    """Build a FuncSummary for every function that owns at least one node.

    Walks all CFG nodes once, collecting IF-node labels as "decisions" and
    parsed CALL-node callees as "calls" per owning function, then keeps only
    the most frequent entries (up to *limit_decisions* / *limit_calls*) for
    each function.
    """
    decisions_by_func: Dict[str, List[str]] = defaultdict(list)
    calls_by_func: Dict[str, List[str]] = defaultdict(list)
    # First (file, line) seen for a function is treated as its location.
    loc_by_func: Dict[str, Tuple[Optional[str], Optional[int]]] = {}

    for n in nodes.values():
        fn = n.get("function")
        if not isinstance(fn, str) or not fn:
            continue

        if fn not in loc_by_func:
            loc_by_func[fn] = (
                n.get("file") if isinstance(n.get("file"), str) else None,
                n.get("line") if isinstance(n.get("line"), int) else None,
            )

        ntype = n.get("type")
        label = str(n.get("label") or "")

        if ntype == "IF":
            # Decision points: keep a shortened copy of the condition label.
            decisions_by_func[fn].append(_shorten(label, 120))
        elif ntype == "CALL":
            callee = _parse_call_label(label)
            if callee:
                calls_by_func[fn].append(callee)

    summaries: Dict[str, FuncSummary] = {}
    # Union of all functions seen in any of the three maps above.
    for fn in set(list(decisions_by_func.keys()) + list(calls_by_func.keys()) + list(loc_by_func.keys())):
        file, line = loc_by_func.get(fn, (None, None))

        # Rank by frequency; most_common breaks ties by first-seen order.
        decision_counts = Counter(decisions_by_func.get(fn, []))
        call_counts = Counter(calls_by_func.get(fn, []))

        decisions = tuple([d for d, _ in decision_counts.most_common(limit_decisions)])
        calls = tuple([c for c, _ in call_counts.most_common(limit_calls)])

        summaries[fn] = FuncSummary(
            name=fn,
            file=file,
            line=line,
            decisions=decisions,
            calls=calls,
        )

    return summaries
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _build_call_graph(func_summaries: Dict[str, FuncSummary], known_functions: Set[str]) -> Dict[str, Set[str]]:
|
|
259
|
+
g: Dict[str, Set[str]] = defaultdict(set)
|
|
260
|
+
for fn, s in func_summaries.items():
|
|
261
|
+
for callee in s.calls:
|
|
262
|
+
if callee in known_functions:
|
|
263
|
+
g[fn].add(callee)
|
|
264
|
+
return g
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def _reachable(g: Dict[str, Set[str]], roots: Iterable[str], max_nodes: int) -> List[str]:
|
|
268
|
+
seen: Set[str] = set()
|
|
269
|
+
q: deque[str] = deque([r for r in roots if r])
|
|
270
|
+
|
|
271
|
+
while q and len(seen) < max_nodes:
|
|
272
|
+
cur = q.popleft()
|
|
273
|
+
if cur in seen:
|
|
274
|
+
continue
|
|
275
|
+
seen.add(cur)
|
|
276
|
+
for nxt in sorted(g.get(cur, set())):
|
|
277
|
+
if nxt not in seen:
|
|
278
|
+
q.append(nxt)
|
|
279
|
+
|
|
280
|
+
return list(seen)
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def generate_llm_flow(
    analysis: Dict[str, Any],
    max_functions: int,
    limit_decisions: int,
    limit_calls: int,
) -> Dict[str, Any]:
    """Assemble the "llm_flow.v1" document from a raw analysis mapping.

    Pipeline: parse nodes -> detect entrypoints -> summarize each function ->
    score/select the most relevant ones -> emit a compact dict ready for YAML
    serialization.
    """
    nodes = _collect_nodes(analysis)
    entrypoints = _collect_entrypoints(nodes)
    known_functions = _collect_functions(nodes)
    func_summaries = _summarize_functions(
        nodes, limit_decisions=limit_decisions, limit_calls=limit_calls
    )

    selected = _pick_relevant_functions(
        entrypoints=entrypoints,
        known_functions=known_functions,
        func_summaries=func_summaries,
        nodes=nodes,
        max_functions=max_functions,
    )

    # Emit summaries for selected functions, sorted by name; functions with
    # no summary (no owned nodes) are skipped.
    functions_out: List[Dict[str, Any]] = [
        {
            "name": summary.name,
            "file": summary.file,
            "line": summary.line,
            "decisions": list(summary.decisions),
            "calls": list(summary.calls),
        }
        for summary in (func_summaries.get(fn) for fn in sorted(selected))
        if summary
    ]

    # Top-level package names inferred from dotted function names.
    packages = sorted({fn.split(".")[0] for fn in known_functions if "." in fn})

    return {
        "format": "llm_flow.v1",
        "app": {
            "packages": packages,
            "entrypoints": entrypoints,
        },
        "flow": {
            "selected_functions": functions_out,
        },
    }
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def render_llm_flow_md(flow: Dict[str, Any]) -> str:
    """Render an llm_flow document as a human-readable Markdown summary."""
    app = _as_dict(flow.get("app"))
    out: List[str] = ["# LLM Flow Summary", ""]

    packages = _as_list(app.get("packages"))
    if packages:
        out.append("## Packages")
        out.extend(f"- {pkg}" for pkg in packages)
        out.append("")

    entrypoints = _as_list(app.get("entrypoints"))
    if entrypoints:
        out.append("## Entrypoints")
        for entry in entrypoints:
            ed = _as_dict(entry)
            out.append(f"- {ed.get('function')} ({ed.get('file')}:{ed.get('line')})")
        out.append("")

    out.append("## Selected functions")
    for item in _as_list(_as_dict(flow.get("flow")).get("selected_functions")):
        fd = _as_dict(item)
        out.append(f"### {fd.get('name')}")
        out.append(f"- Location: {fd.get('file')}:{fd.get('line')}")

        decisions = _as_list(fd.get("decisions"))
        if decisions:
            out.append("- Decisions:")
            out.extend(f"  - {_shorten(str(d), 180)}" for d in decisions)

        calls = _as_list(fd.get("calls"))
        if calls:
            out.append("- Calls:")
            out.extend(f"  - {c}" for c in calls)

        out.append("")

    # Single trailing newline, no trailing blank lines.
    return "\n".join(out).rstrip() + "\n"
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def dump_yaml(data: Dict[str, Any]) -> str:
    """Serialize *data* as readable YAML (insertion order kept, unicode intact)."""
    options = {
        "sort_keys": False,
        "allow_unicode": True,
        "width": 100,
        "default_flow_style": False,
    }
    return yaml.safe_dump(data, **options)
|
|
392
|
+
|
|
393
|
+
|
|
394
|
+
def create_parser() -> argparse.ArgumentParser:
    """Build the CLI argument parser for the flow generator."""
    parser = argparse.ArgumentParser(
        prog="llm-flow-generator",
        description="Generate compact LLM-friendly app flow summary from code2flow analysis.yaml",
    )
    # (flags, options) pairs keep the argument table easy to scan.
    for flags, options in (
        (
            ("-i", "--input"),
            {
                "default": "./output/analysis.yaml",
                "help": "Path to analysis.yaml (default: ./output/analysis.yaml)",
            },
        ),
        (
            ("-o", "--output"),
            {
                "default": "./output/llm_flow.yaml",
                "help": "Output llm_flow.yaml path (default: ./output/llm_flow.yaml)",
            },
        ),
        (
            ("--md",),
            {
                "default": None,
                "help": "Optional output Markdown summary path (e.g. ./output/llm_flow.md)",
            },
        ),
        (("--max-functions",), {"type": int, "default": 40}),
        (("--limit-decisions",), {"type": int, "default": 8}),
        (("--limit-calls",), {"type": int, "default": 12}),
    ):
        parser.add_argument(*flags, **options)
    return parser
|
|
420
|
+
|
|
421
|
+
|
|
422
|
+
def main(argv: Optional[List[str]] = None) -> int:
    """CLI entry point: read analysis.yaml, write llm_flow.yaml (+ optional .md).

    Returns 0 on success, 2 when the input file is missing.
    """
    args = create_parser().parse_args(argv)

    source = Path(args.input)
    if not source.exists():
        print(f"Error: input file not found: {source}", file=sys.stderr)
        return 2

    flow = generate_llm_flow(
        _safe_read_yaml(source),
        # Clamp CLI knobs to sane minimums.
        max_functions=max(1, args.max_functions),
        limit_decisions=max(0, args.limit_decisions),
        limit_calls=max(0, args.limit_calls),
    )

    destination = Path(args.output)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(dump_yaml(flow), encoding="utf-8")

    if args.md:
        md_target = Path(args.md)
        md_target.parent.mkdir(parents=True, exist_ok=True)
        md_target.write_text(render_llm_flow_md(flow), encoding="utf-8")

    return 0
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
# Allow running this module directly as a script; exit code comes from main().
if __name__ == "__main__":
    raise SystemExit(main())
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import sys
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
5
|
+
|
|
6
|
+
import yaml
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _strip_bom(text: str) -> str:
|
|
10
|
+
return text[1:] if text.startswith("\ufeff") else text
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _ensure_list(value: Any) -> List[Any]:
|
|
14
|
+
if value is None:
|
|
15
|
+
return []
|
|
16
|
+
if isinstance(value, list):
|
|
17
|
+
return value
|
|
18
|
+
return [value]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _deep_get(d: Dict[str, Any], path: Tuple[str, ...]) -> Any:
|
|
22
|
+
cur: Any = d
|
|
23
|
+
for key in path:
|
|
24
|
+
if not isinstance(cur, dict) or key not in cur:
|
|
25
|
+
return None
|
|
26
|
+
cur = cur[key]
|
|
27
|
+
return cur
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def normalize_llm_task(data: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize a loosely-structured task spec into the canonical llm_task shape.

    Every section is coerced defensively: a missing or non-mapping section
    becomes {} (previously a truthy scalar like ``task: "fix it"`` raised
    AttributeError on ``.get``), scalars in list positions are wrapped via
    _ensure_list, and absent string fields default to "".
    """

    def section(key: str) -> Dict[str, Any]:
        # Tolerate null/scalar/list sections instead of crashing on .get().
        value = data.get(key)
        return value if isinstance(value, dict) else {}

    task = section("task")
    context = section("context")
    deliverables = section("deliverables")
    interfaces = section("interfaces")
    rules = section("rules")
    acceptance = section("acceptance")
    notes = section("notes_for_llm")

    return {
        "task": {
            "title": task.get("title") or "",
            "one_line_goal": task.get("one_line_goal") or "",
        },
        "context": {
            "product_area": context.get("product_area") or "",
            "current_behavior": context.get("current_behavior") or "",
            "desired_behavior": context.get("desired_behavior") or "",
        },
        "deliverables": {
            "language": deliverables.get("language") or "any",
            "must_generate": _ensure_list(deliverables.get("must_generate")),
            "files_to_create_or_edit": _ensure_list(deliverables.get("files_to_create_or_edit")),
        },
        "interfaces": {
            "inputs": _ensure_list(interfaces.get("inputs")),
            "outputs": _ensure_list(interfaces.get("outputs")),
        },
        "rules": {
            "must": _ensure_list(rules.get("must")),
            "must_not": _ensure_list(rules.get("must_not")),
            "assumptions": _ensure_list(rules.get("assumptions")),
            "edge_cases": _ensure_list(rules.get("edge_cases")),
            "performance": _ensure_list(rules.get("performance")),
        },
        "acceptance": {
            "tests": _ensure_list(acceptance.get("tests")),
            "done_definition": _ensure_list(acceptance.get("done_definition")),
        },
        "examples": _ensure_list(data.get("examples")),
        "notes_for_llm": {
            "constraints": _ensure_list(notes.get("constraints")),
            "style": _ensure_list(notes.get("style")),
            "language_specific_hints": _ensure_list(notes.get("language_specific_hints")),
        },
    }
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# Maps plain-text section headers (e.g. "TITLE:") to the (section, field)
# path in the normalized task dict where that section's free text is stored.
# NOTE(review): a header only takes effect if parse_llm_task_text also
# recognizes it in its header set — keep the two in sync.
_SECTION_KEYS = {
    "TITLE": ("task", "title"),
    "GOAL": ("task", "one_line_goal"),
    "PRODUCT_AREA": ("context", "product_area"),
    "CURRENT": ("context", "current_behavior"),
    "DESIRED": ("context", "desired_behavior"),
}
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _parse_bullets(lines: List[str]) -> List[str]:
|
|
91
|
+
items: List[str] = []
|
|
92
|
+
for raw in lines:
|
|
93
|
+
s = raw.strip()
|
|
94
|
+
if not s:
|
|
95
|
+
continue
|
|
96
|
+
if s.startswith("-"):
|
|
97
|
+
items.append(s[1:].strip())
|
|
98
|
+
else:
|
|
99
|
+
items.append(s)
|
|
100
|
+
return items
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def parse_llm_task_text(text: str) -> Dict[str, Any]:
    """Parse a simplified plain-text task spec into the llm_task dict shape.

    The format is "SECTION:" headers (case-insensitive) followed by free
    text or "-" bullets; text before the first recognized header is ignored.

    Fix: "PRODUCT_AREA" is now accepted as a section header.  It was mapped
    in _SECTION_KEYS but missing from the recognized header set, so
    context.product_area could never be populated from text input.
    """
    text = _strip_bom(text)
    lines = text.replace("\r\n", "\n").replace("\r", "\n").split("\n")

    known_headers = {
        "TITLE",
        "GOAL",
        "PRODUCT_AREA",  # was missing: _SECTION_KEYS routes it to context.product_area
        "CURRENT",
        "DESIRED",
        "INPUTS",
        "OUTPUTS",
        "RULES (MUST)",
        "RULES (MUST NOT)",
        "EDGE CASES",
        "ACCEPTANCE TESTS",
        "DELIVERABLES",
    }

    sections: Dict[str, List[str]] = {}
    current: Optional[str] = None

    for line in lines:
        stripped = line.strip()
        if not stripped:
            # Preserve blank lines inside an open section.
            if current is not None:
                sections[current].append("")
            continue

        upper = stripped.upper()
        if upper.endswith(":"):
            key = upper[:-1].strip()
            if key in known_headers:
                current = key
                sections.setdefault(key, [])
                continue

        if current is None:
            continue
        sections[current].append(line)

    # Skeleton with every field present so downstream code can rely on shape.
    data: Dict[str, Any] = {
        "task": {"title": "", "one_line_goal": ""},
        "context": {"product_area": "", "current_behavior": "", "desired_behavior": ""},
        "deliverables": {"language": "any", "must_generate": [], "files_to_create_or_edit": []},
        "interfaces": {"inputs": [], "outputs": []},
        "rules": {"must": [], "must_not": [], "assumptions": [], "edge_cases": [], "performance": []},
        "acceptance": {"tests": [], "done_definition": []},
        "examples": [],
        "notes_for_llm": {"constraints": [], "style": [], "language_specific_hints": []},
    }

    # Free-text sections -> scalar fields via _SECTION_KEYS paths.
    for section_name, path in _SECTION_KEYS.items():
        content_lines = sections.get(section_name)
        if not content_lines:
            continue
        value = "\n".join(content_lines).strip()
        if value:
            parent = data
            for key in path[:-1]:
                parent = parent[key]
            parent[path[-1]] = value

    # Bullet-list sections.
    if sections.get("INPUTS"):
        data["interfaces"]["inputs"] = _parse_bullets(sections["INPUTS"])
    if sections.get("OUTPUTS"):
        data["interfaces"]["outputs"] = _parse_bullets(sections["OUTPUTS"])
    if sections.get("RULES (MUST)"):
        data["rules"]["must"] = _parse_bullets(sections["RULES (MUST)"])
    if sections.get("RULES (MUST NOT)"):
        data["rules"]["must_not"] = _parse_bullets(sections["RULES (MUST NOT)"])
    if sections.get("EDGE CASES"):
        data["rules"]["edge_cases"] = _parse_bullets(sections["EDGE CASES"])

    if sections.get("ACCEPTANCE TESTS"):
        # Each bullet becomes a minimal test record with only "then" filled in.
        tests: List[Dict[str, str]] = []
        for idx, item in enumerate(_parse_bullets(sections["ACCEPTANCE TESTS"]), 1):
            tests.append({"name": f"test_{idx}", "given": "", "when": "", "then": item})
        data["acceptance"]["tests"] = tests

    if sections.get("DELIVERABLES"):
        data["deliverables"]["must_generate"] = _parse_bullets(sections["DELIVERABLES"])

    return data
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def load_input(path: Path) -> Dict[str, Any]:
    """Load a task spec from YAML, JSON, or plain text, chosen by file suffix.

    YAML/JSON inputs must have a mapping/object at the top level (ValueError
    otherwise); any other suffix is parsed as the simplified text format.
    """
    raw = _strip_bom(path.read_text(encoding="utf-8"))
    suffix = path.suffix.lower()

    if suffix in (".yaml", ".yml"):
        parsed = yaml.safe_load(raw) or {}
        if not isinstance(parsed, dict):
            raise ValueError("YAML input must be a mapping/object at top level")
        return parsed

    if suffix == ".json":
        import json

        parsed = json.loads(raw)
        if not isinstance(parsed, dict):
            raise ValueError("JSON input must be an object at top level")
        return parsed

    return parse_llm_task_text(raw)
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def dump_yaml(data: Dict[str, Any]) -> str:
    """Serialize *data* to YAML, keeping key order and unicode characters."""
    return yaml.safe_dump(
        data, sort_keys=False, allow_unicode=True, width=100, default_flow_style=False
    )
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def create_parser() -> argparse.ArgumentParser:
    """Build the CLI argument parser for the task generator."""
    parser = argparse.ArgumentParser(
        prog="llm-task-generator",
        description="Generate normalized llm_task.yaml from simplified task spec (text/YAML/JSON).",
    )
    parser.add_argument("-i", "--input", required=True, help="Input file: .txt/.md/.yaml/.yml/.json")
    parser.add_argument("-o", "--output", required=True, help="Output YAML file path")
    # Dry-run mode: normalize and print to stdout instead of writing a file.
    parser.add_argument(
        "--validate-only",
        action="store_true",
        help="Only validate/normalize input; do not write output file",
    )
    return parser
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def main(argv: Optional[List[str]] = None) -> int:
    """CLI entry point: load, normalize, and emit the task spec.

    Returns 0 on success, 2 when the input file does not exist.
    """
    args = create_parser().parse_args(argv)

    source = Path(args.input)
    if not source.exists():
        print(f"Error: input file not found: {source}", file=sys.stderr)
        return 2

    raw = load_input(source)
    # A bare spec without a "task" wrapper is treated as the task body itself.
    if "task" not in raw:
        raw = {"task": raw}

    normalized = normalize_llm_task(raw)

    if args.validate_only:
        sys.stdout.write(dump_yaml(normalized))
        return 0

    target = Path(args.output)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(dump_yaml(normalized), encoding="utf-8")
    return 0
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
# Allow running this module directly as a script; exit code comes from main().
if __name__ == "__main__":
    raise SystemExit(main())
|