codedebrief 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codedebrief/__init__.py +12 -0
- codedebrief/analysis/__init__.py +16 -0
- codedebrief/analysis/common.py +527 -0
- codedebrief/analysis/discovery.py +100 -0
- codedebrief/analysis/languages/__init__.py +6 -0
- codedebrief/analysis/languages/_common.py +68 -0
- codedebrief/analysis/languages/c.py +96 -0
- codedebrief/analysis/languages/cpp.py +146 -0
- codedebrief/analysis/languages/csharp.py +137 -0
- codedebrief/analysis/languages/go.py +157 -0
- codedebrief/analysis/languages/java.py +158 -0
- codedebrief/analysis/languages/php.py +83 -0
- codedebrief/analysis/languages/ruby.py +75 -0
- codedebrief/analysis/languages/rust.py +96 -0
- codedebrief/analysis/project.py +373 -0
- codedebrief/analysis/python.py +939 -0
- codedebrief/analysis/registry.py +320 -0
- codedebrief/analysis/treesitter.py +884 -0
- codedebrief/analysis/typescript.py +1019 -0
- codedebrief/artifacts.py +49 -0
- codedebrief/cli.py +585 -0
- codedebrief/config.py +226 -0
- codedebrief/doctor.py +175 -0
- codedebrief/install.py +441 -0
- codedebrief/mcp_server.py +2720 -0
- codedebrief/model.py +189 -0
- codedebrief/py.typed +1 -0
- codedebrief/quality.py +392 -0
- codedebrief/query.py +641 -0
- codedebrief/render/__init__.py +6 -0
- codedebrief/render/assets/generated/codedebrief-viewer-runtime.iife.js +10 -0
- codedebrief/render/assets/panels.js +462 -0
- codedebrief/render/assets/shell.js +1649 -0
- codedebrief/render/assets/styles.css +1715 -0
- codedebrief/render/assets/tree.js +616 -0
- codedebrief/render/html.py +191 -0
- codedebrief/render/markdown.py +153 -0
- codedebrief/render/payload.py +326 -0
- codedebrief/render/snapshot.py +769 -0
- codedebrief/schema/codedebrief.schema.json +449 -0
- codedebrief/util.py +65 -0
- codedebrief/validation.py +214 -0
- codedebrief-0.11.0.dist-info/METADATA +426 -0
- codedebrief-0.11.0.dist-info/RECORD +48 -0
- codedebrief-0.11.0.dist-info/WHEEL +4 -0
- codedebrief-0.11.0.dist-info/entry_points.txt +2 -0
- codedebrief-0.11.0.dist-info/licenses/LICENSE +176 -0
- codedebrief-0.11.0.dist-info/licenses/NOTICE +9 -0
codedebrief/query.py
ADDED
|
@@ -0,0 +1,641 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from collections import deque
|
|
5
|
+
from collections.abc import Iterable
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from codedebrief.model import Flow, FlowNode, NodeKind, ProjectModel
|
|
11
|
+
from codedebrief.util import metadata_scope_names
|
|
12
|
+
|
|
13
|
+
# Per-bucket relevance weights. Named constants instead of inline magic numbers so the
# ranking model is auditable and the tests can assert exact scores. query_model adds
# one weight per unique query term for every bucket in which that term is found.
IDENTITY_WEIGHT = 6  # term found in the tokens of the flow's name/symbol
NODE_WEIGHT = 3  # term found in the tokens of a node label
# Term found in the path/language/scope tokens; query_model also adds this weight once
# per structured-filter reason.
STRUCTURE_WEIGHT = 5
METADATA_WEIGHT = 2  # term found in the tokens gathered from node metadata
# Tie-breaker only: nudges an entrypoint above an otherwise-equal non-entrypoint. Added
# only when the flow's score is already > 0, so it never manufactures a match.
ENTRYPOINT_BONUS = 1
# Divisor used by _navigation_cap to turn a token budget into a maximum item count.
NAVIGATION_TOKENS_PER_ITEM = 60
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass(slots=True)
class QueryMatch:
    """One ranked query hit: the matched flow, its score, and the reasons it matched."""

    flow: Flow
    score: int
    reasons: list[str]

    def to_dict(self, include_source: bool = True) -> dict[str, Any]:
        """The single serialization used by MCP query/context tools.

        Args:
            include_source: When true, add a ``"source"`` entry of the form
                ``path:start_line`` taken from the flow's location.

        Returns:
            A dict describing the flow, its score and reasons, plus ready-made
            follow-up tool calls under ``"next_tools"``.
        """
        matched = self.flow
        # Follow-up calls an agent can issue next; each gets its own id list.
        follow_ups: dict[str, Any] = {
            "agent_context": {
                "tool": "agent_context",
                "arguments": {"flow_id": matched.id},
            },
            "snapshot_slice": {
                "tool": "snapshot_slice",
                "arguments": {
                    "flow_ids": [matched.id],
                    "format": "svg",
                    "include_svg": False,
                },
            },
            "expand_slice": {
                "tool": "expand_slice",
                "arguments": {"flow_ids": [matched.id], "direction": "neighbors"},
            },
        }
        payload: dict[str, Any] = {
            "flow_id": matched.id,
            "name": matched.name,
            "language": matched.language,
            "entry_kind": matched.entry_kind,
            "framework": matched.framework,
            "scope": metadata_scope_names(matched.metadata),
            "score": self.score,
            "reasons": self.reasons,
            "subgraph_flow_ids": [matched.id],
            "next_tools": follow_ups,
        }
        if not include_source:
            return payload
        where = matched.location
        payload["source"] = f"{where.path}:{where.start_line}"
        return payload
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dataclass(slots=True)
|
|
68
|
+
class ImpactResult:
|
|
69
|
+
changed_files: list[str]
|
|
70
|
+
directly_impacted: list[Flow]
|
|
71
|
+
transitively_impacted: list[Flow]
|
|
72
|
+
impact_reasons: dict[str, list[str]] = field(default_factory=dict)
|
|
73
|
+
target_flow_ids: list[str] = field(default_factory=list)
|
|
74
|
+
target_symbols: list[str] = field(default_factory=list)
|
|
75
|
+
target_dependency_paths: list[str] = field(default_factory=list)
|
|
76
|
+
unresolved_targets: list[dict[str, str]] = field(default_factory=list)
|
|
77
|
+
|
|
78
|
+
@property
|
|
79
|
+
def all_flows(self) -> list[Flow]:
|
|
80
|
+
seen: dict[str, Flow] = {}
|
|
81
|
+
for flow in self.directly_impacted + self.transitively_impacted:
|
|
82
|
+
seen[flow.id] = flow
|
|
83
|
+
return list(seen.values())
|
|
84
|
+
|
|
85
|
+
@property
|
|
86
|
+
def subgraph_flow_ids(self) -> list[str]:
|
|
87
|
+
return [flow.id for flow in self.all_flows]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def query_model(
    model: ProjectModel,
    question: str,
    limit: int = 10,
    scope: str | None = None,
    language: str | None = None,
    source_path: str | None = None,
    symbol: str | None = None,
    domain: str | None = None,
    value: str | None = None,
) -> list[QueryMatch]:
    """Rank the model's flows against a question and optional structured filters.

    Args:
        model: Project model whose ``flows`` are searched.
        question: Free-text question; it is reduced to search terms by ``_terms``.
        limit: Maximum number of matches returned; a falsy or non-positive value
            disables the cap.
        scope: Keep only flows for which ``flow_in_scope`` is true.
        language: Keep only flows whose ``language`` equals this value.
        source_path: Structured filter, checked by ``_structured_query_filter_reasons``.
        symbol: Structured filter, checked by ``_structured_query_filter_reasons``.
        domain: Structured filter, checked by ``_structured_query_filter_reasons``.
        value: Structured filter, checked by ``_structured_query_filter_reasons``.

    Returns:
        Matches with a positive score, ordered by score (descending), then flow
        name, then flow id.
    """
    structured_filters = (scope, language, source_path, symbol, domain, value)
    # Dedup query terms before scoring so repeating a word ("user user user") cannot
    # inflate a flow's rank. dict.fromkeys preserves order for stable reason text.
    unique_terms = list(dict.fromkeys(_terms(question)))
    if not unique_terms and all(item is None for item in structured_filters):
        # A blank or punctuation-only question has nothing to rank against. Returning []
        # (rather than every entrypoint) makes the CLI print "No matching logic flows
        # found." instead of garbage filler.
        return []

    ranked: list[QueryMatch] = []
    for flow in model.flows:
        if not flow_in_scope(flow, scope):
            continue
        if language is not None and flow.language != language:
            continue
        filter_reasons = _structured_query_filter_reasons(
            flow,
            source_path=source_path,
            symbol=symbol,
            domain=domain,
            value=value,
        )
        if filter_reasons is None:
            continue

        # Match on tokens, not substrings: "order" must not match inside "reordering".
        # Each bucket is (tokens, weight, reason suffix), checked in this order per term.
        scope_text = " ".join(metadata_scope_names(flow.metadata))
        buckets = (
            (
                _tokenize(f"{flow.name} {flow.symbol}"),
                IDENTITY_WEIGHT,
                "matches the flow identity",
            ),
            (
                _tokenize(" ".join(node.label for node in flow.nodes)),
                NODE_WEIGHT,
                "appears in a decision or action",
            ),
            (
                _tokenize(" ".join([flow.location.path, flow.language, scope_text])),
                STRUCTURE_WEIGHT,
                "matches flow structure",
            ),
            (
                _flow_metadata_tokens(flow),
                METADATA_WEIGHT,
                "appears in decision metadata",
            ),
        )

        score = 0
        reasons: list[str] = []
        for term in unique_terms:
            for tokens, weight, phrase in buckets:
                if _term_matches(term, tokens):
                    score += weight
                    reasons.append(f"`{term}` {phrase}")
        # Every satisfied structured filter counts like a structure hit.
        score += STRUCTURE_WEIGHT * len(filter_reasons)
        reasons.extend(filter_reasons)

        if not score:
            continue
        # The entrypoint bonus is a tie-breaker among real matches, never a match on its
        # own: it is only reached once the score is already positive.
        if flow.is_entrypoint:
            score += ENTRYPOINT_BONUS
        ranked.append(QueryMatch(flow, score, list(dict.fromkeys(reasons))))

    # Deterministic order: score desc, then name, then unique id, so equal score+name is
    # stable regardless of flow insertion order.
    ranked.sort(key=lambda item: (-item.score, item.flow.name, item.flow.id))
    return ranked[:limit] if limit and limit > 0 else ranked
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def impact_model(
    model: ProjectModel,
    changed_files: list[str],
    scope: str | None = None,
    *,
    flow_ids: list[str] | None = None,
    symbols: list[str] | None = None,
    dependency_paths: list[str] | None = None,
) -> ImpactResult:
    """Compute the flows impacted by changed files and by explicit targets.

    A flow is *directly* impacted when its source file is in ``changed_files``, when
    a file record listing it in ``flow_ids`` depends on a changed file, or when it is
    selected by ``flow_ids`` / ``symbols`` / ``dependency_paths``. Flows reached by
    following ``called_by`` from an impacted flow are *transitively* impacted.

    Args:
        model: Project model providing ``flows`` and ``files``.
        changed_files: Paths of changed files; normalized with ``_normalize_path``.
        scope: When given, only flows for which ``flow_in_scope`` is true are reported.
        flow_ids: Explicit flow ids to treat as directly impacted.
        symbols: Explicit values compared for equality with a flow's symbol or name.
        dependency_paths: Paths matched against flow source paths with
            ``_path_matches_dependency``.

    Returns:
        An ``ImpactResult``. Targets that match no flow, or only out-of-scope flows,
        are recorded in ``unresolved_targets`` with reason ``"not_found"`` or
        ``"scope_filtered"``.
    """
    normalized = {_normalize_path(item) for item in changed_files}
    flows = [flow for flow in model.flows if flow_in_scope(flow, scope)]
    by_id = {flow.id: flow for flow in model.flows}
    scoped_ids = {flow.id for flow in flows}
    target_flow_ids = _unique(flow_ids or [])
    target_symbols = _unique(symbols or [])
    target_dependency_paths = _unique(_normalize_path(item) for item in dependency_paths or [])
    unresolved_targets: list[dict[str, str]] = []
    # Insertion-ordered on purpose: this order also drives the caller walk below.
    direct_by_id: dict[str, Flow] = {}
    impact_reasons: dict[str, list[str]] = {}

    def add_reason(flow: Flow, reason: str) -> None:
        """Record ``reason`` for ``flow`` once, keeping first-seen order."""
        reasons = impact_reasons.setdefault(flow.id, [])
        if reason not in reasons:
            reasons.append(reason)

    # 1. Flows whose own source file changed.
    for flow in flows:
        if _normalize_path(flow.location.path) not in normalized:
            continue
        direct_by_id[flow.id] = flow
        add_reason(flow, f"source file changed `{_normalize_path(flow.location.path)}`")

    # 2. Flows in files that declare a dependency on a changed file.
    for file_record in model.files:
        dependency_matches = sorted(
            dependency
            for dependency in {_normalize_path(item) for item in file_record.dependencies}
            if dependency in normalized
        )
        if not dependency_matches:
            continue
        for flow_id in file_record.flow_ids:
            dependent_flow = by_id.get(flow_id)
            if dependent_flow is None or dependent_flow.id not in scoped_ids:
                continue
            direct_by_id[dependent_flow.id] = dependent_flow
            for dependency in dependency_matches:
                add_reason(dependent_flow, f"depends on changed file `{dependency}`")

    def add_flow(flow: Flow, target_type: str, value: str, reason: str) -> None:
        """Mark an explicitly targeted flow as direct, or report it as scope-filtered."""
        if flow.id not in scoped_ids:
            unresolved_targets.append(
                {"type": target_type, "value": value, "reason": "scope_filtered"}
            )
            return
        direct_by_id[flow.id] = flow
        add_reason(flow, reason)

    # 3. Explicit flow-id targets.
    for flow_id in target_flow_ids:
        target_flow = by_id.get(flow_id)
        if target_flow is None:
            unresolved_targets.append({"type": "flow", "value": flow_id, "reason": "not_found"})
            continue
        add_flow(target_flow, "flow", flow_id, f"explicit flow target `{flow_id}`")

    # 4. Explicit symbol/name targets.
    for symbol in target_symbols:
        matches = [flow for flow in model.flows if symbol in (flow.symbol, flow.name)]
        if not matches:
            unresolved_targets.append({"type": "symbol", "value": symbol, "reason": "not_found"})
            continue
        for flow in matches:
            add_flow(flow, "symbol", symbol, f"explicit symbol/name target `{symbol}`")

    # 5. Explicit dependency-path targets (a file, or a directory prefix).
    for dependency_path in target_dependency_paths:
        matches = [
            flow
            for flow in model.flows
            if _path_matches_dependency(_normalize_path(flow.location.path), dependency_path)
        ]
        if not matches:
            unresolved_targets.append(
                {"type": "dependency_path", "value": dependency_path, "reason": "not_found"}
            )
            continue
        scoped_matches = [flow for flow in matches if flow.id in scoped_ids]
        if not scoped_matches:
            unresolved_targets.append(
                {
                    "type": "dependency_path",
                    "value": dependency_path,
                    "reason": "scope_filtered",
                }
            )
            continue
        for flow in scoped_matches:
            direct_by_id[flow.id] = flow
            add_reason(flow, f"dependency path target `{dependency_path}`")

    direct = list(direct_by_id.values())
    impacted_ids = set(direct_by_id)
    # Seed the walk from the insertion-ordered dict, not from the id set: iteration
    # order of a set of strings can differ between processes, and the first impacted
    # flow to reach a caller decides that caller's recorded reason.
    queue: deque[str] = deque(direct_by_id)
    transitive: list[Flow] = []
    while queue:
        current = by_id.get(queue.popleft())
        if current is None:
            continue
        for caller_id in current.called_by:
            if caller_id in impacted_ids:
                continue
            # Unknown or out-of-scope callers are still enqueued so the walk can
            # continue through them; scope is applied to the result afterwards.
            impacted_ids.add(caller_id)
            queue.append(caller_id)
            caller = by_id.get(caller_id)
            if caller:
                transitive.append(caller)
                add_reason(caller, f"calls impacted flow `{current.name}`")

    transitive = [flow for flow in transitive if flow_in_scope(flow, scope)]
    impacted_ids = {flow.id for flow in direct} | {flow.id for flow in transitive}
    # Drop reasons collected for callers that were filtered out by scope.
    scoped_impact_reasons = {
        flow_id: reasons
        for flow_id, reasons in sorted(impact_reasons.items())
        if flow_id in impacted_ids
    }
    # Name then id, so flows sharing a name are ordered independently of discovery order.
    return ImpactResult(
        changed_files=sorted(normalized),
        directly_impacted=sorted(direct, key=lambda item: (item.name, item.id)),
        transitively_impacted=sorted(transitive, key=lambda item: (item.name, item.id)),
        impact_reasons=scoped_impact_reasons,
        target_flow_ids=target_flow_ids,
        target_symbols=target_symbols,
        target_dependency_paths=target_dependency_paths,
        unresolved_targets=unresolved_targets,
    )
|
|
312
|
+
|
|
313
|
+
|
|
314
|
+
def flow_navigation(
    model: ProjectModel,
    target: str,
    token_budget: int = 0,
) -> dict[str, Any]:
    """A bounded navigation pack for one flow, shared by CLI and MCP.

    Args:
        model: Project model whose flows are searched for ``target``.
        target: Flow id, symbol, or name, resolved by ``_resolve_flow_target``.
        token_budget: When positive, each related list is capped by ``_navigation_cap``.

    Returns:
        The error payload from ``_resolve_flow_target`` when the target cannot be
        resolved; otherwise a dict with the flow's summary and counts, its called and
        calling flows, unresolved call ids, decision nodes, and follow-up tool calls.
    """
    flow, error = _resolve_flow_target(model, target)
    if error is not None:
        return error
    assert flow is not None

    by_id = {item.id: item for item in model.flows}
    scope_names = metadata_scope_names(flow.metadata)
    primary_scope = scope_names[0] if scope_names else None
    decision_nodes = [node for node in flow.nodes if node.kind is NodeKind.DECISION]

    overview: dict[str, Any] = dict(flow_summary(flow))
    overview.update(
        {
            "symbol": flow.symbol,
            "is_entrypoint": flow.is_entrypoint,
            "nodes": len(flow.nodes),
            "edges": len(flow.edges),
            "decisions": len(decision_nodes),
            "calls": len(flow.calls),
            "callers": len(flow.called_by),
            "tests": flow.tests,
        }
    )

    # The scope argument is only suggested when the flow has at least one scope name.
    context_arguments: dict[str, Any] = {"flow_id": flow.id, "question": flow.name}
    if primary_scope:
        context_arguments["scope"] = primary_scope

    called = _related_flow_summaries(flow.calls, by_id)
    callers = _related_flow_summaries(flow.called_by, by_id)
    # Call targets that are not modeled flows are surfaced rather than silently dropped.
    missing_calls = [target_id for target_id in flow.calls if target_id not in by_id]
    decisions = [_decision_navigation(node) for node in decision_nodes]

    return {
        "flow": overview,
        "called_flows": _navigation_cap(called, token_budget),
        "caller_flows": _navigation_cap(callers, token_budget),
        "unresolved_call_ids": missing_calls,
        "decision_nodes": _navigation_cap(decisions, token_budget),
        "next_tools": {
            "agent_context": {
                "tool": "agent_context",
                "arguments": context_arguments,
            },
            "snapshot_slice": {
                "tool": "snapshot_slice",
                "arguments": {
                    "flow_ids": [flow.id],
                    "format": "svg",
                    "include_svg": False,
                },
            },
            "expand_slice": {
                "tool": "expand_slice",
                "arguments": {"flow_ids": [flow.id], "direction": "neighbors"},
            },
        },
    }
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def _resolve_flow_target(
|
|
374
|
+
model: ProjectModel, target: str
|
|
375
|
+
) -> tuple[Flow | None, dict[str, Any] | None]:
|
|
376
|
+
symbol_match: Flow | None = None
|
|
377
|
+
name_matches: list[Flow] = []
|
|
378
|
+
for flow in model.flows:
|
|
379
|
+
if flow.id == target:
|
|
380
|
+
return flow, None
|
|
381
|
+
if symbol_match is None and flow.symbol == target:
|
|
382
|
+
symbol_match = flow
|
|
383
|
+
if flow.name == target:
|
|
384
|
+
name_matches.append(flow)
|
|
385
|
+
if symbol_match is not None:
|
|
386
|
+
return symbol_match, None
|
|
387
|
+
if len(name_matches) == 1:
|
|
388
|
+
return name_matches[0], None
|
|
389
|
+
if len(name_matches) > 1:
|
|
390
|
+
return None, _flow_target_error(
|
|
391
|
+
f"ambiguous flow target: {target}",
|
|
392
|
+
"flow_target_ambiguous",
|
|
393
|
+
target,
|
|
394
|
+
matches=[flow_summary(flow) for flow in name_matches],
|
|
395
|
+
)
|
|
396
|
+
return None, _flow_target_error(f"flow not found: {target}", "flow_not_found", target)
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def _flow_target_error(
|
|
400
|
+
message: str,
|
|
401
|
+
error_code: str,
|
|
402
|
+
target: str,
|
|
403
|
+
*,
|
|
404
|
+
matches: list[dict[str, Any]] | None = None,
|
|
405
|
+
) -> dict[str, Any]:
|
|
406
|
+
payload: dict[str, Any] = {
|
|
407
|
+
"error": message,
|
|
408
|
+
"error_code": error_code,
|
|
409
|
+
"target": target,
|
|
410
|
+
"recoverable": True,
|
|
411
|
+
"guardrail": (
|
|
412
|
+
"This reports an invalid flow-navigation target from the generated model; "
|
|
413
|
+
"re-run agent_context with a narrower question to locate a modeled flow."
|
|
414
|
+
),
|
|
415
|
+
"next_tools": {
|
|
416
|
+
"agent_context": {
|
|
417
|
+
"tool": "agent_context",
|
|
418
|
+
"arguments": {"question": target, "token_budget": 600},
|
|
419
|
+
},
|
|
420
|
+
},
|
|
421
|
+
}
|
|
422
|
+
if matches is not None:
|
|
423
|
+
payload["matches"] = matches
|
|
424
|
+
return payload
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def _navigation_cap(items: list[dict[str, Any]], token_budget: int) -> list[dict[str, Any]]:
|
|
428
|
+
if token_budget <= 0:
|
|
429
|
+
return items
|
|
430
|
+
return items[: max(1, token_budget // NAVIGATION_TOKENS_PER_ITEM)]
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def flow_summary(flow: Flow) -> dict[str, Any]:
    """Return the compact identifying summary of ``flow``.

    The ``"source"`` entry has the form ``path:start_line`` and ``"scope"`` holds the
    result of ``metadata_scope_names`` for the flow's metadata.
    """
    where = flow.location
    return dict(
        id=flow.id,
        name=flow.name,
        source=f"{where.path}:{where.start_line}",
        entry_kind=flow.entry_kind,
        language=flow.language,
        scope=metadata_scope_names(flow.metadata),
    )
|
|
442
|
+
|
|
443
|
+
|
|
444
|
+
def _related_flow_summaries(flow_ids: list[str], by_id: dict[str, Flow]) -> list[dict[str, Any]]:
|
|
445
|
+
return sorted(
|
|
446
|
+
[flow_summary(by_id[flow_id]) for flow_id in flow_ids if flow_id in by_id],
|
|
447
|
+
key=lambda item: (item["name"], item["id"]),
|
|
448
|
+
)
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def _decision_navigation(node: FlowNode) -> dict[str, Any]:
|
|
452
|
+
return {
|
|
453
|
+
"node_id": node.id,
|
|
454
|
+
"label": node.label,
|
|
455
|
+
"source": f"{node.location.path}:{node.location.start_line}",
|
|
456
|
+
"condition": node.metadata.get("condition"),
|
|
457
|
+
"domain": node.metadata.get("domain"),
|
|
458
|
+
"subject": node.metadata.get("subject"),
|
|
459
|
+
"operator": node.metadata.get("operator"),
|
|
460
|
+
"values": node.metadata.get("values", []),
|
|
461
|
+
"branches": node.metadata.get("branches", []),
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
|
|
465
|
+
def flow_in_scope(flow: Flow, scope: str | None) -> bool:
    """Whether a flow belongs to the requested macro-part (None = no filter)."""
    if scope is None:
        return True
    return scope in metadata_scope_names(flow.metadata)
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
def _structured_query_filter_reasons(
|
|
471
|
+
flow: Flow,
|
|
472
|
+
*,
|
|
473
|
+
source_path: str | None,
|
|
474
|
+
symbol: str | None,
|
|
475
|
+
domain: str | None,
|
|
476
|
+
value: str | None,
|
|
477
|
+
) -> list[str] | None:
|
|
478
|
+
reasons: list[str] = []
|
|
479
|
+
if source_path is not None:
|
|
480
|
+
needle = _normalize_path(source_path)
|
|
481
|
+
haystack = _normalize_path(flow.location.path)
|
|
482
|
+
if needle not in haystack:
|
|
483
|
+
return None
|
|
484
|
+
reasons.append(f"source path matches `{needle}`")
|
|
485
|
+
if symbol is not None:
|
|
486
|
+
if symbol not in {flow.symbol, flow.name, flow.id}:
|
|
487
|
+
return None
|
|
488
|
+
reasons.append(f"symbol/name matches `{symbol}`")
|
|
489
|
+
if domain is not None or value is not None:
|
|
490
|
+
decision = _flow_has_decision_filter(flow, domain=domain, value=value)
|
|
491
|
+
if decision is None:
|
|
492
|
+
return None
|
|
493
|
+
if domain is not None:
|
|
494
|
+
reasons.append(f"decision domain matches `{domain}`")
|
|
495
|
+
if value is not None:
|
|
496
|
+
reasons.append(f"decision value matches `{value}`")
|
|
497
|
+
return reasons
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def _flow_has_decision_filter(
|
|
501
|
+
flow: Flow, *, domain: str | None, value: str | None
|
|
502
|
+
) -> FlowNode | None:
|
|
503
|
+
for node in flow.nodes:
|
|
504
|
+
if node.kind is not NodeKind.DECISION:
|
|
505
|
+
continue
|
|
506
|
+
domains = {
|
|
507
|
+
str(node.metadata.get("domain", "")),
|
|
508
|
+
str(node.metadata.get("value_namespace", "")),
|
|
509
|
+
}
|
|
510
|
+
if domain is not None and domain not in domains:
|
|
511
|
+
continue
|
|
512
|
+
if value is not None and value not in _decision_values(node):
|
|
513
|
+
continue
|
|
514
|
+
return node
|
|
515
|
+
return None
|
|
516
|
+
|
|
517
|
+
|
|
518
|
+
def _decision_values(node: FlowNode) -> set[str]:
|
|
519
|
+
values = {str(item) for item in node.metadata.get("values", [])}
|
|
520
|
+
for branch in node.metadata.get("branches", []):
|
|
521
|
+
if isinstance(branch, dict):
|
|
522
|
+
values.add(str(branch.get("label", "")))
|
|
523
|
+
return values
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
def _enum_text(value: Any) -> str:
|
|
527
|
+
return value.value if hasattr(value, "value") else str(value)
|
|
528
|
+
|
|
529
|
+
|
|
530
|
+
def git_changed_files(root: Path) -> list[str]:
    """List files changed relative to ``HEAD`` plus untracked files, best effort.

    Runs ``git diff --name-only HEAD`` and ``git ls-files --others
    --exclude-standard`` with ``root`` as the working directory and merges their
    output.

    Args:
        root: Directory in which the git commands are run.

    Returns:
        The sorted, de-duplicated file names printed by the commands that succeeded.
        A command that exits non-zero, or that cannot be started at all (git not
        installed, ``root`` missing or not a directory), contributes nothing, so the
        result is ``[]`` when git is unavailable.
    """
    import subprocess

    commands = [
        ["git", "diff", "--name-only", "HEAD"],
        ["git", "ls-files", "--others", "--exclude-standard"],
    ]
    files: set[str] = set()
    for command in commands:
        try:
            result = subprocess.run(
                command,
                cwd=root,
                check=False,
                capture_output=True,
                text=True,
            )
        except OSError:
            # The process could not even be launched. Treat it like a failed command
            # instead of crashing: non-zero exits are already tolerated below.
            continue
        if result.returncode != 0:
            continue
        files.update(line.strip() for line in result.stdout.splitlines() if line.strip())
    return sorted(files)
|
|
549
|
+
|
|
550
|
+
|
|
551
|
+
def _unique(values: Iterable[str]) -> list[str]:
|
|
552
|
+
return list(dict.fromkeys(item for item in values if item))
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
def _terms(question: str) -> list[str]:
    """Reduce a question to its search terms.

    Tokenizes with ``_word_tokens`` and drops single-character tokens and stopwords.
    Order and duplicates are preserved.
    """
    ignored = frozenset(
        (
            "a",
            "an",
            "and",
            "are",
            "does",
            "flow",
            "for",
            "from",
            "how",
            "in",
            "is",
            "of",
            "the",
            "to",
            "what",
            "where",
            "which",
        )
    )
    kept: list[str] = []
    for token in _word_tokens(question):
        if len(token) <= 1 or token in ignored:
            continue
        kept.append(token)
    return kept
|
|
576
|
+
|
|
577
|
+
|
|
578
|
+
def _tokenize(text: str) -> set[str]:
    """Field-side tokenizer: every word token of ``text`` plus its term variants.

    Uses the same ``_word_tokens`` splitting as ``_terms``, so query terms are
    compared with whole tokens rather than substrings.
    """
    return {variant for word in _word_tokens(text) for variant in _term_variants(word)}
|
|
585
|
+
|
|
586
|
+
|
|
587
|
+
def _word_tokens(text: str) -> list[str]:
|
|
588
|
+
# Split common code identifiers before lowercasing so a human query for "upload"
|
|
589
|
+
# can match names such as UnifiedUploadBox or ocrService. \w is unicode-aware in
|
|
590
|
+
# py3, so "café" / "日本語" still survive tokenization.
|
|
591
|
+
spaced = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", " ", text)
|
|
592
|
+
tokens: list[str] = []
|
|
593
|
+
for token in re.findall(r"\w+", spaced.lower()):
|
|
594
|
+
tokens.extend(part for part in token.split("_") if part)
|
|
595
|
+
return tokens
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
def _term_matches(term: str, tokens: set[str]) -> bool:
    """Whether ``term`` or any of its variants is one of ``tokens``."""
    return not _term_variants(term).isdisjoint(tokens)
|
|
600
|
+
|
|
601
|
+
|
|
602
|
+
def _term_variants(token: str) -> set[str]:
|
|
603
|
+
variants = {token}
|
|
604
|
+
if len(token) > 5 and token.endswith("ies"):
|
|
605
|
+
variants.add(f"{token[:-3]}y")
|
|
606
|
+
if len(token) > 4 and token.endswith("es"):
|
|
607
|
+
variants.add(token[:-2])
|
|
608
|
+
if len(token) > 3 and token.endswith("s"):
|
|
609
|
+
variants.add(token[:-1])
|
|
610
|
+
if len(token) >= 6 and token[-1] in {"a", "e", "i", "o"}:
|
|
611
|
+
variants.add(token[:-1])
|
|
612
|
+
return {variant for variant in variants if len(variant) > 1}
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def _flow_metadata_tokens(flow: Flow) -> set[str]:
    """Tokenize the searchable metadata of every node in ``flow``.

    For each node this gathers the ``domain``, ``subject``, ``value_namespace`` and
    ``operator`` entries, the items of ``values`` and ``effects``, and the ``label``
    and ``outcome`` of each branch that is a dict, then runs the combined text
    through ``_tokenize``.
    """
    scalar_keys = ("domain", "subject", "value_namespace", "operator")
    pieces: list[str] = []
    for node in flow.nodes:
        meta = node.metadata
        pieces += [str(meta.get(key, "")) for key in scalar_keys]
        for list_key in ("values", "effects"):
            pieces += [str(item) for item in meta.get(list_key, [])]
        for branch in meta.get("branches", []):
            if not isinstance(branch, dict):
                continue
            pieces.append(str(branch.get("label", "")))
            pieces.append(str(branch.get("outcome", "")))
    return _tokenize(" ".join(pieces))
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
def _normalize_path(value: str) -> str:
|
|
631
|
+
# Strip only a leading "./" prefix - not the {'.', '/'} char set, which would
|
|
632
|
+
# corrupt dot-prefixed paths like ".github/workflows/ci.yml".
|
|
633
|
+
value = value.replace("\\", "/")
|
|
634
|
+
return value[2:] if value.startswith("./") else value
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
def _path_matches_dependency(source_path: str, dependency_path: str) -> bool:
|
|
638
|
+
dependency = dependency_path.rstrip("/")
|
|
639
|
+
if not dependency:
|
|
640
|
+
return False
|
|
641
|
+
return source_path == dependency or source_path.startswith(f"{dependency}/")
|