codedebrief 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. codedebrief/__init__.py +12 -0
  2. codedebrief/analysis/__init__.py +16 -0
  3. codedebrief/analysis/common.py +527 -0
  4. codedebrief/analysis/discovery.py +100 -0
  5. codedebrief/analysis/languages/__init__.py +6 -0
  6. codedebrief/analysis/languages/_common.py +68 -0
  7. codedebrief/analysis/languages/c.py +96 -0
  8. codedebrief/analysis/languages/cpp.py +146 -0
  9. codedebrief/analysis/languages/csharp.py +137 -0
  10. codedebrief/analysis/languages/go.py +157 -0
  11. codedebrief/analysis/languages/java.py +158 -0
  12. codedebrief/analysis/languages/php.py +83 -0
  13. codedebrief/analysis/languages/ruby.py +75 -0
  14. codedebrief/analysis/languages/rust.py +96 -0
  15. codedebrief/analysis/project.py +373 -0
  16. codedebrief/analysis/python.py +939 -0
  17. codedebrief/analysis/registry.py +320 -0
  18. codedebrief/analysis/treesitter.py +884 -0
  19. codedebrief/analysis/typescript.py +1019 -0
  20. codedebrief/artifacts.py +49 -0
  21. codedebrief/cli.py +585 -0
  22. codedebrief/config.py +226 -0
  23. codedebrief/doctor.py +175 -0
  24. codedebrief/install.py +441 -0
  25. codedebrief/mcp_server.py +2720 -0
  26. codedebrief/model.py +189 -0
  27. codedebrief/py.typed +1 -0
  28. codedebrief/quality.py +392 -0
  29. codedebrief/query.py +641 -0
  30. codedebrief/render/__init__.py +6 -0
  31. codedebrief/render/assets/generated/codedebrief-viewer-runtime.iife.js +10 -0
  32. codedebrief/render/assets/panels.js +462 -0
  33. codedebrief/render/assets/shell.js +1649 -0
  34. codedebrief/render/assets/styles.css +1715 -0
  35. codedebrief/render/assets/tree.js +616 -0
  36. codedebrief/render/html.py +191 -0
  37. codedebrief/render/markdown.py +153 -0
  38. codedebrief/render/payload.py +326 -0
  39. codedebrief/render/snapshot.py +769 -0
  40. codedebrief/schema/codedebrief.schema.json +449 -0
  41. codedebrief/util.py +65 -0
  42. codedebrief/validation.py +214 -0
  43. codedebrief-0.11.0.dist-info/METADATA +426 -0
  44. codedebrief-0.11.0.dist-info/RECORD +48 -0
  45. codedebrief-0.11.0.dist-info/WHEEL +4 -0
  46. codedebrief-0.11.0.dist-info/entry_points.txt +2 -0
  47. codedebrief-0.11.0.dist-info/licenses/LICENSE +176 -0
  48. codedebrief-0.11.0.dist-info/licenses/NOTICE +9 -0
codedebrief/query.py ADDED
@@ -0,0 +1,641 @@
1
+ from __future__ import annotations
2
+
3
+ import re
4
+ from collections import deque
5
+ from collections.abc import Iterable
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from codedebrief.model import Flow, FlowNode, NodeKind, ProjectModel
11
+ from codedebrief.util import metadata_scope_names
12
+
13
# Relevance weight added per query term for each bucket it matches. These live as
# module-level names (not inline literals) so the ranking stays auditable and tests
# can assert exact scores.
IDENTITY_WEIGHT = 6  # term matches the flow name/symbol tokens
NODE_WEIGHT = 3  # term matches a node label token
STRUCTURE_WEIGHT = 5  # term matches path/language/scope tokens (also used per structured filter)
METADATA_WEIGHT = 2  # term matches decision metadata tokens
# Pure tie-breaker between otherwise-equal matches: it is applied only when the score
# is already positive, so being an entrypoint can never create a match by itself.
ENTRYPOINT_BONUS = 1
# Divisor `_navigation_cap` applies to a token budget to get a per-list item cap.
NAVIGATION_TOKENS_PER_ITEM = 60
23
+
24
+
25
+ @dataclass(slots=True)
26
+ class QueryMatch:
27
+ flow: Flow
28
+ score: int
29
+ reasons: list[str]
30
+
31
+ def to_dict(self, include_source: bool = True) -> dict[str, Any]:
32
+ """The single serialization used by MCP query/context tools."""
33
+ payload: dict[str, Any] = {
34
+ "flow_id": self.flow.id,
35
+ "name": self.flow.name,
36
+ "language": self.flow.language,
37
+ "entry_kind": self.flow.entry_kind,
38
+ "framework": self.flow.framework,
39
+ "scope": metadata_scope_names(self.flow.metadata),
40
+ "score": self.score,
41
+ "reasons": self.reasons,
42
+ "subgraph_flow_ids": [self.flow.id],
43
+ "next_tools": {
44
+ "agent_context": {
45
+ "tool": "agent_context",
46
+ "arguments": {"flow_id": self.flow.id},
47
+ },
48
+ "snapshot_slice": {
49
+ "tool": "snapshot_slice",
50
+ "arguments": {
51
+ "flow_ids": [self.flow.id],
52
+ "format": "svg",
53
+ "include_svg": False,
54
+ },
55
+ },
56
+ "expand_slice": {
57
+ "tool": "expand_slice",
58
+ "arguments": {"flow_ids": [self.flow.id], "direction": "neighbors"},
59
+ },
60
+ },
61
+ }
62
+ if include_source:
63
+ payload["source"] = f"{self.flow.location.path}:{self.flow.location.start_line}"
64
+ return payload
65
+
66
+
67
+ @dataclass(slots=True)
68
+ class ImpactResult:
69
+ changed_files: list[str]
70
+ directly_impacted: list[Flow]
71
+ transitively_impacted: list[Flow]
72
+ impact_reasons: dict[str, list[str]] = field(default_factory=dict)
73
+ target_flow_ids: list[str] = field(default_factory=list)
74
+ target_symbols: list[str] = field(default_factory=list)
75
+ target_dependency_paths: list[str] = field(default_factory=list)
76
+ unresolved_targets: list[dict[str, str]] = field(default_factory=list)
77
+
78
+ @property
79
+ def all_flows(self) -> list[Flow]:
80
+ seen: dict[str, Flow] = {}
81
+ for flow in self.directly_impacted + self.transitively_impacted:
82
+ seen[flow.id] = flow
83
+ return list(seen.values())
84
+
85
+ @property
86
+ def subgraph_flow_ids(self) -> list[str]:
87
+ return [flow.id for flow in self.all_flows]
88
+
89
+
90
def query_model(
    model: ProjectModel,
    question: str,
    limit: int = 10,
    scope: str | None = None,
    language: str | None = None,
    source_path: str | None = None,
    symbol: str | None = None,
    domain: str | None = None,
    value: str | None = None,
) -> list[QueryMatch]:
    """Rank the model's flows against a free-text question and optional filters.

    Args:
        model: Project model whose ``flows`` are searched.
        question: Free text; it is reduced to search terms by ``_terms``.
        limit: Maximum number of matches; a falsy or non-positive value disables
            truncation.
        scope: Keep only flows for which ``flow_in_scope`` is true.
        language: Keep only flows whose ``language`` equals this value.
        source_path, symbol, domain, value: Structured filters applied through
            ``_structured_query_filter_reasons``; a flow failing any is dropped.

    Returns:
        Matches with a positive score, ordered by score (descending), then flow
        name, then flow id.
    """
    terms = _terms(question)
    structured_filters = (scope, language, source_path, symbol, domain, value)
    if not terms and all(item is None for item in structured_filters):
        # Nothing to rank against (blank / punctuation-only question and no filter):
        # return [] so the CLI reports "No matching logic flows found." rather than
        # listing arbitrary flows.
        return []
    # Repeating a word ("user user user") must not inflate a score; dict.fromkeys
    # de-duplicates while keeping first-seen order so reason text stays stable.
    unique_terms = list(dict.fromkeys(terms))

    matches: list[QueryMatch] = []
    for flow in model.flows:
        if not flow_in_scope(flow, scope):
            continue
        if language is not None and flow.language != language:
            continue
        filter_reasons = _structured_query_filter_reasons(
            flow,
            source_path=source_path,
            symbol=symbol,
            domain=domain,
            value=value,
        )
        if filter_reasons is None:
            continue

        # Whole-token matching: "order" must not hit inside "reordering".
        name_tokens = _tokenize(f"{flow.name} {flow.symbol}")
        node_tokens = _tokenize(" ".join(node.label for node in flow.nodes))
        structure_text = " ".join(
            [
                flow.location.path,
                flow.language,
                " ".join(metadata_scope_names(flow.metadata)),
            ]
        )
        structure_tokens = _tokenize(structure_text)
        metadata_tokens = _flow_metadata_tokens(flow)

        score = 0
        reasons: list[str] = []
        for term in unique_terms:
            if _term_matches(term, name_tokens):
                score += IDENTITY_WEIGHT
                reasons.append(f"`{term}` matches the flow identity")
            if _term_matches(term, node_tokens):
                score += NODE_WEIGHT
                reasons.append(f"`{term}` appears in a decision or action")
            if _term_matches(term, structure_tokens):
                score += STRUCTURE_WEIGHT
                reasons.append(f"`{term}` matches flow structure")
            if _term_matches(term, metadata_tokens):
                score += METADATA_WEIGHT
                reasons.append(f"`{term}` appears in decision metadata")
        if filter_reasons:
            score += STRUCTURE_WEIGHT * len(filter_reasons)
            reasons.extend(filter_reasons)

        if not score:
            continue
        # Entrypoint bonus only breaks ties among real matches; it is never a match
        # by itself, hence it is applied after the zero-score check.
        if flow.is_entrypoint:
            score += ENTRYPOINT_BONUS
        matches.append(QueryMatch(flow, score, list(dict.fromkeys(reasons))))

    # Score descending, then name, then unique id: equal score+name stays stable no
    # matter the order flows were inserted in.
    matches.sort(key=lambda item: (-item.score, item.flow.name, item.flow.id))
    return matches[:limit] if limit and limit > 0 else matches
180
+
181
+
182
def impact_model(
    model: ProjectModel,
    changed_files: list[str],
    scope: str | None = None,
    *,
    flow_ids: list[str] | None = None,
    symbols: list[str] | None = None,
    dependency_paths: list[str] | None = None,
) -> ImpactResult:
    """Compute the flows impacted by changed files and explicit targets.

    A flow is directly impacted when its source file changed, when its file record
    lists a changed file as a dependency, or when it is named by an explicit flow
    id, symbol/name, or dependency-path target. Callers of impacted flows (via
    ``called_by``) are then collected transitively.

    Args:
        model: Project model providing ``flows`` and ``files``.
        changed_files: Changed paths; compared after ``_normalize_path``.
        scope: When given, only flows passing ``flow_in_scope`` are reported.
        flow_ids: Explicit flow ids to treat as impacted.
        symbols: Explicit symbols or names to treat as impacted.
        dependency_paths: Files or directories; flows located at or below them are
            treated as impacted.

    Returns:
        An ``ImpactResult`` whose flow lists are sorted by flow name and whose
        ``unresolved_targets`` lists targets that were not found or were removed
        by the scope filter.
    """
    normalized = {_normalize_path(item) for item in changed_files}
    flows = [flow for flow in model.flows if flow_in_scope(flow, scope)]
    direct = [flow for flow in flows if _normalize_path(flow.location.path) in normalized]
    by_id = {flow.id: flow for flow in model.flows}
    scoped_ids = {flow.id for flow in flows}
    target_flow_ids = _unique(flow_ids or [])
    target_symbols = _unique(symbols or [])
    target_dependency_paths = _unique(_normalize_path(item) for item in dependency_paths or [])
    unresolved_targets: list[dict[str, str]] = []
    direct_by_id: dict[str, Flow] = {}
    impact_reasons: dict[str, list[str]] = {}

    def add_reason(flow: Flow, reason: str) -> None:
        # Record each distinct reason once per flow, in first-seen order.
        reasons = impact_reasons.setdefault(flow.id, [])
        if reason not in reasons:
            reasons.append(reason)

    # 1) Flows whose own source file changed.
    for flow in direct:
        direct_by_id[flow.id] = flow
        add_reason(flow, f"source file changed `{_normalize_path(flow.location.path)}`")

    # 2) Flows in files that declare a changed file as a dependency.
    for file_record in model.files:
        dependency_matches = sorted(
            dependency
            for dependency in {_normalize_path(item) for item in file_record.dependencies}
            if dependency in normalized
        )
        if not dependency_matches:
            continue
        for flow_id in file_record.flow_ids:
            dependent_flow = by_id.get(flow_id)
            if dependent_flow is None or dependent_flow.id not in scoped_ids:
                continue
            direct_by_id[dependent_flow.id] = dependent_flow
            for dependency in dependency_matches:
                add_reason(dependent_flow, f"depends on changed file `{dependency}`")

    def add_flow(flow: Flow, target_type: str, value: str, reason: str) -> None:
        # Explicit targets outside the requested scope are reported, not applied.
        if flow.id not in scoped_ids:
            unresolved_targets.append(
                {"type": target_type, "value": value, "reason": "scope_filtered"}
            )
            return
        direct_by_id[flow.id] = flow
        add_reason(flow, reason)

    # 3) Explicit flow-id targets.
    for flow_id in target_flow_ids:
        target_flow = by_id.get(flow_id)
        if target_flow is None:
            unresolved_targets.append({"type": "flow", "value": flow_id, "reason": "not_found"})
            continue
        add_flow(target_flow, "flow", flow_id, f"explicit flow target `{flow_id}`")

    # 4) Explicit symbol/name targets (exact match on either attribute).
    for symbol in target_symbols:
        matches = [flow for flow in model.flows if symbol in (flow.symbol, flow.name)]
        if not matches:
            unresolved_targets.append({"type": "symbol", "value": symbol, "reason": "not_found"})
            continue
        for flow in matches:
            add_flow(flow, "symbol", symbol, f"explicit symbol/name target `{symbol}`")

    # 5) Explicit dependency-path targets (a file, or a directory prefix).
    for dependency_path in target_dependency_paths:
        matches = [
            flow
            for flow in model.flows
            if _path_matches_dependency(_normalize_path(flow.location.path), dependency_path)
        ]
        if not matches:
            unresolved_targets.append(
                {"type": "dependency_path", "value": dependency_path, "reason": "not_found"}
            )
            continue
        scoped_matches = [flow for flow in matches if flow.id in scoped_ids]
        if not scoped_matches:
            unresolved_targets.append(
                {
                    "type": "dependency_path",
                    "value": dependency_path,
                    "reason": "scope_filtered",
                }
            )
            continue
        for flow in scoped_matches:
            direct_by_id[flow.id] = flow
            add_reason(flow, f"dependency path target `{dependency_path}`")

    direct = list(direct_by_id.values())
    impacted_ids = set(direct_by_id)
    # Seed the breadth-first walk from the insertion-ordered dict, not from the set:
    # a caller is visited once and its reason names the first impacted callee that
    # reached it, so a hash-ordered seed would make that reason text vary from run
    # to run for callers of several impacted flows.
    queue: deque[str] = deque(direct_by_id)
    transitive: list[Flow] = []
    while queue:
        current = by_id.get(queue.popleft())
        if current is None:
            # Caller id that is not present in the model; nothing to expand.
            continue
        for caller_id in current.called_by:
            if caller_id in impacted_ids:
                continue
            impacted_ids.add(caller_id)
            queue.append(caller_id)
            caller = by_id.get(caller_id)
            if caller:
                transitive.append(caller)
                add_reason(caller, f"calls impacted flow `{current.name}`")

    # Out-of-scope callers are still walked through above (they may lead to in-scope
    # callers) but are dropped from the reported result and its reasons here.
    transitive = [flow for flow in transitive if flow_in_scope(flow, scope)]
    impacted_ids = {flow.id for flow in direct} | {flow.id for flow in transitive}
    scoped_impact_reasons = {
        flow_id: reasons
        for flow_id, reasons in sorted(impact_reasons.items())
        if flow_id in impacted_ids
    }
    return ImpactResult(
        changed_files=sorted(normalized),
        directly_impacted=sorted(direct, key=lambda item: item.name),
        transitively_impacted=sorted(transitive, key=lambda item: item.name),
        impact_reasons=scoped_impact_reasons,
        target_flow_ids=target_flow_ids,
        target_symbols=target_symbols,
        target_dependency_paths=target_dependency_paths,
        unresolved_targets=unresolved_targets,
    )
312
+
313
+
314
def flow_navigation(
    model: ProjectModel,
    target: str,
    token_budget: int = 0,
) -> dict[str, Any]:
    """A bounded navigation pack for one flow, shared by CLI and MCP.

    Args:
        model: Project model to look the target up in.
        target: Flow id, symbol, or name, resolved by ``_resolve_flow_target``.
        token_budget: Passed to ``_navigation_cap`` to bound each related list.

    Returns:
        The error payload from ``_resolve_flow_target`` when the target cannot be
        resolved; otherwise a dict with the flow's summary and counts, its called
        and caller flows, unresolved call ids, decision nodes, and follow-up tool
        calls.
    """
    flow, error = _resolve_flow_target(model, target)
    if error is not None:
        return error
    assert flow is not None

    by_id = {item.id: item for item in model.flows}
    scope = metadata_scope_names(flow.metadata)
    primary_scope = scope[0] if scope else None
    decision_nodes = [node for node in flow.nodes if node.kind is NodeKind.DECISION]

    flow_section: dict[str, Any] = dict(flow_summary(flow))
    flow_section.update(
        {
            "symbol": flow.symbol,
            "is_entrypoint": flow.is_entrypoint,
            "nodes": len(flow.nodes),
            "edges": len(flow.edges),
            "decisions": len(decision_nodes),
            "calls": len(flow.calls),
            "callers": len(flow.called_by),
            "tests": flow.tests,
        }
    )

    # The scope argument is only forwarded when the flow actually has one.
    context_arguments: dict[str, Any] = {"flow_id": flow.id, "question": flow.name}
    if primary_scope:
        context_arguments["scope"] = primary_scope

    called = _related_flow_summaries(flow.calls, by_id)
    callers = _related_flow_summaries(flow.called_by, by_id)
    # Call targets that have no flow in the model are surfaced separately.
    dangling_calls = [target_id for target_id in flow.calls if target_id not in by_id]
    decisions = [_decision_navigation(node) for node in decision_nodes]

    return {
        "flow": flow_section,
        "called_flows": _navigation_cap(called, token_budget),
        "caller_flows": _navigation_cap(callers, token_budget),
        "unresolved_call_ids": dangling_calls,
        "decision_nodes": _navigation_cap(decisions, token_budget),
        "next_tools": {
            "agent_context": {
                "tool": "agent_context",
                "arguments": context_arguments,
            },
            "snapshot_slice": {
                "tool": "snapshot_slice",
                "arguments": {
                    "flow_ids": [flow.id],
                    "format": "svg",
                    "include_svg": False,
                },
            },
            "expand_slice": {
                "tool": "expand_slice",
                "arguments": {"flow_ids": [flow.id], "direction": "neighbors"},
            },
        },
    }
371
+
372
+
373
def _resolve_flow_target(
    model: ProjectModel, target: str
) -> tuple[Flow | None, dict[str, Any] | None]:
    """Resolve ``target`` to a flow by id, then symbol, then unique name.

    Returns:
        ``(flow, None)`` on success, otherwise ``(None, error_payload)``. An exact
        id match wins immediately; otherwise the first flow whose symbol equals the
        target wins; otherwise a name match is accepted only when it is unique.
    """
    first_by_symbol: Flow | None = None
    same_name: list[Flow] = []
    for candidate in model.flows:
        if candidate.id == target:
            return candidate, None
        if first_by_symbol is None and candidate.symbol == target:
            first_by_symbol = candidate
        if candidate.name == target:
            same_name.append(candidate)

    if first_by_symbol is not None:
        return first_by_symbol, None
    if not same_name:
        return None, _flow_target_error(f"flow not found: {target}", "flow_not_found", target)
    if len(same_name) == 1:
        return same_name[0], None
    # Several flows share the name: report all candidates instead of guessing.
    return None, _flow_target_error(
        f"ambiguous flow target: {target}",
        "flow_target_ambiguous",
        target,
        matches=[flow_summary(flow) for flow in same_name],
    )
397
+
398
+
399
+ def _flow_target_error(
400
+ message: str,
401
+ error_code: str,
402
+ target: str,
403
+ *,
404
+ matches: list[dict[str, Any]] | None = None,
405
+ ) -> dict[str, Any]:
406
+ payload: dict[str, Any] = {
407
+ "error": message,
408
+ "error_code": error_code,
409
+ "target": target,
410
+ "recoverable": True,
411
+ "guardrail": (
412
+ "This reports an invalid flow-navigation target from the generated model; "
413
+ "re-run agent_context with a narrower question to locate a modeled flow."
414
+ ),
415
+ "next_tools": {
416
+ "agent_context": {
417
+ "tool": "agent_context",
418
+ "arguments": {"question": target, "token_budget": 600},
419
+ },
420
+ },
421
+ }
422
+ if matches is not None:
423
+ payload["matches"] = matches
424
+ return payload
425
+
426
+
427
def _navigation_cap(items: list[dict[str, Any]], token_budget: int) -> list[dict[str, Any]]:
    """Bound ``items`` according to ``token_budget``.

    A zero or negative budget means "no cap" and returns ``items`` itself. Otherwise
    a prefix of ``token_budget // NAVIGATION_TOKENS_PER_ITEM`` items is returned,
    never fewer than one.
    """
    if token_budget > 0:
        keep = max(1, token_budget // NAVIGATION_TOKENS_PER_ITEM)
        return items[:keep]
    return items
431
+
432
+
433
def flow_summary(flow: Flow) -> dict[str, Any]:
    """Compact identifying summary of a flow.

    Returns:
        A dict with the flow's id, name, ``<path>:<start_line>`` source, entry kind,
        language, and the scope names from ``metadata_scope_names``.
    """
    where = flow.location
    summary: dict[str, Any] = {"id": flow.id, "name": flow.name}
    summary["source"] = f"{where.path}:{where.start_line}"
    summary["entry_kind"] = flow.entry_kind
    summary["language"] = flow.language
    summary["scope"] = metadata_scope_names(flow.metadata)
    return summary
442
+
443
+
444
def _related_flow_summaries(flow_ids: list[str], by_id: dict[str, Flow]) -> list[dict[str, Any]]:
    """Summaries of the referenced flows that exist in ``by_id``, sorted by name then id.

    Ids missing from ``by_id`` are skipped silently.
    """
    summaries: list[dict[str, Any]] = []
    for flow_id in flow_ids:
        if flow_id in by_id:
            summaries.append(flow_summary(by_id[flow_id]))
    summaries.sort(key=lambda item: (item["name"], item["id"]))
    return summaries
449
+
450
+
451
def _decision_navigation(node: FlowNode) -> dict[str, Any]:
    """Navigation view of one node: identity, source position and decision metadata.

    Missing scalar metadata keys come back as ``None``; missing ``values`` and
    ``branches`` come back as empty lists.
    """
    meta = node.metadata
    where = node.location
    view: dict[str, Any] = {
        "node_id": node.id,
        "label": node.label,
        "source": f"{where.path}:{where.start_line}",
    }
    for key in ("condition", "domain", "subject", "operator"):
        view[key] = meta.get(key)
    for key in ("values", "branches"):
        view[key] = meta.get(key, [])
    return view
463
+
464
+
465
def flow_in_scope(flow: Flow, scope: str | None) -> bool:
    """Whether a flow belongs to the requested macro-part (None = no filter)."""
    if scope is None:
        return True
    return scope in metadata_scope_names(flow.metadata)
468
+
469
+
470
def _structured_query_filter_reasons(
    flow: Flow,
    *,
    source_path: str | None,
    symbol: str | None,
    domain: str | None,
    value: str | None,
) -> list[str] | None:
    """Apply the structured query filters to one flow.

    Returns:
        ``None`` as soon as any supplied filter rejects the flow; otherwise one
        reason string per supplied filter (an empty list when none were supplied).
        ``source_path`` is a substring test on normalized paths; ``symbol`` must
        equal the flow's symbol, name, or id; ``domain``/``value`` must be
        satisfied by a node found through ``_flow_has_decision_filter``.
    """
    reasons: list[str] = []

    if source_path is not None:
        needle = _normalize_path(source_path)
        if needle not in _normalize_path(flow.location.path):
            return None
        reasons.append(f"source path matches `{needle}`")

    if symbol is not None:
        if symbol not in {flow.symbol, flow.name, flow.id}:
            return None
        reasons.append(f"symbol/name matches `{symbol}`")

    wants_decision = not (domain is None and value is None)
    if wants_decision:
        if _flow_has_decision_filter(flow, domain=domain, value=value) is None:
            return None
        if domain is not None:
            reasons.append(f"decision domain matches `{domain}`")
        if value is not None:
            reasons.append(f"decision value matches `{value}`")

    return reasons
498
+
499
+
500
def _flow_has_decision_filter(
    flow: Flow, *, domain: str | None, value: str | None
) -> FlowNode | None:
    """Return the first decision node of ``flow`` satisfying every supplied filter.

    ``domain`` is compared with the stringified ``domain`` and ``value_namespace``
    metadata of the node; ``value`` must be among ``_decision_values(node)``.
    Returns ``None`` when no decision node qualifies.
    """
    for node in flow.nodes:
        if node.kind is not NodeKind.DECISION:
            continue
        if domain is not None:
            meta = node.metadata
            node_domains = {
                str(meta.get("domain", "")),
                str(meta.get("value_namespace", "")),
            }
            if domain not in node_domains:
                continue
        if value is not None and value not in _decision_values(node):
            continue
        return node
    return None
516
+
517
+
518
def _decision_values(node: FlowNode) -> set[str]:
    """Stringified ``values`` metadata plus the ``label`` of every dict branch.

    A dict branch without a ``label`` contributes the empty string; non-dict
    branches are ignored.
    """
    meta = node.metadata
    from_values = {str(item) for item in meta.get("values", [])}
    from_branches = {
        str(branch.get("label", ""))
        for branch in meta.get("branches", [])
        if isinstance(branch, dict)
    }
    return from_values | from_branches
524
+
525
+
526
def _enum_text(value: Any) -> str:
    """Return ``value.value`` when the object has that attribute, else ``str(value)``."""
    if hasattr(value, "value"):
        return value.value
    return str(value)
528
+
529
+
530
def git_changed_files(root: Path) -> list[str]:
    """Return the sorted, de-duplicated paths git reports as changed under ``root``.

    Two git commands are run with ``root`` as the working directory: one listing
    files that differ from ``HEAD`` and one listing untracked, non-ignored files.
    A command that exits non-zero is skipped, so the result is best-effort.

    Args:
        root: Directory to run git in.

    Returns:
        Paths relative to ``root``, sorted.
    """
    import subprocess

    commands = [
        # --relative: `git diff` otherwise prints paths relative to the repository
        # top level, while `git ls-files` prints them relative to the working
        # directory; without it the two lists disagree when `root` is a
        # subdirectory of the repository.
        # -z: NUL-terminated, unquoted paths, so names containing non-ASCII or
        # special characters are not returned in git's C-quoted form.
        ["git", "diff", "--name-only", "--relative", "-z", "HEAD"],
        ["git", "ls-files", "--others", "--exclude-standard", "-z"],
    ]
    files: set[str] = set()
    for command in commands:
        result = subprocess.run(
            command,
            cwd=root,
            check=False,
            capture_output=True,
            text=True,
            # Decode as UTF-8 instead of the locale default; undecodable bytes are
            # replaced rather than raising.
            encoding="utf-8",
            errors="replace",
        )
        if result.returncode == 0:
            files.update(entry for entry in result.stdout.split("\0") if entry)
    return sorted(files)
549
+
550
+
551
def _unique(values: Iterable[str]) -> list[str]:
    """Drop falsy items and duplicates, keeping first-seen order."""
    ordered: dict[str, None] = {}
    for item in values:
        if item:
            ordered.setdefault(item, None)
    return list(ordered)
553
+
554
+
555
def _terms(question: str) -> list[str]:
    """Reduce a question to search terms.

    Tokens come from ``_word_tokens``; single-character tokens and stopwords are
    dropped. Order and duplicates are preserved.
    """
    stopwords = frozenset(
        {
            "a", "an", "and", "are", "does", "flow", "for", "from", "how",
            "in", "is", "of", "the", "to", "what", "where", "which",
        }
    )
    kept: list[str] = []
    for token in _word_tokens(question):
        if len(token) <= 1 or token in stopwords:
            continue
        kept.append(token)
    return kept
576
+
577
+
578
def _tokenize(text: str) -> set[str]:
    """Field-side counterpart of ``_terms``: the word tokens of ``text`` expanded with
    their ``_term_variants``, so query terms match whole tokens, not substrings."""
    return {variant for word in _word_tokens(text) for variant in _term_variants(word)}
585
+
586
+
587
def _word_tokens(text: str) -> list[str]:
    """Split text into lowercase word tokens, breaking camelCase and snake_case.

    The camelCase boundary (lowercase letter or digit followed by an uppercase
    letter) is split before lowercasing, so a query for "upload" can match
    identifiers such as UnifiedUploadBox or ocrService. ``\\w`` is unicode-aware,
    so words like "café" or "日本語" are kept as tokens.
    """
    camel_split = re.sub(r"(?<=[a-z0-9])(?=[A-Z])", " ", text)
    words = re.findall(r"\w+", camel_split.lower())
    # \w includes "_", so snake_case words are split here; empty pieces from
    # leading/trailing/double underscores are discarded.
    return [piece for word in words for piece in word.split("_") if piece]
596
+
597
+
598
def _term_matches(term: str, tokens: set[str]) -> bool:
    """True when at least one variant of ``term`` is present in ``tokens``."""
    return not _term_variants(term).isdisjoint(tokens)
600
+
601
+
602
def _term_variants(token: str) -> set[str]:
    """Return ``token`` plus suffix-stripped variants used for loose matching.

    Rules (each applied independently to the original token):
    ``...ies`` -> ``...y`` (length > 5), drop ``es`` (length > 4), drop ``s``
    (length > 3), and drop a final ``a``/``e``/``i``/``o`` (length >= 6).
    Variants of one character or less are discarded.
    """
    size = len(token)
    candidates = {token}
    # (suffix, minimum token length, characters to cut, replacement tail)
    suffix_rules = (
        ("ies", 6, 3, "y"),
        ("es", 5, 2, ""),
        ("s", 4, 1, ""),
    )
    for suffix, min_size, cut, tail in suffix_rules:
        if size >= min_size and token.endswith(suffix):
            candidates.add(token[:-cut] + tail)
    if size >= 6 and token[-1] in ("a", "e", "i", "o"):
        candidates.add(token[:-1])
    return {candidate for candidate in candidates if len(candidate) > 1}
613
+
614
+
615
def _flow_metadata_tokens(flow: Flow) -> set[str]:
    """Tokens drawn from the metadata of every node in ``flow``.

    Collected per node: the ``domain``, ``subject``, ``value_namespace`` and
    ``operator`` entries, each item of ``values`` and ``effects``, and the ``label``
    and ``outcome`` of every dict branch. Everything is stringified, joined with
    spaces and passed through ``_tokenize``.
    """
    scalar_keys = ("domain", "subject", "value_namespace", "operator")
    pieces: list[str] = []
    for node in flow.nodes:
        meta = node.metadata
        pieces += [str(meta.get(key, "")) for key in scalar_keys]
        pieces += [str(item) for item in meta.get("values", [])]
        pieces += [str(item) for item in meta.get("effects", [])]
        for branch in meta.get("branches", []):
            if not isinstance(branch, dict):
                continue
            pieces.append(str(branch.get("label", "")))
            pieces.append(str(branch.get("outcome", "")))
    return _tokenize(" ".join(pieces))
628
+
629
+
630
def _normalize_path(value: str) -> str:
    """Use forward slashes and drop one leading ``./`` prefix.

    Only the literal ``./`` prefix is removed - stripping the character set
    {'.', '/'} instead would corrupt dot-prefixed paths such as
    ".github/workflows/ci.yml".
    """
    forward = value.replace("\\", "/")
    return forward.removeprefix("./")
635
+
636
+
637
def _path_matches_dependency(source_path: str, dependency_path: str) -> bool:
    """Whether ``source_path`` is the dependency path itself or lies beneath it.

    Trailing slashes on ``dependency_path`` are ignored; a dependency path that is
    empty after trimming never matches. The prefix test requires a ``/`` boundary,
    so "src" does not match "srcx/a.py".
    """
    trimmed = dependency_path.rstrip("/")
    if trimmed == "":
        return False
    if source_path == trimmed:
        return True
    return source_path.startswith(trimmed + "/")
@@ -0,0 +1,6 @@
1
+ """Render the canonical CodeDebrief model for humans."""
2
+
3
+ from codedebrief.render.html import render_html
4
+ from codedebrief.render.markdown import render_markdown
5
+
6
+ __all__ = ["render_html", "render_markdown"]