delimit-cli 4.5.13 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. package/CHANGELOG.md +45 -0
  2. package/README.md +9 -8
  3. package/bin/delimit-cli.js +162 -1
  4. package/bin/delimit-setup.js +46 -6
  5. package/gateway/ai/_compile_status.py +154 -0
  6. package/gateway/ai/agent_dispatch.py +36 -0
  7. package/gateway/ai/backends/tools_infra.py +150 -10
  8. package/gateway/ai/daemon.py +10 -0
  9. package/gateway/ai/daily_digest.py +1 -2
  10. package/gateway/ai/delimit_daemon.py +67 -0
  11. package/gateway/ai/dispatch_gate.py +399 -0
  12. package/gateway/ai/hot_reload.py +1 -2
  13. package/gateway/ai/led193_daemon/executor.py +9 -0
  14. package/gateway/ai/ledger_manager.py +9 -0
  15. package/gateway/ai/license_core.cpython-310-x86_64-linux-gnu.so +0 -0
  16. package/gateway/ai/notify.py +39 -0
  17. package/gateway/ai/outreach_substantive.py +676 -0
  18. package/gateway/ai/reaper.py +70 -0
  19. package/gateway/ai/reddit_scanner.py +10 -5
  20. package/gateway/ai/sensing/schema.py +1 -1
  21. package/gateway/ai/sensing/signal_store.py +0 -1
  22. package/gateway/ai/server.py +5171 -1462
  23. package/gateway/ai/social_capability/fit_floor.py +114 -12
  24. package/gateway/ai/tdqs_lint.py +611 -0
  25. package/gateway/ai/usage_allowlist.py +198 -0
  26. package/gateway/ai/workers/base.py +2 -2
  27. package/gateway/ai/workers/executor.py +32 -3
  28. package/gateway/ai/workers/outreach_drafter.py +0 -1
  29. package/gateway/ai/workers/pr_drafter.py +0 -1
  30. package/gateway/ai/x_ranker.py +12 -2
  31. package/gateway/core/json_schema_diff.py +25 -1
  32. package/lib/auth-signin.js +136 -0
  33. package/lib/auth-signout.js +169 -0
  34. package/lib/delimit-template.js +11 -0
  35. package/lib/migration-2092-banner.js +213 -0
  36. package/package.json +2 -2
  37. package/server.json +4 -4
@@ -0,0 +1,611 @@
1
+ """TDQS (Tool Definition Quality Score) linter for MCP tool docstrings.
2
+
3
+ Implements LED-2108. Glama's TDQS evaluates each MCP tool's docstring
4
+ across 6 dimensions on a 1-5 scale, then aggregates to a letter grade
5
+ A/B/C/D. This module parses a target file (default: ai/server.py),
6
+ extracts every @mcp.tool() decorated function, and scores its docstring
7
+ heuristically — no LLM calls, fully deterministic.
8
+
9
+ The 6 dimensions (per glama.ai/blog/2026-04-03-tool-definition-quality-score-tdqs):
10
+
11
+ 1. side_effects — does the description disclose what gets written /
12
+ called / notified / chained / destroyed; auth /
13
+ rate-limit notes when relevant.
14
+ 2. conciseness — appropriately sized, front-loaded with purpose,
15
+ free of redundancy.
16
+ 3. coverage — enough for an agent to succeed first try: error
17
+ handling, prerequisites, return shape.
18
+ 4. parameter_semantics — each parameter has constraint/intent beyond the
19
+ schema's bare type.
20
+ 5. disambiguation — names a sibling tool or otherwise differentiates
21
+ this tool from its neighbors.
22
+ 6. when_to_use — explicit "Use when ..." / "Don't use when ...";
23
+ alternatives named.
24
+
25
+ Each dimension score is in [1, 5]. The aggregate grade maps from the mean:
26
+
27
+ A: mean >= 4.5
28
+ B: 3.5 <= mean < 4.5
29
+ C: 2.5 <= mean < 3.5
30
+ D: mean < 2.5
31
+
32
+ This module is import-safe and has no side effects on import. Use it via
33
+ the public functions :func:`lint_file` and :func:`score_tool`.
34
+ """
35
+
36
+ from __future__ import annotations
37
+
38
+ import ast
39
+ import re
40
+ from pathlib import Path
41
+ from typing import Any, Dict, List, Optional, Tuple
42
+
43
+
44
+ # ─── Grade mapping ──────────────────────────────────────────────────────
45
+
46
+ # Inferred from Glama's letter-grade badge progression. Refine if Glama
47
+ # publishes the explicit thresholds. Boundary semantics: mean >= 4.5 is A,
48
+ # mean strictly < 2.5 is D, with B/C in between.
49
GRADE_THRESHOLDS = (
    # (letter, inclusive lower bound on the mean), ordered best grade first.
    ("A", 4.5),
    ("B", 3.5),
    ("C", 2.5),
    ("D", 0.0),
)


def grade_for_mean(mean: float) -> str:
    """Translate a mean dimension score into a letter grade.

    Args:
        mean: Mean score, nominally in [1, 5].

    Returns:
        The letter of the first entry in ``GRADE_THRESHOLDS`` whose floor is
        less than or equal to ``mean``. Values that clear no floor at all
        (negative numbers, NaN) fall back to "D".
    """
    qualifying = (letter for letter, floor in GRADE_THRESHOLDS if mean >= floor)
    return next(qualifying, "D")
63
+
64
+
65
+ # ─── Tool extraction ───────────────────────────────────────────────────
66
+
67
def _is_mcp_tool_decorator(decorator: ast.expr) -> bool:
    """Report whether a decorator node spells ``mcp.tool``, called or bare.

    Args:
        decorator: One entry of a function node's ``decorator_list``.

    Returns:
        True only when the decorator (or the callee of a decorator call) is
        the attribute ``tool`` accessed on the plain name ``mcp``.
    """
    # ``@mcp.tool(...)`` arrives as ast.Call; look through it to the callee.
    callee = decorator.func if isinstance(decorator, ast.Call) else decorator
    if not isinstance(callee, ast.Attribute):
        return False
    owner = callee.value
    return (
        isinstance(owner, ast.Name)
        and owner.id == "mcp"
        and callee.attr == "tool"
    )
79
+
80
+
81
def _function_param_names(node: ast.FunctionDef) -> List[str]:
    """List a function's named parameters, leaving out ``self`` and ``cls``.

    Args:
        node: Function definition node whose signature is inspected.

    Returns:
        Names of positional-only, regular, and keyword-only parameters, in
        that order. ``*args`` / ``**kwargs`` are not read and so never appear.
    """
    signature = node.args
    declared = [*signature.posonlyargs, *signature.args, *signature.kwonlyargs]
    return [param.arg for param in declared if param.arg not in ("self", "cls")]
90
+
91
+
92
def _function_body_text(source: str, node: ast.FunctionDef) -> str:
    """Return the source text that ``node`` spans, or "" if unavailable.

    The text comes from ``ast.get_source_segment`` applied to the function
    node itself, so it covers the whole definition (signature and docstring
    included), not only the statements after the docstring.

    Args:
        source: Full source text the node was parsed from.
        node: Function definition node to slice out of ``source``.

    Returns:
        The segment text; "" when the segment is missing, empty, or the
        lookup raises (best effort, never propagates).
    """
    try:
        segment = ast.get_source_segment(source, node)
    except Exception:
        return ""
    return segment if segment else ""
98
+
99
+
100
def extract_tools(source: str) -> List[Dict[str, Any]]:
    """Collect one record for every function decorated with ``mcp.tool``.

    Args:
        source: Python source text to parse.

    Returns:
        A list of dicts with keys ``name``, ``docstring``, ``params``,
        ``body_text``, ``lineno`` and ``has_decorator`` (always True).
        Functions lacking a docstring are kept with ``docstring=""`` so the
        scorers can flag them. Records appear in ``ast.walk`` order. Source
        that fails to parse yields an empty list.
    """
    try:
        module = ast.parse(source)
    except SyntaxError:
        return []

    function_kinds = (ast.FunctionDef, ast.AsyncFunctionDef)
    records: List[Dict[str, Any]] = []
    for candidate in ast.walk(module):
        is_tool = isinstance(candidate, function_kinds) and any(
            _is_mcp_tool_decorator(dec) for dec in candidate.decorator_list
        )
        if is_tool:
            records.append(
                {
                    "name": candidate.name,
                    "docstring": ast.get_docstring(candidate) or "",
                    "params": _function_param_names(candidate),
                    "body_text": _function_body_text(source, candidate),
                    "lineno": candidate.lineno,
                    "has_decorator": True,
                }
            )
    return records
130
+
131
+
132
+ # ─── Per-dimension scorers ─────────────────────────────────────────────
133
+ #
134
+ # Each scorer returns (score, hint) where score is in [1, 5] and hint is a
135
+ # short remediation note to display when score < 4. Scorers must be
136
+ # deterministic and side-effect-free.
137
+
138
+ # Vocabulary lookups used across scorers. Pre-compiled where useful.
139
# Lower-case substrings whose presence in a docstring counts as side-effect
# disclosure. Matching is by plain substring, so overlapping entries (for
# example "writes" and "write") can each register for the same word.
_SIDE_EFFECT_KEYWORDS = (
    "writes",
    "write",
    "wrote",
    "calls",
    "call ",
    "calling",
    "notifies",
    "notify",
    "chains",
    "chain",
    "auto-chain",
    "auto-chains",
    "modifies",
    "modify",
    "creates",
    "create",
    "deletes",
    "delete",
    "destroys",
    "records",
    "record ",
    "fetches",
    "fetch",
    "downloads",
    "posts ",
    "post to",
    "publishes",
    "raises",
    "returns",
    "no side effects",
    "side-effect-free",
    "pure",
    "auth",
    "license",
    "rate limit",
    "rate-limit",
    "side effects",
    "side effect",
)

# Filler phrases that weaken a docstring's opening sentence.
_BOILERPLATE_PHRASES = (
    "this function",
    "this tool will",
    "as a function",
    "you can use this",
)

# Case-insensitive cues that a docstring contrasts the tool with a sibling.
_SIBLING_PATTERNS = tuple(
    re.compile(expression, re.IGNORECASE)
    for expression in (
        r"\bunlike\s+\w+",
        r"\bdiffer(s|ent)\s+from\b",
        r"\bvs\.?\s+`?delimit_\w+",
        r"\bsibling\b",
        r"\bcomplements?\b",
        r"\bcompare(?:d|s)?\s+(?:to|with)\b",
        r"\bnot\s+to\s+be\s+confused\s+with\b",
        r"`?delimit_\w+`?\s+(?:is|does|handles|covers)",
        r"\buse\s+`?delimit_\w+",
        r"\bsee\s+also\b",
    )
)

# Case-insensitive cues for usage / anti-usage guidance.
_WHEN_TO_USE_PATTERNS = tuple(
    re.compile(expression, re.IGNORECASE)
    for expression in (
        r"\buse\s+(?:this|when)\b",
        r"\bwhen\s+to\s+use\b",
        r"\b(?:do\s*not|don'?t)\s+use\b",
        r"\bnot\s+for\b",
        r"\bprimary\s+(?:integration|use|case)\b",
        r"\bfor\s+\w+\s*,\s*use\b",
        r"\b(?:useful|helpful)\s+(?:for|when)\b",
    )
)

# Cues that a parameter description carries a constraint, default, or
# example. The first six are case-insensitive; the final colon pattern is
# compiled without flags.
_PARAM_HINT_PATTERNS = tuple(
    re.compile(expression, re.IGNORECASE)
    for expression in (
        r"\b(default|defaults\s+to|default:)\b",
        r"\b(must|should|required|optional)\b",
        r"\b(e\.?g\.?|i\.?e\.?|example)\b",
        r"\b(range|max|min|maximum|minimum|<=|>=|cap|capped)\b",
        r"\b(true|false)\b",
        r"\b(path|url|json|yaml|repo|spec)\b",
    )
) + (
    re.compile(r":\s*\w"),  # colon followed by description text
)
195
+
196
+
197
def score_side_effects(doc: str, body: str) -> Tuple[int, str]:
    """Rate how well a docstring discloses the tool's side effects.

    Args:
        doc: The tool's docstring.
        body: Source text of the tool function, scanned for evidence of
            writes, network calls, subprocesses, notifications and similar.

    Returns:
        ``(score, hint)`` with score in [1, 5]. The base score is one more
        than the number of ``_SIDE_EFFECT_KEYWORDS`` entries found in the
        lower-cased docstring, capped at 5. Entries are matched as
        substrings, so overlapping entries each count. ``hint`` joins the
        applicable remediation notes with "; " and may be empty.
    """
    doc_text = doc.lower()
    body_text = body.lower()

    disclosed = len([kw for kw in _SIDE_EFFECT_KEYWORDS if kw in doc_text])

    # Markers in the function text that suggest an effect worth disclosing.
    effect_markers = (
        "_ledger_add", "_safe_call", "subprocess.", "requests.",
        "urlopen", "open(", ".write(", "json.dump", "yaml.dump",
        "_audit_event", "_record_evidence", "logger.warning",
        "notify_inbox", "send_notification", "supabase",
    )
    performs_effects = any(marker in body_text for marker in effect_markers)
    looks_pure = len(body) < 400 and not performs_effects

    # 0 hits -> 1, 1 -> 2, 2 -> 3, 3 -> 4, 4 or more -> 5.
    score = min(disclosed, 4) + 1

    # Effects in the code with fewer than two disclosure keywords: cap at 2.
    undisclosed = performs_effects and disclosed < 2
    if undisclosed:
        score = min(score, 2)

    # A short, effect-free helper that says what it returns (or states that
    # it has no side effects) is lifted to at least 4.
    if looks_pure and ("returns" in doc_text or "no side effects" in doc_text):
        score = max(score, 4)

    notes = []
    if score < 4:
        notes.append(
            "name what is written/called/chained "
            "(e.g. 'writes to ledger', 'auto-chains delimit_evidence_collect')"
        )
    if undisclosed:
        notes.append("body shows writes/calls but docstring does not disclose them")
    return score, "; ".join(notes)
248
+
249
+
250
def score_conciseness(doc: str) -> Tuple[int, str]:
    """Rate a docstring's length and how quickly it states its purpose.

    Args:
        doc: The tool's docstring.

    Returns:
        ``(score, hint)`` with score in [1, 5]. The score starts at 5 and is
        capped by each rule that fires; ``hint`` lists the fired rules
        (joined with "; ") only when the final score is below 4.
    """
    if not doc.strip():
        return 1, "no docstring"

    total_chars = len(doc)
    # The opening sentence is everything before the first period.
    lead = doc.split(".")[0].strip()
    lead_lower = lead.lower()
    lead_words = len(lead.split())

    too_short = total_chars < 50
    very_long = total_chars > 800
    lead_too_short = lead_words < 3
    lead_too_long = lead_words > 30
    boilerplate = next(
        (phrase for phrase in _BOILERPLATE_PHRASES if phrase in lead_lower),
        None,
    )

    caps = [5]
    # Length: under 50 or over 1500 chars caps at 2; 801-1500 caps at 3.
    if too_short or total_chars > 1500:
        caps.append(2)
    elif very_long:
        caps.append(3)
    # Opening sentence: fewer than 3 or more than 30 words caps at 3.
    if lead_too_short or lead_too_long:
        caps.append(3)
    # A boilerplate phrase in the opening sentence caps at 3.
    if boilerplate is not None:
        caps.append(3)
    score = min(caps)

    notes = []
    if score < 4:
        if too_short:
            notes.append("docstring is too short (<50 chars)")
        elif very_long:
            notes.append("docstring is very long (>800 chars), trim or restructure")
        if lead_too_short:
            notes.append("first sentence is too short to convey purpose")
        elif lead_too_long:
            notes.append("first sentence is too long; lead with verb+object")
        if boilerplate is not None:
            notes.append(f"avoid boilerplate phrase '{boilerplate}'")
    return score, "; ".join(notes)
299
+
300
+
301
def score_coverage(doc: str, params: List[str]) -> Tuple[int, str]:
    """Rate whether a docstring gives an agent enough to succeed first try.

    Args:
        doc: The tool's docstring.
        params: Names of the tool's parameters (may be empty).

    Returns:
        ``(score, hint)`` with score in [1, 5]. Starting from 1: +2 when the
        tool has parameters and an Args-style section is present, or +1 when
        the tool has no parameters; +1 for return documentation; +1 for any
        error or prerequisite wording. ``hint`` names the missing pieces,
        joined with "; ".
    """
    if not doc.strip():
        return 1, "no docstring"

    text = doc.lower()

    def mentions(*needles: str) -> bool:
        return any(needle in text for needle in needles)

    args_documented = mentions("args:", "arguments:", "parameters:")
    returns_documented = mentions("returns:", "returns ", "return value")
    caveats_documented = mentions(
        # error wording
        "raises:", "errors:", "error:", "fails", "exception",
        # prerequisite wording
        "prerequisite", "requires", "before", "auth",
    )

    if params:
        score = 3 if args_documented else 1
    else:
        # No params: an Args section is optional, so grant partial credit.
        score = 2
    score += int(returns_documented) + int(caveats_documented)

    notes = []
    if params and not args_documented:
        notes.append("add Args: section documenting each parameter")
    if not returns_documented:
        notes.append("describe the return shape (Returns: ...)")
    if not caveats_documented:
        notes.append("note prerequisites or error conditions where they exist")
    return score, "; ".join(notes)
344
+
345
+
346
def _param_description(args_block: str, param: str) -> Optional[str]:
    """Return the description text for ``param`` in an Args block, or None.

    Accepts both ``name: text`` and the typed Google form ``name (type): text``.
    Lines after the entry that are indented deeper than the entry itself are
    treated as wrapped continuation text and appended, separated by spaces.
    """
    entry = re.search(
        rf"^([ \t]*){re.escape(param)}[ \t]*(?:\([^)\n]*\))?[ \t]*:[ \t]*(.*)$",
        args_block,
        re.MULTILINE,
    )
    if entry is None:
        return None

    base_indent = len(entry.group(1).expandtabs())
    pieces = [entry.group(2).strip()]
    for line in args_block[entry.end():].splitlines():
        expanded = line.expandtabs()
        text = expanded.strip()
        if not text:
            continue
        # A line at or left of the entry's indent starts the next entry.
        if len(expanded) - len(expanded.lstrip()) <= base_indent:
            break
        pieces.append(text)
    return " ".join(piece for piece in pieces if piece)


def score_parameter_semantics(doc: str, params: List[str]) -> Tuple[int, str]:
    """Rate whether the docstring explains parameters beyond their bare type.

    A parameter is "documented" when the Args block has an entry for it
    (``name: ...`` or ``name (type): ...``). It is "well described" when its
    description, including wrapped continuation lines, is longer than 12
    characters and matches at least one of ``_PARAM_HINT_PATTERNS``.

    Args:
        doc: The tool's docstring (Google-style ``Args:`` / ``Arguments:`` /
            ``Parameters:`` section expected).
        params: Names of the tool's parameters.

    Returns:
        ``(score, hint)`` with score in [1, 5]. Tools without parameters get
        a neutral ``(4, "")``. Otherwise the score is
        ``round(1 + 2 * documented_fraction + 2 * well_described_fraction)``
        clamped to [1, 5]; ``hint`` joins remediation notes with "; ".
    """
    if not params:
        # No params — neutral 4 (cannot fail this dimension by absence).
        return 4, ""

    if not doc.strip():
        return 1, "no docstring; cannot describe params"

    # The block runs from the section header to the first blank line, the
    # next "Section:" header at column 0, or the end of the docstring.
    args_match = re.search(
        r"(?:Args|Arguments|Parameters):\s*\n(.*?)(?:\n\s*\n|\Z|\n[A-Z][a-z]+:)",
        doc,
        re.DOTALL,
    )
    if not args_match:
        return 1, "no Args block found"

    args_block = args_match.group(1)

    documented = 0
    well_described = 0
    for p in params:
        desc = _param_description(args_block, p)
        if desc is None:
            continue
        documented += 1
        # Length gate first: very short descriptions cannot qualify.
        if len(desc) > 12 and any(rx.search(desc) for rx in _PARAM_HINT_PATTERNS):
            well_described += 1

    if not documented:
        return 1, "no params documented in Args block"

    coverage = documented / len(params)
    quality = well_described / len(params)

    # Weighted combination, rounded and clamped to the 1-5 scale.
    raw = 1 + (coverage * 2) + (quality * 2)
    score = max(1, min(5, int(round(raw))))

    hints = []
    if coverage < 1.0:
        hints.append(
            f"only {documented}/{len(params)} parameters documented in Args block"
        )
    if quality < 0.6:
        hints.append(
            "param descriptions lack constraints/defaults/examples beyond bare types"
        )
    return score, "; ".join(hints)
404
+
405
+
406
def score_disambiguation(doc: str, name: str) -> Tuple[int, str]:
    """Rate how clearly a docstring sets the tool apart from its siblings.

    Args:
        doc: The tool's docstring.
        name: The tool's own name; mentions of it are removed before the
            sibling patterns are applied so self-references earn nothing.

    Returns:
        ``(score, hint)`` with score in [1, 5]: 5 for two or more sibling
        patterns or two or more contrast words, 4 for one of either, 3 when
        another ``delimit_`` name is merely mentioned, otherwise 2 (1 for an
        empty docstring). ``hint`` is set when the score is below 4.
    """
    if not doc.strip():
        return 1, "no docstring"

    # Self-mentions don't count.
    own_name = re.compile(rf"\b{re.escape(name)}\b", re.IGNORECASE)
    others_only = own_name.sub("", doc)

    pattern_hits = len([rx for rx in _SIBLING_PATTERNS if rx.search(others_only)])

    # Contrast words are looked up in the full lower-cased docstring.
    lowered = doc.lower()
    contrast_terms = (
        "unlike", "differs", "vs ", "vs.", "alternative", "instead of",
        "rather than", "prefer", "complement", "compared to",
    )
    word_hits = sum(1 for term in contrast_terms if term in lowered)

    strongest = max(pattern_hits, word_hits)
    if strongest >= 2:
        score = 5
    elif strongest >= 1:
        score = 4
    elif "delimit_" in others_only.lower():
        score = 3
    else:
        # No sibling reference at all, regardless of docstring length.
        score = 2

    notes = []
    if score < 4:
        notes.append(
            "name a sibling tool and contrast (e.g. 'unlike delimit_diff, this also enforces policy')"
        )
    return score, "; ".join(notes)
442
+
443
+
444
def score_when_to_use(doc: str) -> Tuple[int, str]:
    """Rate whether a docstring gives usage and anti-usage guidance.

    Args:
        doc: The tool's docstring.

    Returns:
        ``(score, hint)`` with score in [1, 5]: 5 when both "when to use" and
        a negative form ("when not to use" / "do not use" / "don't use")
        appear; 4 for "when to use" alone or two or more guidance patterns;
        3 for one pattern; 2 for a docstring over 200 characters with none;
        otherwise 1. ``hint`` is set when the score is below 4.
    """
    if not doc.strip():
        return 1, "no docstring"

    guidance_hits = len([rx for rx in _WHEN_TO_USE_PATTERNS if rx.search(doc)])
    says_when = re.search(r"\bwhen\s+to\s+use\b", doc, re.IGNORECASE) is not None
    negative_forms = (
        r"\bwhen\s+(?:not|NOT)\s+to\s+use\b",
        r"\b(?:do\s*not|don'?t)\s+use\b",
    )
    says_when_not = any(
        re.search(form, doc, re.IGNORECASE) for form in negative_forms
    )

    if says_when and says_when_not:
        score = 5
    elif says_when or guidance_hits >= 2:
        score = 4
    elif guidance_hits >= 1:
        score = 3
    elif len(doc) > 200:
        score = 2
    else:
        score = 1

    notes = []
    if score < 4:
        notes.append("add explicit 'When to use:' / 'When NOT to use:' guidance")
    return score, "; ".join(notes)
471
+
472
+
473
+ # ─── Aggregation ───────────────────────────────────────────────────────
474
+
475
# Dimension names, in the order they are scored and reported.
DIMENSIONS = (
    "side_effects",
    "conciseness",
    "coverage",
    "parameter_semantics",
    "disambiguation",
    "when_to_use",
)


def score_tool(tool: Dict[str, Any]) -> Dict[str, Any]:
    """Score one tool record across all six dimensions.

    Args:
        tool: Record shaped like the ones ``extract_tools`` builds. Missing
            or falsy ``docstring`` / ``params`` / ``body_text`` / ``name``
            entries are treated as empty.

    Returns:
        Dict with ``name``, ``lineno``, ``scores`` (dimension -> int),
        ``mean_score`` (rounded to 2 places), ``grade`` (from the unrounded
        mean) and ``defects`` (one ``{dim, score, hint}`` entry per
        dimension scoring below 4, in ``DIMENSIONS`` order).
    """
    doc = tool.get("docstring") or ""
    params = tool.get("params") or []
    body = tool.get("body_text") or ""
    name = tool.get("name") or ""

    # Each scorer yields (score, hint); keys follow DIMENSIONS order.
    outcomes = {
        "side_effects": score_side_effects(doc, body),
        "conciseness": score_conciseness(doc),
        "coverage": score_coverage(doc, params),
        "parameter_semantics": score_parameter_semantics(doc, params),
        "disambiguation": score_disambiguation(doc, name),
        "when_to_use": score_when_to_use(doc),
    }
    scores = {dim: outcome[0] for dim, outcome in outcomes.items()}
    hints = {dim: outcome[1] for dim, outcome in outcomes.items()}

    mean = sum(scores.values()) / len(scores)

    defects = []
    for dim in DIMENSIONS:
        if scores[dim] < 4:
            defects.append({"dim": dim, "score": scores[dim], "hint": hints[dim]})

    return {
        "name": name,
        "lineno": tool.get("lineno"),
        "scores": scores,
        "mean_score": round(mean, 2),
        "grade": grade_for_mean(mean),
        "defects": defects,
    }
532
+
533
+
534
def aggregate(tool_results: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Combine per-tool results into corpus-level means and a grade.

    Args:
        tool_results: Per-tool dicts carrying ``scores`` (keyed by every
            name in ``DIMENSIONS``) and ``mean_score``.

    Returns:
        Dict with ``grade``, ``mean_score``, ``dim_means`` and
        ``tool_count``. Means are rounded to 2 places and the grade is
        derived from the rounded overall mean. With no results the grade is
        "N/A", the mean 0.0, and ``dim_means`` empty.
    """
    if not tool_results:
        return {"grade": "N/A", "mean_score": 0.0, "dim_means": {}, "tool_count": 0}

    count = len(tool_results)
    dim_means: Dict[str, float] = {
        dim: round(sum(entry["scores"][dim] for entry in tool_results) / count, 2)
        for dim in DIMENSIONS
    }
    overall = round(sum(entry["mean_score"] for entry in tool_results) / count, 2)

    return {
        "grade": grade_for_mean(overall),
        "mean_score": overall,
        "dim_means": dim_means,
        "tool_count": count,
    }
552
+
553
+
554
def _lint_failure(target_file: str, message: str) -> Dict[str, Any]:
    """Build the error-shaped lint result: no tools and an "N/A" aggregate."""
    return {
        "error": message,
        "tools": [],
        "aggregate": {"grade": "N/A", "mean_score": 0.0, "dim_means": {}, "tool_count": 0},
        "target_file": target_file,
    }


def lint_file(target_file: str) -> Dict[str, Any]:
    """Lint a Python source file and return TDQS results.

    Args:
        target_file: Path to a Python file containing @mcp.tool()-decorated functions.

    Returns:
        On success: ``{tools: [...], aggregate: {...}, target_file: ...}``.
        On failure the same keys plus ``error``, with ``tools`` empty and an
        "N/A" aggregate. Failures reported this way: the path does not
        exist, is not a regular file, cannot be read or decoded as UTF-8, or
        does not parse as Python.
    """
    path = Path(target_file)
    if not path.exists():
        return _lint_failure(target_file, f"target_file not found: {target_file}")
    # exists() is also true for directories, which read_text cannot open.
    if not path.is_file():
        return _lint_failure(
            target_file, f"target_file is not a regular file: {target_file}"
        )

    try:
        source = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        return _lint_failure(
            target_file, f"could not read target_file {target_file}: {exc}"
        )

    # extract_tools returns [] for unparsable source, which would look like
    # a valid file with zero tools; check here so the failure is reported.
    try:
        ast.parse(source)
    except (SyntaxError, ValueError) as exc:
        return _lint_failure(
            target_file, f"target_file is not valid Python: {target_file} ({exc})"
        )

    scored = [score_tool(record) for record in extract_tools(source)]
    return {
        "tools": scored,
        "aggregate": aggregate(scored),
        "target_file": str(path),
    }
580
+
581
+
582
def render_human(result: Dict[str, Any]) -> str:
    """Format a ``lint_file`` result as a plain-text report.

    Args:
        result: Dict with ``aggregate``, ``tools`` and ``target_file``, or an
            error result carrying a truthy ``error`` entry.

    Returns:
        ``"ERROR: <message>"`` for an error result. Otherwise a
        newline-joined report: a summary header, the per-dimension means,
        then each tool that has defects, ordered by ascending mean score,
        with one line per defect.
    """
    failure = result.get("error")
    if failure:
        return f"ERROR: {failure}"

    summary = result["aggregate"]
    report = [
        f"TDQS lint report — {result['target_file']}",
        f"Tools scored: {summary['tool_count']}",
        f"Aggregate grade: {summary['grade']} (mean={summary['mean_score']:.2f})",
        "Per-dimension means:",
    ]
    report.extend(
        f" {dim:<22} {mean:.2f}"
        for dim, mean in summary.get("dim_means", {}).items()
    )
    report.append("")

    # Lowest mean first so the tools needing the most work lead the list.
    by_mean = sorted(result["tools"], key=lambda entry: entry["mean_score"])
    report.append("Tools with defects (worst first):")
    for entry in by_mean:
        if entry["defects"]:
            report.append(
                f" [{entry['grade']}] {entry['name']} "
                f"(mean={entry['mean_score']:.2f}, line {entry['lineno']})"
            )
            for defect in entry["defects"]:
                note = defect["hint"] or "(no specific hint)"
                report.append(f" - {defect['dim']}: {defect['score']}/5 — {note}")
    return "\n".join(report)