claude-toolstack-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. claude_toolstack_cli-1.0.0.dist-info/METADATA +354 -0
  2. claude_toolstack_cli-1.0.0.dist-info/RECORD +48 -0
  3. claude_toolstack_cli-1.0.0.dist-info/WHEEL +5 -0
  4. claude_toolstack_cli-1.0.0.dist-info/entry_points.txt +2 -0
  5. claude_toolstack_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
  6. claude_toolstack_cli-1.0.0.dist-info/top_level.txt +1 -0
  7. cts/__init__.py +3 -0
  8. cts/__main__.py +5 -0
  9. cts/autopilot.py +633 -0
  10. cts/bundle.py +958 -0
  11. cts/cli.py +2858 -0
  12. cts/confidence.py +218 -0
  13. cts/config.py +19 -0
  14. cts/corpus/__init__.py +139 -0
  15. cts/corpus/apply.py +305 -0
  16. cts/corpus/archive.py +309 -0
  17. cts/corpus/baseline.py +294 -0
  18. cts/corpus/evaluate.py +409 -0
  19. cts/corpus/experiment_eval.py +585 -0
  20. cts/corpus/experiment_schema.py +380 -0
  21. cts/corpus/extract.py +353 -0
  22. cts/corpus/load.py +44 -0
  23. cts/corpus/model.py +114 -0
  24. cts/corpus/patch.py +467 -0
  25. cts/corpus/registry.py +420 -0
  26. cts/corpus/report.py +745 -0
  27. cts/corpus/scan.py +87 -0
  28. cts/corpus/store.py +63 -0
  29. cts/corpus/trends.py +478 -0
  30. cts/corpus/tuning_schema.py +313 -0
  31. cts/corpus/variants.py +335 -0
  32. cts/ctags.py +133 -0
  33. cts/diff_context.py +92 -0
  34. cts/errors.py +109 -0
  35. cts/http.py +89 -0
  36. cts/ranking.py +466 -0
  37. cts/render.py +388 -0
  38. cts/schema.py +96 -0
  39. cts/semantic/__init__.py +47 -0
  40. cts/semantic/candidates.py +150 -0
  41. cts/semantic/chunker.py +184 -0
  42. cts/semantic/config.py +120 -0
  43. cts/semantic/embedder.py +151 -0
  44. cts/semantic/indexer.py +159 -0
  45. cts/semantic/search.py +252 -0
  46. cts/semantic/store.py +330 -0
  47. cts/sidecar.py +431 -0
  48. cts/structural.py +305 -0
@@ -0,0 +1,585 @@
1
+ """Experiment evaluation: assign runs to variants and pick a winner.
2
+
3
+ Implements the full experiment lifecycle:
4
+ 1. **Assignment** — tag each corpus record with a variant name
5
+ 2. **Per-variant KPIs** — extract KPIs for each variant's records
6
+ 3. **Decision** — apply decision rules (primary KPI, constraints,
7
+ tie-breakers) to determine the winner
8
+
9
+ Assignment modes:
10
+ - ``manual``: records already tagged (variant field present)
11
+ - ``repo_partition``: map repo → variant via a partition dict
12
+ - ``time_window``: map timestamp → variant via date ranges
13
+
14
+ Decision algorithm:
15
+ 1. Check constraints — a variant with any constraint violation is
16
+ eliminated (unless all variants violate the same constraint).
17
+ 2. Compare primary KPI — the variant with the better value wins.
18
+ 3. Tie-breakers — if primary KPI is within noise threshold, fall
19
+ through to tie-breaker KPIs in order.
20
+ 4. If still tied → verdict ``tie``.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ from typing import Any, Dict, List, Tuple
26
+
27
+ from cts.corpus.evaluate import extract_kpis
28
+
29
# ---------------------------------------------------------------------------
# Assignment: tag records with variant names
# ---------------------------------------------------------------------------

# KPI direction maps (higher = better or lower = better).
# _compare_kpi consults these; a KPI found in neither set is treated as
# higher-is-better by default.
_HIGHER_BETTER = {
    "confidence_final_mean",
    "confidence_delta_mean",
    "semantic_lift_mean",
}
_LOWER_BETTER = {
    "truncation_rate",
    "autopilot_low_lift_rate",
    "bundle_bytes_p90",
    "should_autopilot_count",
}

# Noise thresholds — differences at or below these are not significant and
# are treated as ties by _compare_kpi (unlisted KPIs default to 0.01 there).
_NOISE: Dict[str, float] = {
    "confidence_final_mean": 0.02,
    "confidence_delta_mean": 0.02,
    "truncation_rate": 0.02,
    "autopilot_low_lift_rate": 0.05,
    "bundle_bytes_p90": 5000,
    "should_autopilot_count": 1,
    "semantic_lift_mean": 0.01,
}
56
+
57
+
58
def assign_manual(
    records: List[Dict[str, Any]],
) -> Dict[str, List[Dict[str, Any]]]:
    """Bucket records by the ``variant`` field already present on each one.

    Records lacking a ``variant`` key land in the "unassigned" bucket.
    """
    buckets: Dict[str, List[Dict[str, Any]]] = {}
    for record in records:
        buckets.setdefault(record.get("variant", "unassigned"), []).append(record)
    return buckets
70
+
71
+
72
def assign_repo_partition(
    records: List[Dict[str, Any]],
    partition: Dict[str, List[str]],
) -> Dict[str, List[Dict[str, Any]]]:
    """Split records into variants by which repo they came from.

    Args:
        records: Corpus records with a ``repo`` field.
        partition: Mapping ``{variant_name: [repo1, repo2, ...]}``.

    Returns:
        Dict of variant_name → list of records; records whose repo appears
        in no partition entry go under "unassigned".
    """
    # Invert the partition once so each record costs a single dict lookup.
    owner: Dict[str, str] = {
        repo: variant
        for variant, repos in partition.items()
        for repo in repos
    }

    buckets: Dict[str, List[Dict[str, Any]]] = {}
    for record in records:
        name = owner.get(record.get("repo", ""), "unassigned")
        buckets.setdefault(name, []).append(record)
    return buckets
97
+
98
+
99
def assign_time_window(
    records: List[Dict[str, Any]],
    windows: Dict[str, Tuple[float, float]],
) -> Dict[str, List[Dict[str, Any]]]:
    """Split records into variants by timestamp range.

    Args:
        records: Corpus records with a ``timestamp`` field (epoch seconds;
            a missing timestamp defaults to 0.0).
        windows: Mapping ``{variant_name: (start_ts, end_ts)}``. Intervals
            are half-open (``start <= ts < end``); the first matching window
            in dict order wins.

    Returns:
        Dict of variant_name → list of records; non-matching records go
        under "unassigned".
    """
    buckets: Dict[str, List[Dict[str, Any]]] = {}
    for record in records:
        ts = record.get("timestamp", 0.0)
        name = next(
            (variant for variant, (lo, hi) in windows.items() if lo <= ts < hi),
            "unassigned",
        )
        buckets.setdefault(name, []).append(record)
    return buckets
124
+
125
+
126
def assign_records(
    records: List[Dict[str, Any]],
    assignment: Dict[str, Any],
) -> Dict[str, List[Dict[str, Any]]]:
    """Dispatch to the assignment strategy named by ``assignment["mode"]``.

    Args:
        records: Corpus records.
        assignment: Assignment spec dict with ``mode`` ("manual",
            "repo_partition", or "time_window") and ``details``.

    Returns:
        Dict of variant_name → list of records. Unknown modes fall back to
        manual assignment.
    """
    mode = assignment.get("mode", "manual")
    details = assignment.get("details", {})

    if mode == "repo_partition":
        return assign_repo_partition(records, details)

    if mode == "time_window":
        # Coerce JSON-style [start, end] pairs into float tuples; entries
        # that are not 2-element sequences are dropped.
        windows: Dict[str, Tuple[float, float]] = {
            name: (float(pair[0]), float(pair[1]))
            for name, pair in details.items()
            if isinstance(pair, (list, tuple)) and len(pair) == 2
        }
        return assign_time_window(records, windows)

    return assign_manual(records)
153
+
154
+
155
+ # ---------------------------------------------------------------------------
156
+ # Decision logic
157
+ # ---------------------------------------------------------------------------
158
+
159
+
160
+ def _check_constraint(
161
+ kpis: Dict[str, Any],
162
+ constraint: Dict[str, Any],
163
+ ) -> bool:
164
+ """Check if a single constraint is satisfied.
165
+
166
+ Returns True if constraint passes, False if violated.
167
+ """
168
+ kpi_name = constraint.get("kpi", "")
169
+ operator = constraint.get("operator", "<=")
170
+ threshold = constraint.get("threshold", 0.0)
171
+
172
+ value = kpis.get(kpi_name)
173
+ if value is None:
174
+ return True # Can't check — assume ok
175
+
176
+ try:
177
+ val = float(value)
178
+ thr = float(threshold)
179
+ except (ValueError, TypeError):
180
+ return True
181
+
182
+ if operator == "<=":
183
+ return val <= thr
184
+ elif operator == ">=":
185
+ return val >= thr
186
+ elif operator == "<":
187
+ return val < thr
188
+ elif operator == ">":
189
+ return val > thr
190
+ return True
191
+
192
+
193
def check_constraints(
    kpis: Dict[str, Any],
    constraints: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Evaluate every constraint against the KPIs.

    Returns:
        One violation dict (kpi, operator, threshold, actual) per failed
        constraint, in input order; empty list when all constraints pass.
    """
    return [
        {
            "kpi": spec.get("kpi", ""),
            "operator": spec.get("operator", "<="),
            "threshold": spec.get("threshold", 0.0),
            "actual": kpis.get(spec.get("kpi", "")),
        }
        for spec in constraints
        if not _check_constraint(kpis, spec)
    ]
213
+
214
+
215
def _compare_kpi(
    kpi_name: str,
    value_a: float,
    value_b: float,
) -> str:
    """Compare two KPI values, honouring direction and noise threshold.

    Returns:
        "tie" when the absolute gap is within the KPI's noise band
        (default 0.01), otherwise "a_wins" or "b_wins" according to the
        KPI's direction. KPIs in neither direction set are assumed
        higher-is-better.
    """
    if abs(value_a - value_b) <= _NOISE.get(kpi_name, 0.01):
        return "tie"

    lower_is_better = kpi_name in _LOWER_BETTER
    a_ahead = value_a < value_b if lower_is_better else value_a > value_b
    return "a_wins" if a_ahead else "b_wins"
236
+
237
+
238
def pick_winner(
    variant_kpis: Dict[str, Dict[str, Any]],
    decision_rule: Dict[str, Any],
) -> Dict[str, Any]:
    """Apply decision rules to determine the experiment winner.

    Args:
        variant_kpis: Mapping ``{variant_name: kpi_dict}``.
        decision_rule: Decision rule with ``primary_kpi``, ``constraints``,
            and ``tie_breakers``.

    Returns:
        Result dict with ``winner``, ``verdict``, ``reasoning``, and
        ``per_variant`` (KPIs, constraint violations, elimination status).
    """
    primary_kpi = decision_rule.get("primary_kpi", "confidence_final_mean")
    constraints = decision_rule.get("constraints", [])
    tie_breakers = decision_rule.get("tie_breakers", [])

    variant_names = sorted(variant_kpis.keys())

    # Need at least 2 variants to compare
    if len(variant_names) < 2:
        return {
            "winner": variant_names[0] if variant_names else None,
            "verdict": "insufficient_variants",
            "reasoning": f"Only {len(variant_names)} variant(s) — need at least 2",
            "per_variant": {},
        }

    # Evaluate constraints and record elimination status per variant
    per_variant: Dict[str, Any] = {}
    for name in variant_names:
        kpis = variant_kpis[name]
        violations = check_constraints(kpis, constraints)
        per_variant[name] = {
            "total": kpis.get("total", 0),
            "kpis": kpis,
            "constraint_violations": violations,
            "eliminated": len(violations) > 0,
        }

    # A variant with zero records makes the comparison meaningless
    no_data = [n for n in variant_names if per_variant[n]["total"] == 0]
    if no_data:
        return {
            "winner": None,
            "verdict": "no_data",
            "reasoning": f"Variant(s) with no data: {', '.join(no_data)}",
            "per_variant": per_variant,
        }

    # Eliminate constraint violators — unless every variant violates,
    # in which case compare them all anyway but flag the verdict.
    surviving = [n for n in variant_names if not per_variant[n]["eliminated"]]
    all_violated = not surviving
    if all_violated:
        surviving = list(variant_names)

    if len(surviving) == 1:
        return {
            "winner": surviving[0],
            "verdict": "winner_by_elimination",
            "reasoning": (
                f"{surviving[0]} is the only variant passing all constraints"
            ),
            "per_variant": per_variant,
        }

    # Compare primary KPI pairwise: current best vs each challenger in turn
    best = surviving[0]
    reasoning_parts: List[str] = []
    resolved_by_tiebreaker = False

    for challenger in surviving[1:]:
        best_val = per_variant[best]["kpis"].get(primary_kpi, 0)
        chal_val = per_variant[challenger]["kpis"].get(primary_kpi, 0)

        try:
            best_val = float(best_val)
            chal_val = float(chal_val)
        except (ValueError, TypeError):
            continue  # Non-numeric primary KPI — skip this comparison

        result = _compare_kpi(primary_kpi, best_val, chal_val)

        if result == "b_wins":
            # BUG FIX: record the message BEFORE reassigning ``best``.
            # Previously ``best = challenger`` ran first, so the f-string
            # rendered "X beats X" instead of naming the dethroned variant.
            reasoning_parts.append(
                f"{challenger} beats {best} on {primary_kpi} "
                f"({chal_val:.4f} vs {best_val:.4f})"
            )
            best = challenger
        elif result == "tie":
            # Primary KPI inside the noise band — fall through to
            # tie-breaker KPIs in declared order.
            tie_broken = False
            for tb_kpi in tie_breakers:
                tb_best = per_variant[best]["kpis"].get(tb_kpi, 0)
                tb_chal = per_variant[challenger]["kpis"].get(tb_kpi, 0)
                try:
                    tb_best = float(tb_best)
                    tb_chal = float(tb_chal)
                except (ValueError, TypeError):
                    continue

                tb_result = _compare_kpi(tb_kpi, tb_best, tb_chal)
                if tb_result == "b_wins":
                    reasoning_parts.append(
                        f"Tied on {primary_kpi}, {challenger} wins tie-breaker {tb_kpi}"
                    )
                    best = challenger
                    tie_broken = True
                    resolved_by_tiebreaker = True
                    break
                elif tb_result == "a_wins":
                    reasoning_parts.append(
                        f"Tied on {primary_kpi}, {best} wins tie-breaker {tb_kpi}"
                    )
                    tie_broken = True
                    resolved_by_tiebreaker = True
                    break

            if not tie_broken:
                reasoning_parts.append(
                    f"Tied on {primary_kpi} between {best} and {challenger}"
                )
        else:
            # "a_wins" — current best stays
            reasoning_parts.append(
                f"{best} beats {challenger} on {primary_kpi} "
                f"({best_val:.4f} vs {chal_val:.4f})"
            )

    # Determine final verdict
    if resolved_by_tiebreaker:
        # Tie-breaker resolved it — trust the result
        verdict = "winner"
        winner = best
    else:
        # Declare a tie when every surviving variant's primary KPI sits
        # within the noise band around the best value.
        all_kpi_vals = []
        for name in surviving:
            val = per_variant[name]["kpis"].get(primary_kpi, 0)
            try:
                all_kpi_vals.append((name, float(val)))
            except (ValueError, TypeError):
                all_kpi_vals.append((name, 0.0))

        noise = _NOISE.get(primary_kpi, 0.01)
        best_val_f = dict(all_kpi_vals).get(best, 0.0)
        all_within_noise = all(abs(v - best_val_f) <= noise for _, v in all_kpi_vals)

        if all_within_noise and len(surviving) > 1:
            verdict = "tie"
            winner = None
        else:
            verdict = "winner"
            winner = best

    if all_violated:
        verdict = f"{verdict}_all_constraints_violated"

    return {
        "winner": winner,
        "verdict": verdict,
        "reasoning": (
            "; ".join(reasoning_parts) if reasoning_parts else "No comparison needed"
        ),
        "per_variant": per_variant,
    }
412
+
413
+
414
+ # ---------------------------------------------------------------------------
415
+ # Full experiment evaluation
416
+ # ---------------------------------------------------------------------------
417
+
418
+
419
def evaluate_experiment(
    records: List[Dict[str, Any]],
    experiment: Dict[str, Any],
) -> Dict[str, Any]:
    """End-to-end experiment evaluation.

    Pipeline: assign records to variants, extract KPIs per variant, then
    apply the decision rule to pick a winner.

    Args:
        records: Corpus records (all variants mixed).
        experiment: Experiment envelope dict (id, variants, assignment,
            decision_rule).

    Returns:
        Result dict with per-variant KPIs, winner, verdict, and reasoning.
    """
    assignment = experiment.get("assignment", {"mode": "manual"})
    decision_rule = experiment.get("decision_rule", {})
    variant_names = [
        spec.get("name", f"V{idx}")
        for idx, spec in enumerate(experiment.get("variants", []))
    ]

    # Assign records to variants
    groups = assign_records(records, assignment)

    # KPIs are computed for every declared variant; a variant with no
    # assigned records gets KPIs over an empty list.
    variant_kpis: Dict[str, Dict[str, Any]] = {
        name: extract_kpis(groups.get(name, [])) for name in variant_names
    }

    decision = pick_winner(variant_kpis, decision_rule)

    return {
        "experiment_id": experiment.get("id", ""),
        "variant_count": len(variant_names),
        "total_records": len(records),
        "unassigned_records": len(groups.get("unassigned", [])),
        "per_variant": decision["per_variant"],
        "winner": decision["winner"],
        "verdict": decision["verdict"],
        "reasoning": decision["reasoning"],
        "decision_rule": decision_rule,
    }
468
+
469
+
470
+ # ---------------------------------------------------------------------------
471
+ # Renderers
472
+ # ---------------------------------------------------------------------------
473
+
474
+
475
def render_experiment_result_text(result: Dict[str, Any]) -> str:
    """Render experiment evaluation as plain text."""
    banner = "=" * 60
    out: List[str] = [
        banner,
        "EXPERIMENT EVALUATION",
        banner,
        "",
        f"Experiment: {result.get('experiment_id', '?')}",
        f"Records: {result.get('total_records', 0)}",
        f"Unassigned: {result.get('unassigned_records', 0)}",
        f"Verdict: {result.get('verdict', '?')}",
        f"Winner: {result.get('winner') or 'none'}",
        f"Reasoning: {result.get('reasoning', '')}",
        "",
    ]

    for name in sorted(result.get("per_variant", {})):
        details = result["per_variant"][name]
        out.append(f"--- Variant: {name} ---")
        out.append(f" Records: {details.get('total', 0)}")
        out.append(f" Eliminated: {details.get('eliminated', False)}")
        for violation in details.get("constraint_violations", []):
            out.append(
                f" VIOLATION: {violation['kpi']} {violation['operator']} "
                f"{violation['threshold']} (actual: {violation['actual']})"
            )
        for key, value in sorted(details.get("kpis", {}).items()):
            if key != "total":
                out.append(f" {key}: {value}")
        out.append("")

    return "\n".join(out)
510
+
511
+
512
def render_experiment_result_json(result: Dict[str, Any]) -> str:
    """Render experiment evaluation as pretty-printed JSON.

    Uses 2-space indentation; values that are not JSON-serializable are
    stringified via ``default=str``.
    """
    import json

    return json.dumps(result, indent=2, default=str)
517
+
518
+
519
def render_experiment_result_markdown(result: Dict[str, Any]) -> str:
    """Render experiment evaluation as Markdown."""
    out: List[str] = ["# Experiment Evaluation", ""]

    out.append(f"**Experiment:** {result.get('experiment_id', '?')}")
    out.append(f"**Records:** {result.get('total_records', 0)}")
    out.append(f"**Verdict:** {result.get('verdict', '?')}")
    out.append(f"**Winner:** {result.get('winner') or 'none'}")
    out.append(f"**Reasoning:** {result.get('reasoning', '')}")
    out.append("")

    per_variant = result.get("per_variant", {})
    variant_names = sorted(per_variant.keys())
    if variant_names:
        # Union of KPI names across all variants, minus the record count.
        kpi_names = sorted(
            {
                kpi
                for details in per_variant.values()
                for kpi in details.get("kpis", {})
            }
            - {"total"}
        )

        # Comparison table: header, separator, record counts, one row per KPI.
        out.append("| KPI |" + "".join(f" {n} |" for n in variant_names))
        out.append("|-----|" + "------|" * len(variant_names))
        out.append(
            "| Records |"
            + "".join(f" {per_variant[n].get('total', 0)} |" for n in variant_names)
        )

        for kpi in kpi_names:
            cells = []
            for n in variant_names:
                val = per_variant[n].get("kpis", {}).get(kpi, "—")
                cells.append(f" {val:.4f} |" if isinstance(val, float) else f" {val} |")
            out.append(f"| {kpi} |" + "".join(cells))

        out.append("")

    # One section per variant that has constraint violations.
    for n in variant_names:
        violations = per_variant[n].get("constraint_violations", [])
        if violations:
            out.append(f"### ⚠ {n} — Constraint Violations")
            for v in violations:
                out.append(
                    f"- `{v['kpi']}` {v['operator']} {v['threshold']} "
                    f"(actual: {v['actual']})"
                )
            out.append("")

    return "\n".join(out)