claude-toolstack-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. claude_toolstack_cli-1.0.0.dist-info/METADATA +354 -0
  2. claude_toolstack_cli-1.0.0.dist-info/RECORD +48 -0
  3. claude_toolstack_cli-1.0.0.dist-info/WHEEL +5 -0
  4. claude_toolstack_cli-1.0.0.dist-info/entry_points.txt +2 -0
  5. claude_toolstack_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
  6. claude_toolstack_cli-1.0.0.dist-info/top_level.txt +1 -0
  7. cts/__init__.py +3 -0
  8. cts/__main__.py +5 -0
  9. cts/autopilot.py +633 -0
  10. cts/bundle.py +958 -0
  11. cts/cli.py +2858 -0
  12. cts/confidence.py +218 -0
  13. cts/config.py +19 -0
  14. cts/corpus/__init__.py +139 -0
  15. cts/corpus/apply.py +305 -0
  16. cts/corpus/archive.py +309 -0
  17. cts/corpus/baseline.py +294 -0
  18. cts/corpus/evaluate.py +409 -0
  19. cts/corpus/experiment_eval.py +585 -0
  20. cts/corpus/experiment_schema.py +380 -0
  21. cts/corpus/extract.py +353 -0
  22. cts/corpus/load.py +44 -0
  23. cts/corpus/model.py +114 -0
  24. cts/corpus/patch.py +467 -0
  25. cts/corpus/registry.py +420 -0
  26. cts/corpus/report.py +745 -0
  27. cts/corpus/scan.py +87 -0
  28. cts/corpus/store.py +63 -0
  29. cts/corpus/trends.py +478 -0
  30. cts/corpus/tuning_schema.py +313 -0
  31. cts/corpus/variants.py +335 -0
  32. cts/ctags.py +133 -0
  33. cts/diff_context.py +92 -0
  34. cts/errors.py +109 -0
  35. cts/http.py +89 -0
  36. cts/ranking.py +466 -0
  37. cts/render.py +388 -0
  38. cts/schema.py +96 -0
  39. cts/semantic/__init__.py +47 -0
  40. cts/semantic/candidates.py +150 -0
  41. cts/semantic/chunker.py +184 -0
  42. cts/semantic/config.py +120 -0
  43. cts/semantic/embedder.py +151 -0
  44. cts/semantic/indexer.py +159 -0
  45. cts/semantic/search.py +252 -0
  46. cts/semantic/store.py +330 -0
  47. cts/sidecar.py +431 -0
  48. cts/structural.py +305 -0
cts/ranking.py ADDED
@@ -0,0 +1,466 @@
1
+ """Match ranking: path weighting, stack trace boost, recency.
2
+
3
+ Supports explain mode for per-candidate signal breakdown.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import re
9
+ import subprocess
10
+ from typing import Any, Dict, List, Optional, Tuple, Union
11
+
12
# Path segments that indicate "real code" (boost).
# A path is treated as preferred when any one of its "/"-separated
# segments is a member of this set.
PREFERRED_ROOTS = {
    "api", "app", "cmd", "core",
    "handlers", "internal", "lib", "pkg",
    "server", "services", "src",
}
26
+
27
# Path segments that indicate low-signal (demote).
# Grouped by origin: third-party code, build output, tool caches, and
# test fixtures.
DEPRIORITIZED_ROOTS = {
    # third-party / vendored dependencies
    "vendor", "node_modules",
    # build and bundler output
    "dist", "build", "target", ".next", ".turbo",
    # caches and generated reports
    ".cache", "coverage", "__pycache__",
    # test fixtures and doubles
    "test_data", "fixtures", "testdata", "mocks",
}
44
+
45
+ # ---------------------------------------------------------------------------
46
+ # Stack trace extraction
47
+ # ---------------------------------------------------------------------------
48
+
49
# Patterns that extract file:line from common stack trace formats.
# Every pattern captures the file path in group 1 and the line number in
# group 2. Order matters: the first pattern that matches a line wins.
_TRACE_PATTERNS = [
    re.compile(expr)
    for expr in (
        # Python: File "path.py", line 42
        r'File "([^"]+)", line (\d+)',
        # Node/JS: at funcName (path.js:42:10) or at path.js:42:10
        r"at (?:\S+ \()?([^():]+):(\d+)(?::\d+)?\)?",
        # Java: at pkg.Class.method(File.java:42)
        r"at .+\(([A-Za-z0-9_]+\.\w+):(\d+)\)",
        # Go: /path/to/file.go:42
        r"\s+(/?\S+\.go):(\d+)",
        # Rust: --> src/main.rs:42:10
        r"-->\s+(.+?):(\d+)(?::\d+)?",
        # .NET: in Namespace.Class.Method() in /path/File.cs:line 42
        r"in (\S+\.cs):line (\d+)",
        # Generic: path/file.ext:42 at start of line
        r"^\s*(\S+\.\w{1,5}):(\d+)",
    )
]
66
+
67
+
68
def extract_trace_files(
    text: str,
) -> List[Tuple[str, int]]:
    """Pull (file_path, line_number) pairs out of stack trace text.

    Each line of *text* is tested against ``_TRACE_PATTERNS`` in order and
    contributes at most one pair (the first pattern that matches wins).

    Returns the pairs de-duplicated, in order of first appearance.
    """
    # A dict keeps insertion order, so it doubles as an ordered set here.
    ordered: Dict[Tuple[str, int], None] = {}

    for raw_line in text.splitlines():
        hit = next(
            (
                found
                for found in (rx.search(raw_line) for rx in _TRACE_PATTERNS)
                if found
            ),
            None,
        )
        if hit is None:
            continue

        file_part = hit.group(1)
        try:
            line_no = int(hit.group(2))
        except (ValueError, IndexError):
            # Unparseable line number: fall back to the first line.
            line_no = 1

        ordered.setdefault((file_part, line_no), None)

    return list(ordered)
94
+
95
+
96
def looks_like_stack_trace(text: str) -> bool:
    """Heuristic check for whether *text* contains a stack trace.

    A line counts as a hit when it contains at least one known marker
    substring; the text is considered a stack trace once two lines hit.
    """
    markers = (
        "Traceback (most recent call last)",
        "at ",
        'File "',
        "Error:",
        "Exception:",
        "panic:",
        "FAILED",
        "error[E",
        "System.Exception",
        "NullReferenceException",
    )
    matching_lines = 0
    for row in text.splitlines():
        if not any(marker in row for marker in markers):
            continue
        matching_lines += 1
        if matching_lines >= 2:
            return True
    return False
113
+
114
+
115
+ # ---------------------------------------------------------------------------
116
+ # Path scoring
117
+ # ---------------------------------------------------------------------------
118
+
119
+
120
def path_score(
    path: str,
    prefer: Optional[List[str]] = None,
    avoid: Optional[List[str]] = None,
) -> float:
    """Return only the numeric relevance score for *path*.

    Thin wrapper over ``path_score_explained`` that discards the signal
    breakdown. Higher means more relevant; the default range is roughly
    -1.0 to +1.0.
    """
    breakdown = path_score_explained(path, prefer, avoid)
    return breakdown["score"]
130
+
131
+
132
def path_score_explained(
    path: str,
    prefer: Optional[List[str]] = None,
    avoid: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """Score a path and return the signal breakdown.

    Args:
        path: File path to score. Both "/" and "\\" are accepted as
            separators.
        prefer: Extra segment names to treat as preferred, in addition to
            ``PREFERRED_ROOTS``.
        avoid: Extra segment names to treat as deprioritized, in addition
            to ``DEPRIORITIZED_ROOTS``.

    Returns dict with:
        score: float (sum of the three signals below)
        path_boost: float (0.5 when a preferred segment is present)
        path_penalty: float (-0.8 when a deprioritized segment is present)
        test_penalty: float (-0.2 when the file name looks like a test)
        classification: preferred|avoided|neutral ("avoided" wins when
            both kinds of segment are present)
        prefer_match: str|None (alphabetically first matching segment)
        avoid_match: str|None (alphabetically first matching segment)
    """
    # Normalise separators once so the segment match and the file-name
    # checks agree; previously the basename was taken from the raw path,
    # so "src\\test_x.py" never received the test penalty.
    segments = path.replace("\\", "/").split("/")
    parts = set(segments)
    basename = segments[-1]

    preferred = PREFERRED_ROOTS
    if prefer:
        preferred = preferred | set(prefer)
    deprioritized = DEPRIORITIZED_ROOTS
    if avoid:
        deprioritized = deprioritized | set(avoid)

    path_boost = 0.0
    path_penalty = 0.0
    prefer_match: Optional[str] = None
    avoid_match: Optional[str] = None
    classification = "neutral"

    hit = parts & preferred
    if hit:
        path_boost = 0.5
        prefer_match = min(hit)
        classification = "preferred"

    # Checked second on purpose: a deprioritized segment overrides the
    # "preferred" classification, although both signals still count
    # towards the score.
    hit_avoid = parts & deprioritized
    if hit_avoid:
        path_penalty = -0.8
        avoid_match = min(hit_avoid)
        classification = "avoided"

    is_test_file = (
        basename.startswith("test_")
        or basename.endswith("_test.go")
        or ".test." in basename
        or ".spec." in basename
    )
    test_penalty = -0.2 if is_test_file else 0.0

    score = path_boost + path_penalty + test_penalty

    return {
        "score": score,
        "path_boost": path_boost,
        "path_penalty": path_penalty,
        "test_penalty": test_penalty,
        "classification": classification,
        "prefer_match": prefer_match,
        "avoid_match": avoid_match,
    }
193
+
194
+
195
+ # ---------------------------------------------------------------------------
196
+ # Recency scoring (git log)
197
+ # ---------------------------------------------------------------------------
198
+
199
+
200
def file_recency_hours(repo_path: str, file_path: str) -> Optional[float]:
    """Return the hours elapsed since the last commit touching a file.

    Runs ``git log -n 1 --format=%ct -- <file_path>`` with *repo_path* as
    the working directory and a 5 second timeout.

    Returns None if git is unavailable or file has no history; any
    failure while running or parsing is treated the same way.
    """
    import time

    git_cmd = ["git", "log", "-n", "1", "--format=%ct", "--", file_path]
    try:
        proc = subprocess.run(
            git_cmd,
            cwd=repo_path,
            capture_output=True,
            text=True,
            timeout=5,
        )
        stamp = proc.stdout.strip()
        if proc.returncode != 0 or not stamp:
            return None
        age_hours = (time.time() - int(stamp)) / 3600
        # Clamp so a commit timestamp in the future never yields a
        # negative age.
        return max(0.0, age_hours)
    except Exception:
        # Best-effort signal: the caller only needs "unknown" on failure.
        return None
230
+
231
+
232
def recency_score(hours: Optional[float]) -> float:
    """Map hours-since-change onto a ranking boost.

    Under 24h gives +0.3, under one week (168h) +0.15, under one month
    (720h) +0.05. Anything older, or an unknown age (None), gives 0.0.
    """
    if hours is None:
        return 0.0

    # (exclusive upper bound in hours, boost), tightest window first.
    tiers = (
        (24, 0.3),
        (168, 0.15),
        (720, 0.05),
    )
    for limit, boost in tiers:
        if hours < limit:
            return boost
    return 0.0
246
+
247
+
248
+ # ---------------------------------------------------------------------------
249
+ # Composite ranking
250
+ # ---------------------------------------------------------------------------
251
+
252
+
253
def rank_matches(
    matches: List[Dict[str, Any]],
    trace_files: Optional[List[Tuple[str, int]]] = None,
    prefer_paths: Optional[List[str]] = None,
    avoid_paths: Optional[List[str]] = None,
    repo_root: Optional[str] = None,
    explain: bool = False,
    ctags_info: Optional[Dict[str, Any]] = None,
    query_symbol: Optional[str] = None,
    diff_context: Optional[Dict[str, Any]] = None,
) -> Union[List[Dict[str, Any]], Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]]:
    """Re-rank matches using path score + trace boost + recency + structural.

    Returns a sorted list (highest score first) of shallow copies of the
    input matches with _rank_score attached. Matches with equal scores
    keep their input order.
    If *explain=True*, returns (ranked, score_cards) where score_cards
    is a list of per-candidate signal breakdowns; score_cards[i] always
    describes ranked[i].

    *trace_files* is an optional list of (path, line) pairs; a match whose
    path or file name appears in it receives the trace boost.

    *repo_root* enables the git recency signal. The lookup is done at most
    once per distinct path within a call.

    *ctags_info* is an optional dict with:
    - def_files: set of file paths that define the query symbol
    - kind_weight: float weight for the best ctags kind (0..0.6)
    - best_kind: str name of the best kind found

    *query_symbol* enables structural heuristics (def/export detection)
    on match snippets. Only effective when a symbol name is provided.

    *diff_context* is an optional dict from build_diff_context() with:
    - changed_files: set of file paths from the diff
    - hunk_ranges: dict mapping path -> list of (start, end) tuples
    - changed_identifiers: set of identifier tokens from + lines
    """
    # Trace files match on either the exact reported path or the bare file
    # name. Separators are normalised before taking the file name so that
    # backslash-style trace paths contribute a basename too.
    trace_set: set = set()
    for fpath, _ in trace_files or ():
        trace_set.add(fpath)
        norm_trace = fpath.replace("\\", "/")
        if "/" in norm_trace:
            trace_set.add(norm_trace.rsplit("/", 1)[-1])

    # Ctags structural data
    ctags_def_files: set = set()
    ctags_kind_w: float = 0.0
    ctags_best_kind: str = ""
    if ctags_info:
        ctags_def_files = ctags_info.get("def_files", set())
        ctags_kind_w = ctags_info.get("kind_weight", 0.0)
        ctags_best_kind = ctags_info.get("best_kind", "")

    # Diff context data
    diff_files: set = set()
    diff_hunks: dict = {}
    diff_idents: set = set()
    if diff_context:
        diff_files = diff_context.get("changed_files", set())
        diff_hunks = diff_context.get("hunk_ranges", {})
        diff_idents = diff_context.get("changed_identifiers", set())

    # One git subprocess per distinct path instead of one per match.
    recency_cache: Dict[str, Optional[float]] = {}

    # (unrounded total, original match, score card or None). Keeping the
    # card next to its match lets a single sort order both outputs.
    scored: List[Tuple[float, Dict[str, Any], Optional[Dict[str, Any]]]] = []

    for m in matches:
        path = m.get("path", "")

        # Path signal
        path_detail = path_score_explained(path, prefer=prefer_paths, avoid=avoid_paths)
        path_total = path_detail["score"]

        norm_path = path.replace("\\", "/")

        # Trace signal
        trace_boost = 0.0
        is_trace = False
        basename = norm_path.rsplit("/", 1)[-1]
        if path in trace_set or basename in trace_set:
            trace_boost = 2.0
            is_trace = True

        # Recency signal
        rec_boost = 0.0
        git_hours: Optional[float] = None
        if repo_root:
            if path not in recency_cache:
                recency_cache[path] = file_recency_hours(repo_root, path)
            git_hours = recency_cache[path]
            rec_boost = recency_score(git_hours)

        # Structural signals (ctags)
        ctags_def_boost = 0.0
        ctags_kind_boost = 0.0
        is_def_file = False
        if ctags_def_files and path in ctags_def_files:
            is_def_file = True
            ctags_def_boost = 0.8
            ctags_kind_boost = ctags_kind_w

        # Structural heuristics (snippet-based def/export/call detection)
        def_likeness_boost = 0.0
        export_boost = 0.0
        caller_proximity_boost = 0.0
        struct_rule = ""
        struct_def_conf = 0.0
        struct_export_conf = 0.0
        struct_call_conf = 0.0
        is_prob_def = False
        is_prob_export = False
        is_prob_call = False
        if query_symbol:
            snippet = m.get("snippet", "")
            if snippet:
                # Imported lazily: only needed when a symbol and a snippet
                # are both present.
                from cts.structural import classify_snippet

                sc = classify_snippet(path, query_symbol, snippet)
                struct_def_conf = sc["def_conf"]
                struct_export_conf = sc["export_conf"]
                struct_call_conf = sc["call_conf"]
                is_prob_def = sc["is_probable_definition"]
                is_prob_export = sc["is_probable_export"]
                is_prob_call = sc["is_probable_call_site"]
                struct_rule = sc["matched_rule"]
                def_likeness_boost = round(0.5 * struct_def_conf, 2)
                export_boost = round(0.3 * struct_export_conf, 2)
                caller_proximity_boost = round(0.2 * struct_call_conf, 2)

        # Diff-aware signals
        changed_file_boost = 0.0
        hunk_overlap_boost = 0.0
        diff_ident_def_boost = 0.0
        is_changed_file = False
        is_in_hunk = False
        if diff_files:
            if norm_path in diff_files:
                changed_file_boost = 0.4
                is_changed_file = True
                # Hunk overlap check
                match_line = m.get("line", 0)
                if match_line and diff_hunks:
                    from cts.diff_context import is_in_hunk as _hunk_check

                    if _hunk_check(norm_path, match_line, diff_hunks):
                        hunk_overlap_boost = 0.3
                        is_in_hunk = True
            # Identifier-in-diff boost: symbol is a changed ident + def
            if query_symbol and diff_idents and is_prob_def:
                if query_symbol in diff_idents:
                    diff_ident_def_boost = 0.5

        total = (
            path_total
            + trace_boost
            + rec_boost
            + ctags_def_boost
            + ctags_kind_boost
            + def_likeness_boost
            + export_boost
            + caller_proximity_boost
            + changed_file_boost
            + hunk_overlap_boost
            + diff_ident_def_boost
        )

        card: Optional[Dict[str, Any]] = None
        if explain:
            card = {
                "path": path,
                "line": m.get("line", 0),
                "score_total": round(total, 2),
                "signals": {
                    "path_boost": path_detail["path_boost"],
                    "path_penalty": path_detail["path_penalty"],
                    "test_penalty": path_detail["test_penalty"],
                    "trace_boost": trace_boost,
                    "recency_boost": rec_boost,
                    "ctags_def_boost": ctags_def_boost,
                    "ctags_kind_boost": ctags_kind_boost,
                    "def_likeness_boost": def_likeness_boost,
                    "export_boost": export_boost,
                    "caller_proximity_boost": caller_proximity_boost,
                    "changed_file_boost": changed_file_boost,
                    "hunk_overlap_boost": hunk_overlap_boost,
                    "diff_ident_def_boost": diff_ident_def_boost,
                },
                "features": {
                    "classification": path_detail["classification"],
                    "is_trace_file": is_trace,
                    "is_def_file": is_def_file,
                    "ctags_best_kind": ctags_best_kind,
                    "is_prob_def": is_prob_def,
                    "is_prob_export": is_prob_export,
                    "is_prob_call": is_prob_call,
                    "is_changed_file": is_changed_file,
                    "is_in_hunk": is_in_hunk,
                    "struct_rule": struct_rule,
                    "struct_def_conf": struct_def_conf,
                    "struct_export_conf": struct_export_conf,
                    "struct_call_conf": struct_call_conf,
                    "git_age_hours": (
                        round(git_hours, 1) if git_hours is not None else None
                    ),
                    "prefer_match": path_detail["prefer_match"],
                    "avoid_match": path_detail["avoid_match"],
                },
            }

        scored.append((total, m, card))

    # Stable sort on the unrounded total. Sorting cards separately by their
    # rounded score could order them differently from the results whenever
    # two totals round to the same value.
    scored.sort(key=lambda row: row[0], reverse=True)

    results: List[Dict[str, Any]] = []
    cards: List[Dict[str, Any]] = []
    for total, m, card in scored:
        m_copy = dict(m)
        m_copy["_rank_score"] = round(total, 2)
        results.append(m_copy)
        if card is not None:
            cards.append(card)

    if explain:
        return results, cards

    return results