claude-jacked 0.2.3__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. claude_jacked-0.2.9.dist-info/METADATA +523 -0
  2. claude_jacked-0.2.9.dist-info/RECORD +33 -0
  3. jacked/cli.py +752 -47
  4. jacked/client.py +196 -29
  5. jacked/data/agents/code-simplicity-reviewer.md +87 -0
  6. jacked/data/agents/defensive-error-handler.md +93 -0
  7. jacked/data/agents/double-check-reviewer.md +214 -0
  8. jacked/data/agents/git-pr-workflow-manager.md +149 -0
  9. jacked/data/agents/issue-pr-coordinator.md +131 -0
  10. jacked/data/agents/pr-workflow-checker.md +199 -0
  11. jacked/data/agents/readme-maintainer.md +123 -0
  12. jacked/data/agents/test-coverage-engineer.md +155 -0
  13. jacked/data/agents/test-coverage-improver.md +139 -0
  14. jacked/data/agents/wiki-documentation-architect.md +580 -0
  15. jacked/data/commands/audit-rules.md +103 -0
  16. jacked/data/commands/dc.md +155 -0
  17. jacked/data/commands/learn.md +89 -0
  18. jacked/data/commands/pr.md +4 -0
  19. jacked/data/commands/redo.md +85 -0
  20. jacked/data/commands/techdebt.md +115 -0
  21. jacked/data/prompts/security_gatekeeper.txt +58 -0
  22. jacked/data/rules/jacked_behaviors.md +11 -0
  23. jacked/data/skills/jacked/SKILL.md +162 -0
  24. jacked/index_write_tracker.py +227 -0
  25. jacked/indexer.py +255 -129
  26. jacked/retriever.py +389 -137
  27. jacked/searcher.py +65 -13
  28. jacked/transcript.py +339 -0
  29. claude_jacked-0.2.3.dist-info/METADATA +0 -483
  30. claude_jacked-0.2.3.dist-info/RECORD +0 -13
  31. {claude_jacked-0.2.3.dist-info → claude_jacked-0.2.9.dist-info}/WHEEL +0 -0
  32. {claude_jacked-0.2.3.dist-info → claude_jacked-0.2.9.dist-info}/entry_points.txt +0 -0
  33. {claude_jacked-0.2.3.dist-info → claude_jacked-0.2.9.dist-info}/licenses/LICENSE +0 -0
jacked/searcher.py CHANGED
@@ -3,11 +3,18 @@ Session searching for Jacked.
3
3
 
4
4
  Handles semantic search across indexed sessions using Qdrant Cloud Inference.
5
5
  Implements multi-factor ranking: ownership, repo, recency, and semantic similarity.
6
+
7
+ Supports filtering by content_type:
8
+ - plan: Full implementation strategy (gold)
9
+ - subagent_summary: Rich summaries from agent outputs (gold)
10
+ - summary_label: Tiny chapter titles from compaction
11
+ - user_message: User messages for intent matching
12
+ - chunk: Full transcript chunks
6
13
  """
7
14
 
8
15
  import logging
9
16
  import math
10
- from dataclasses import dataclass
17
+ from dataclasses import dataclass, field
11
18
  from datetime import datetime, timezone
12
19
  from typing import Optional
13
20
  from collections import defaultdict
@@ -16,6 +23,15 @@ from jacked.config import SmartForkConfig, get_repo_id
16
23
  from jacked.client import QdrantSessionClient
17
24
 
18
25
 
26
+ # Default content types for search (high-value content)
27
+ DEFAULT_SEARCH_CONTENT_TYPES = [
28
+ "plan",
29
+ "subagent_summary",
30
+ "summary_label",
31
+ "user_message",
32
+ ]
33
+
34
+
19
35
  logger = logging.getLogger(__name__)
20
36
 
21
37
 
@@ -37,6 +53,9 @@ class SearchResult:
37
53
  is_current_repo: Whether this is from the current repo
38
54
  intent_preview: Preview of the matched intent text
39
55
  chunk_count: Number of transcript chunks stored
56
+ has_plan: Whether this session has a plan file indexed
57
+ has_agent_summaries: Whether this session has agent summaries
58
+ content_types_found: Set of content types found in this session
40
59
  """
41
60
  session_id: str
42
61
  repo_name: str
@@ -50,13 +69,23 @@ class SearchResult:
50
69
  is_current_repo: bool
51
70
  intent_preview: str
52
71
  chunk_count: int
72
+ has_plan: bool = False
73
+ has_agent_summaries: bool = False
74
+ content_types_found: set = field(default_factory=set)
53
75
 
54
76
  def __str__(self) -> str:
55
77
  """Format result for display."""
56
78
  ts_str = self.timestamp.strftime("%Y-%m-%d") if self.timestamp else "unknown"
57
79
  owner = "YOU" if self.is_own else f"@{self.user_name}"
80
+ # Add indicators for rich content
81
+ indicators = []
82
+ if self.has_plan:
83
+ indicators.append("📋")
84
+ if self.has_agent_summaries:
85
+ indicators.append("🤖")
86
+ indicator_str = " ".join(indicators)
58
87
  return (
59
- f"[{self.score:.0f}%] {owner} - {self.repo_name} - {ts_str}\n"
88
+ f"[{self.score:.0f}%] {owner} - {self.repo_name} - {ts_str} {indicator_str}\n"
60
89
  f" {self.intent_preview[:80]}..."
61
90
  )
62
91
 
@@ -96,6 +125,7 @@ class SessionSearcher:
96
125
  min_score: float = 0.3,
97
126
  mine_only: bool = False,
98
127
  user_filter: Optional[str] = None,
128
+ content_types: Optional[list[str]] = None,
99
129
  ) -> list[SearchResult]:
100
130
  """
101
131
  Search for sessions similar to the query with multi-factor ranking.
@@ -113,6 +143,8 @@ class SessionSearcher:
113
143
  min_score: Minimum cosine similarity score (0-1)
114
144
  mine_only: If True, only return current user's sessions
115
145
  user_filter: If set, only return sessions from this user
146
+ content_types: Content types to search (default: plan, subagent_summary,
147
+ summary_label, user_message). Use ["chunk"] for full-text search.
116
148
 
117
149
  Returns:
118
150
  List of SearchResult objects, sorted by multi-factor relevance
@@ -126,18 +158,31 @@ class SessionSearcher:
126
158
  current_repo_id = get_repo_id(repo_path) if repo_path else None
127
159
  current_user = self.config.user_name
128
160
 
129
- # Search for intent points using server-side embedding
161
+ # Default to high-value content types
162
+ if content_types is None:
163
+ content_types = DEFAULT_SEARCH_CONTENT_TYPES
164
+
165
+ # Build user filter for Qdrant (more efficient than post-filter)
166
+ qdrant_user_filter = None
167
+ if mine_only:
168
+ qdrant_user_filter = current_user
169
+ elif user_filter:
170
+ qdrant_user_filter = user_filter
171
+
172
+ # Search using content_types filter
130
173
  # Get more results than needed since we'll aggregate and re-rank
131
174
  raw_results = self.client.search(
132
175
  query_text=query,
133
176
  repo_id=None, # Don't filter in Qdrant, we'll boost instead
134
- point_type="intent",
177
+ content_types=content_types,
178
+ user_name=qdrant_user_filter,
135
179
  limit=limit * 10, # Get extra for aggregation and filtering
136
180
  )
137
181
 
138
- # Aggregate by session (multiple intent chunks per session)
182
+ # Aggregate by session (multiple points per session)
139
183
  session_scores: dict[str, list[float]] = defaultdict(list)
140
184
  session_data: dict[str, dict] = {}
185
+ session_content_types: dict[str, set] = defaultdict(set)
141
186
 
142
187
  for result in raw_results:
143
188
  if result.score < min_score:
@@ -146,17 +191,13 @@ class SessionSearcher:
146
191
  payload = result.payload or {}
147
192
  session_id = payload.get("session_id")
148
193
  session_user = payload.get("user_name", "unknown")
194
+ content_type = payload.get("content_type", payload.get("type", ""))
149
195
 
150
196
  if not session_id:
151
197
  continue
152
198
 
153
- # Apply filters
154
- if mine_only and session_user != current_user:
155
- continue
156
- if user_filter and session_user != user_filter:
157
- continue
158
-
159
199
  session_scores[session_id].append(result.score)
200
+ session_content_types[session_id].add(content_type)
160
201
 
161
202
  # Keep the best payload data (highest score)
162
203
  if session_id not in session_data or result.score > max(session_scores[session_id][:-1], default=0):
@@ -168,6 +209,7 @@ class SessionSearcher:
168
209
  # Use max score for semantic ranking (best match in session)
169
210
  semantic_score = max(scores)
170
211
  payload = session_data[session_id]
212
+ found_types = session_content_types[session_id]
171
213
 
172
214
  # Parse timestamp
173
215
  timestamp = None
@@ -192,6 +234,9 @@ class SessionSearcher:
192
234
  timestamp=timestamp,
193
235
  )
194
236
 
237
+ # Get preview text - prefer content field over intent_text
238
+ preview = payload.get("content", payload.get("intent_text", ""))[:200]
239
+
195
240
  results.append(
196
241
  SearchResult(
197
242
  session_id=session_id,
@@ -204,8 +249,11 @@ class SessionSearcher:
204
249
  semantic_score=semantic_score * 100,
205
250
  is_own=is_own,
206
251
  is_current_repo=is_current_repo,
207
- intent_preview=payload.get("intent_text", "")[:200],
208
- chunk_count=payload.get("transcript_chunk_count", 0),
252
+ intent_preview=preview,
253
+ chunk_count=payload.get("total_chunks", 0),
254
+ has_plan="plan" in found_types,
255
+ has_agent_summaries="subagent_summary" in found_types,
256
+ content_types_found=found_types,
209
257
  )
210
258
  )
211
259
 
@@ -285,9 +333,13 @@ class SessionSearcher:
285
333
  session_id=session.get("session_id", ""),
286
334
  repo_name=session.get("repo_name", "unknown"),
287
335
  repo_path=session.get("repo_path", ""),
336
+ user_name=session.get("user_name", "unknown"),
288
337
  machine=session.get("machine", "unknown"),
289
338
  timestamp=timestamp,
290
339
  score=100, # No relevance score for list
340
+ semantic_score=0.0, # Not applicable for list
341
+ is_own=session.get("user_name") == self.config.user_name,
342
+ is_current_repo=True, # We're filtering by repo
291
343
  intent_preview="", # Not available in list
292
344
  chunk_count=session.get("chunk_count", 0),
293
345
  )
jacked/transcript.py CHANGED
@@ -475,3 +475,342 @@ def _is_uuid_format(name: str) -> bool:
475
475
  """
476
476
  uuid_pattern = r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
477
477
  return bool(re.match(uuid_pattern, name.lower()))
478
+
479
+
480
+ # =============================================================================
481
+ # NEW: Extraction functions for subagent summaries, labels, and plans
482
+ # =============================================================================
483
+
484
+
485
@dataclass
class SummaryLabel:
    """A compaction summary label (tiny chapter title).

    Built from session records with ``type == "summary"`` (see
    ``extract_summary_labels``).

    Examples:
        >>> label = SummaryLabel("Implementing auth flow", "abc123", None)
        >>> label.label
        'Implementing auth flow'
    """
    # Short human-readable title taken from the record's "summary" field.
    label: str
    # UUID of the leaf message the summary points at ("leafUuid"), when present.
    leaf_uuid: Optional[str] = None
    # Timestamp parsed from the record, when present and parseable.
    timestamp: Optional[datetime] = None
497
+
498
+
499
@dataclass
class AgentSummary:
    """Summary extracted from a subagent's final output.

    Produced by ``extract_agent_summary`` from the text content of the
    last assistant message in an ``agent-*.jsonl`` file.

    Examples:
        >>> summary = AgentSummary("a4e75d5", "Explore", "## Summary\\n...", None)
        >>> summary.agent_id
        'a4e75d5'
    """
    # Short id taken from the agent filename: agent-<id>.jsonl -> <id>.
    agent_id: str
    # Agent type if known; currently always None (not inferred from content).
    agent_type: Optional[str]
    # Concatenated text blocks of the agent's final assistant message.
    summary_text: str
    # Timestamp taken from an assistant message in the file, when parseable.
    timestamp: Optional[datetime] = None
512
+
513
+
514
@dataclass
class PlanFile:
    """A plan file linked to a session via slug.

    Plans are stored as ``<slug>.md`` files and located by
    ``find_plan_file``; the slug recorded in the session JSONL connects
    the two.

    Examples:
        >>> plan = PlanFile("hidden-finding-goose", Path("..."), "# Plan content")
        >>> plan.slug
        'hidden-finding-goose'
    """
    # Session slug, e.g. "hidden-finding-goose".
    slug: str
    # Filesystem path the plan was read from.
    path: Path
    # Full text content of the plan file.
    content: str
526
+
527
+
528
def find_subagent_files(session_path: Path) -> list[Path]:
    """Find all subagent JSONL files for a session.

    Subagents are stored in {session-id}/subagents/agent-*.jsonl

    Args:
        session_path: Path to the main session JSONL file

    Returns:
        Sorted list of paths to subagent JSONL files (empty if none)

    Examples:
        >>> # files = find_subagent_files(Path('session.jsonl')) # doctest: +SKIP
    """
    # The session directory {session-id}/ sits beside {session-id}.jsonl.
    subagents_dir = session_path.parent / session_path.stem / "subagents"

    # sorted() gives a deterministic ordering regardless of filesystem.
    candidates = subagents_dir.glob("agent-*.jsonl") if subagents_dir.exists() else ()
    return sorted(candidates)
551
+
552
+
553
def extract_agent_summary(agent_file: Path, min_length: int = 200) -> Optional[AgentSummary]:
    """Extract the final summary text from an agent's session.

    The summary is the text content from the LAST assistant message.

    Args:
        agent_file: Path to an agent-*.jsonl file
        min_length: Minimum character count for a usable summary; shorter
            outputs are treated as noise and skipped (default 200)

    Returns:
        AgentSummary or None if no usable summary found

    Examples:
        >>> # summary = extract_agent_summary(Path('agent-abc.jsonl')) # doctest: +SKIP
    """
    # Extract agent_id from filename: agent-a4e75d5.jsonl -> a4e75d5
    agent_id = agent_file.stem.replace("agent-", "")

    last_assistant_msg = None

    try:
        with open(agent_file, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    # Tolerate corrupt/partial lines in live session files.
                    continue

                if isinstance(data, dict) and data.get("type") == "assistant":
                    last_assistant_msg = data
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError: a binary/garbled agent file should be
        # skipped like an unreadable one, not crash the caller.
        logger.warning(f"Failed to read agent file {agent_file}: {e}")
        return None

    if not last_assistant_msg:
        return None

    # Parse the timestamp of the message the summary actually comes from.
    # (Previously the timestamp of an *earlier* assistant message could
    # leak through when the last one had no parseable timestamp.)
    timestamp = None
    ts_str = last_assistant_msg.get("timestamp")
    if ts_str:
        try:
            timestamp = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
        except ValueError:
            pass

    # Extract text content from the message; content may be a list of
    # typed blocks or a plain string.
    message = last_assistant_msg.get("message", {})
    content = message.get("content", [])

    text_parts = []
    if isinstance(content, list):
        for block in content:
            if isinstance(block, dict) and block.get("type") == "text":
                text = block.get("text", "")
                if text:
                    text_parts.append(text)
    elif isinstance(content, str):
        text_parts.append(content)

    summary_text = "\n".join(text_parts).strip()

    # Skip if too short (likely not a real summary)
    if len(summary_text) < min_length:
        logger.debug(f"Agent {agent_id} summary too short ({len(summary_text)} chars), skipping")
        return None

    # Agent type is hard to detect from the file itself.
    # Could infer from content patterns but skip for MVP.
    agent_type = None

    return AgentSummary(
        agent_id=agent_id,
        agent_type=agent_type,
        summary_text=summary_text,
        timestamp=timestamp,
    )
630
+
631
+
632
def extract_summary_labels(session_path: Path) -> list[SummaryLabel]:
    """Extract summary labels from a session JSONL file.

    These are the tiny "chapter titles" from compaction events
    (records with ``type == "summary"``).

    Args:
        session_path: Path to the main session JSONL file

    Returns:
        List of SummaryLabel objects (empty on read failure)

    Examples:
        >>> # labels = extract_summary_labels(Path('session.jsonl')) # doctest: +SKIP
    """
    labels: list[SummaryLabel] = []

    try:
        with open(session_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    # Tolerate corrupt/partial lines in live session files.
                    continue

                # Guard: a valid JSON line may not be an object at all.
                if not isinstance(data, dict) or data.get("type") != "summary":
                    continue
                label_text = data.get("summary", "")
                if not label_text:
                    continue

                timestamp = None
                ts_str = data.get("timestamp")
                if ts_str:
                    try:
                        timestamp = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
                    except ValueError:
                        pass

                labels.append(SummaryLabel(
                    label=label_text,
                    leaf_uuid=data.get("leafUuid"),
                    timestamp=timestamp,
                ))
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError: skip undecodable session files instead of crashing.
        logger.warning(f"Failed to read session file {session_path}: {e}")

    return labels
679
+
680
+
681
def extract_session_slug(session_path: Path) -> Optional[str]:
    """Extract the slug from a session JSONL file.

    The slug links the session to its plan file in ~/.claude/plans/

    Args:
        session_path: Path to the main session JSONL file

    Returns:
        The slug string or None if not found

    Examples:
        >>> # slug = extract_session_slug(Path('session.jsonl')) # doctest: +SKIP
    """
    try:
        with open(session_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    continue

                # Guard against non-object JSON lines (e.g. a bare number):
                # `"slug" in 42` would raise TypeError.
                if isinstance(data, dict) and "slug" in data:
                    # First record carrying a slug wins.
                    return data["slug"]
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError: treat undecodable session files as slug-less.
        logger.warning(f"Failed to read session file {session_path}: {e}")

    return None
712
+
713
+
714
def find_plan_file(
    slug: str,
    plans_dir: Optional[Path] = None,
    max_size: int = 100_000,
    min_size: int = 50,
) -> Optional[PlanFile]:
    """Find and read the plan file for a session slug.

    Args:
        slug: The session slug (e.g., "hidden-finding-goose")
        plans_dir: Path to plans directory (default: ~/.claude/plans/)
        max_size: Skip plan files larger than this many bytes (default 100KB)
        min_size: Skip plan files smaller than this many bytes (default 50)

    Returns:
        PlanFile or None if not found or invalid

    Examples:
        >>> # plan = find_plan_file("hidden-finding-goose") # doctest: +SKIP
    """
    if plans_dir is None:
        plans_dir = Path.home() / ".claude" / "plans"

    plan_path = plans_dir / f"{slug}.md"

    if not plan_path.exists():
        return None

    try:
        # Size validation - skip files outside the useful range.
        file_size = plan_path.stat().st_size
        if file_size > max_size:
            logger.warning(f"Plan file too large ({file_size} bytes), skipping: {plan_path}")
            return None

        if file_size < min_size:
            logger.debug(f"Plan file too small ({file_size} bytes), skipping: {plan_path}")
            return None

        content = plan_path.read_text(encoding="utf-8")

        return PlanFile(
            slug=slug,
            path=plan_path,
            content=content,
        )
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError: a non-UTF-8 plan file is unusable, not fatal.
        logger.warning(f"Failed to read plan file {plan_path}: {e}")
        return None
756
+
757
+
758
@dataclass
class EnrichedTranscript(ParsedTranscript):
    """ParsedTranscript with additional extracted data.

    Adds subagent summaries, summary labels, and plan file content.
    Built by ``parse_jsonl_file_enriched``.
    """
    # Compaction "chapter title" labels found in the session JSONL.
    summary_labels: list[SummaryLabel] = field(default_factory=list)
    # Final-output summaries from each subagent that produced a usable one.
    agent_summaries: list[AgentSummary] = field(default_factory=list)
    # Plan file linked via the session slug, if one was found on disk.
    plan: Optional[PlanFile] = None
    # The session slug itself (also the key used to locate the plan file).
    slug: Optional[str] = None
768
+
769
+
770
def parse_jsonl_file_enriched(filepath: Path) -> EnrichedTranscript:
    """Parse a Claude session with all enriched data.

    Extracts:
    - Messages (user, assistant)
    - Summary labels (compaction chapter titles)
    - Subagent summaries (gold context from agent outputs)
    - Plan file (if linked via slug)

    Args:
        filepath: Path to the .jsonl session file

    Returns:
        EnrichedTranscript with all extracted data

    Examples:
        >>> # transcript = parse_jsonl_file_enriched(Path('session.jsonl')) # doctest: +SKIP
    """
    # Start from the base parse, then layer the enrichments on top.
    base = parse_jsonl_file(filepath)

    # Subagent summaries: keep only files that yielded a usable summary.
    agent_summaries = [
        summary
        for agent_file in find_subagent_files(filepath)
        if (summary := extract_agent_summary(agent_file))
    ]

    # The plan file is reachable only through the slug recorded in the session.
    slug = extract_session_slug(filepath)

    return EnrichedTranscript(
        session_id=base.session_id,
        messages=base.messages,
        user_messages=base.user_messages,
        full_text=base.full_text,
        intent_text=base.intent_text,
        timestamp=base.timestamp,
        summary_labels=extract_summary_labels(filepath),
        agent_summaries=agent_summaries,
        plan=find_plan_file(slug) if slug else None,
        slug=slug,
    )