claude-jacked 0.2.3__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_jacked-0.2.9.dist-info/METADATA +523 -0
- claude_jacked-0.2.9.dist-info/RECORD +33 -0
- jacked/cli.py +752 -47
- jacked/client.py +196 -29
- jacked/data/agents/code-simplicity-reviewer.md +87 -0
- jacked/data/agents/defensive-error-handler.md +93 -0
- jacked/data/agents/double-check-reviewer.md +214 -0
- jacked/data/agents/git-pr-workflow-manager.md +149 -0
- jacked/data/agents/issue-pr-coordinator.md +131 -0
- jacked/data/agents/pr-workflow-checker.md +199 -0
- jacked/data/agents/readme-maintainer.md +123 -0
- jacked/data/agents/test-coverage-engineer.md +155 -0
- jacked/data/agents/test-coverage-improver.md +139 -0
- jacked/data/agents/wiki-documentation-architect.md +580 -0
- jacked/data/commands/audit-rules.md +103 -0
- jacked/data/commands/dc.md +155 -0
- jacked/data/commands/learn.md +89 -0
- jacked/data/commands/pr.md +4 -0
- jacked/data/commands/redo.md +85 -0
- jacked/data/commands/techdebt.md +115 -0
- jacked/data/prompts/security_gatekeeper.txt +58 -0
- jacked/data/rules/jacked_behaviors.md +11 -0
- jacked/data/skills/jacked/SKILL.md +162 -0
- jacked/index_write_tracker.py +227 -0
- jacked/indexer.py +255 -129
- jacked/retriever.py +389 -137
- jacked/searcher.py +65 -13
- jacked/transcript.py +339 -0
- claude_jacked-0.2.3.dist-info/METADATA +0 -483
- claude_jacked-0.2.3.dist-info/RECORD +0 -13
- {claude_jacked-0.2.3.dist-info → claude_jacked-0.2.9.dist-info}/WHEEL +0 -0
- {claude_jacked-0.2.3.dist-info → claude_jacked-0.2.9.dist-info}/entry_points.txt +0 -0
- {claude_jacked-0.2.3.dist-info → claude_jacked-0.2.9.dist-info}/licenses/LICENSE +0 -0
jacked/searcher.py
CHANGED
|
@@ -3,11 +3,18 @@ Session searching for Jacked.
|
|
|
3
3
|
|
|
4
4
|
Handles semantic search across indexed sessions using Qdrant Cloud Inference.
|
|
5
5
|
Implements multi-factor ranking: ownership, repo, recency, and semantic similarity.
|
|
6
|
+
|
|
7
|
+
Supports filtering by content_type:
|
|
8
|
+
- plan: Full implementation strategy (gold)
|
|
9
|
+
- subagent_summary: Rich summaries from agent outputs (gold)
|
|
10
|
+
- summary_label: Tiny chapter titles from compaction
|
|
11
|
+
- user_message: User messages for intent matching
|
|
12
|
+
- chunk: Full transcript chunks
|
|
6
13
|
"""
|
|
7
14
|
|
|
8
15
|
import logging
|
|
9
16
|
import math
|
|
10
|
-
from dataclasses import dataclass
|
|
17
|
+
from dataclasses import dataclass, field
|
|
11
18
|
from datetime import datetime, timezone
|
|
12
19
|
from typing import Optional
|
|
13
20
|
from collections import defaultdict
|
|
@@ -16,6 +23,15 @@ from jacked.config import SmartForkConfig, get_repo_id
|
|
|
16
23
|
from jacked.client import QdrantSessionClient
|
|
17
24
|
|
|
18
25
|
|
|
26
|
+
# Default content types for search (high-value content)
|
|
27
|
+
DEFAULT_SEARCH_CONTENT_TYPES = [
|
|
28
|
+
"plan",
|
|
29
|
+
"subagent_summary",
|
|
30
|
+
"summary_label",
|
|
31
|
+
"user_message",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
|
|
19
35
|
logger = logging.getLogger(__name__)
|
|
20
36
|
|
|
21
37
|
|
|
@@ -37,6 +53,9 @@ class SearchResult:
|
|
|
37
53
|
is_current_repo: Whether this is from the current repo
|
|
38
54
|
intent_preview: Preview of the matched intent text
|
|
39
55
|
chunk_count: Number of transcript chunks stored
|
|
56
|
+
has_plan: Whether this session has a plan file indexed
|
|
57
|
+
has_agent_summaries: Whether this session has agent summaries
|
|
58
|
+
content_types_found: Set of content types found in this session
|
|
40
59
|
"""
|
|
41
60
|
session_id: str
|
|
42
61
|
repo_name: str
|
|
@@ -50,13 +69,23 @@ class SearchResult:
|
|
|
50
69
|
is_current_repo: bool
|
|
51
70
|
intent_preview: str
|
|
52
71
|
chunk_count: int
|
|
72
|
+
has_plan: bool = False
|
|
73
|
+
has_agent_summaries: bool = False
|
|
74
|
+
content_types_found: set = field(default_factory=set)
|
|
53
75
|
|
|
54
76
|
def __str__(self) -> str:
|
|
55
77
|
"""Format result for display."""
|
|
56
78
|
ts_str = self.timestamp.strftime("%Y-%m-%d") if self.timestamp else "unknown"
|
|
57
79
|
owner = "YOU" if self.is_own else f"@{self.user_name}"
|
|
80
|
+
# Add indicators for rich content
|
|
81
|
+
indicators = []
|
|
82
|
+
if self.has_plan:
|
|
83
|
+
indicators.append("📋")
|
|
84
|
+
if self.has_agent_summaries:
|
|
85
|
+
indicators.append("🤖")
|
|
86
|
+
indicator_str = " ".join(indicators)
|
|
58
87
|
return (
|
|
59
|
-
f"[{self.score:.0f}%] {owner} - {self.repo_name} - {ts_str}\n"
|
|
88
|
+
f"[{self.score:.0f}%] {owner} - {self.repo_name} - {ts_str} {indicator_str}\n"
|
|
60
89
|
f" {self.intent_preview[:80]}..."
|
|
61
90
|
)
|
|
62
91
|
|
|
@@ -96,6 +125,7 @@ class SessionSearcher:
|
|
|
96
125
|
min_score: float = 0.3,
|
|
97
126
|
mine_only: bool = False,
|
|
98
127
|
user_filter: Optional[str] = None,
|
|
128
|
+
content_types: Optional[list[str]] = None,
|
|
99
129
|
) -> list[SearchResult]:
|
|
100
130
|
"""
|
|
101
131
|
Search for sessions similar to the query with multi-factor ranking.
|
|
@@ -113,6 +143,8 @@ class SessionSearcher:
|
|
|
113
143
|
min_score: Minimum cosine similarity score (0-1)
|
|
114
144
|
mine_only: If True, only return current user's sessions
|
|
115
145
|
user_filter: If set, only return sessions from this user
|
|
146
|
+
content_types: Content types to search (default: plan, subagent_summary,
|
|
147
|
+
summary_label, user_message). Use ["chunk"] for full-text search.
|
|
116
148
|
|
|
117
149
|
Returns:
|
|
118
150
|
List of SearchResult objects, sorted by multi-factor relevance
|
|
@@ -126,18 +158,31 @@ class SessionSearcher:
|
|
|
126
158
|
current_repo_id = get_repo_id(repo_path) if repo_path else None
|
|
127
159
|
current_user = self.config.user_name
|
|
128
160
|
|
|
129
|
-
#
|
|
161
|
+
# Default to high-value content types
|
|
162
|
+
if content_types is None:
|
|
163
|
+
content_types = DEFAULT_SEARCH_CONTENT_TYPES
|
|
164
|
+
|
|
165
|
+
# Build user filter for Qdrant (more efficient than post-filter)
|
|
166
|
+
qdrant_user_filter = None
|
|
167
|
+
if mine_only:
|
|
168
|
+
qdrant_user_filter = current_user
|
|
169
|
+
elif user_filter:
|
|
170
|
+
qdrant_user_filter = user_filter
|
|
171
|
+
|
|
172
|
+
# Search using content_types filter
|
|
130
173
|
# Get more results than needed since we'll aggregate and re-rank
|
|
131
174
|
raw_results = self.client.search(
|
|
132
175
|
query_text=query,
|
|
133
176
|
repo_id=None, # Don't filter in Qdrant, we'll boost instead
|
|
134
|
-
|
|
177
|
+
content_types=content_types,
|
|
178
|
+
user_name=qdrant_user_filter,
|
|
135
179
|
limit=limit * 10, # Get extra for aggregation and filtering
|
|
136
180
|
)
|
|
137
181
|
|
|
138
|
-
# Aggregate by session (multiple
|
|
182
|
+
# Aggregate by session (multiple points per session)
|
|
139
183
|
session_scores: dict[str, list[float]] = defaultdict(list)
|
|
140
184
|
session_data: dict[str, dict] = {}
|
|
185
|
+
session_content_types: dict[str, set] = defaultdict(set)
|
|
141
186
|
|
|
142
187
|
for result in raw_results:
|
|
143
188
|
if result.score < min_score:
|
|
@@ -146,17 +191,13 @@ class SessionSearcher:
|
|
|
146
191
|
payload = result.payload or {}
|
|
147
192
|
session_id = payload.get("session_id")
|
|
148
193
|
session_user = payload.get("user_name", "unknown")
|
|
194
|
+
content_type = payload.get("content_type", payload.get("type", ""))
|
|
149
195
|
|
|
150
196
|
if not session_id:
|
|
151
197
|
continue
|
|
152
198
|
|
|
153
|
-
# Apply filters
|
|
154
|
-
if mine_only and session_user != current_user:
|
|
155
|
-
continue
|
|
156
|
-
if user_filter and session_user != user_filter:
|
|
157
|
-
continue
|
|
158
|
-
|
|
159
199
|
session_scores[session_id].append(result.score)
|
|
200
|
+
session_content_types[session_id].add(content_type)
|
|
160
201
|
|
|
161
202
|
# Keep the best payload data (highest score)
|
|
162
203
|
if session_id not in session_data or result.score > max(session_scores[session_id][:-1], default=0):
|
|
@@ -168,6 +209,7 @@ class SessionSearcher:
|
|
|
168
209
|
# Use max score for semantic ranking (best match in session)
|
|
169
210
|
semantic_score = max(scores)
|
|
170
211
|
payload = session_data[session_id]
|
|
212
|
+
found_types = session_content_types[session_id]
|
|
171
213
|
|
|
172
214
|
# Parse timestamp
|
|
173
215
|
timestamp = None
|
|
@@ -192,6 +234,9 @@ class SessionSearcher:
|
|
|
192
234
|
timestamp=timestamp,
|
|
193
235
|
)
|
|
194
236
|
|
|
237
|
+
# Get preview text - prefer content field over intent_text
|
|
238
|
+
preview = payload.get("content", payload.get("intent_text", ""))[:200]
|
|
239
|
+
|
|
195
240
|
results.append(
|
|
196
241
|
SearchResult(
|
|
197
242
|
session_id=session_id,
|
|
@@ -204,8 +249,11 @@ class SessionSearcher:
|
|
|
204
249
|
semantic_score=semantic_score * 100,
|
|
205
250
|
is_own=is_own,
|
|
206
251
|
is_current_repo=is_current_repo,
|
|
207
|
-
intent_preview=
|
|
208
|
-
chunk_count=payload.get("
|
|
252
|
+
intent_preview=preview,
|
|
253
|
+
chunk_count=payload.get("total_chunks", 0),
|
|
254
|
+
has_plan="plan" in found_types,
|
|
255
|
+
has_agent_summaries="subagent_summary" in found_types,
|
|
256
|
+
content_types_found=found_types,
|
|
209
257
|
)
|
|
210
258
|
)
|
|
211
259
|
|
|
@@ -285,9 +333,13 @@ class SessionSearcher:
|
|
|
285
333
|
session_id=session.get("session_id", ""),
|
|
286
334
|
repo_name=session.get("repo_name", "unknown"),
|
|
287
335
|
repo_path=session.get("repo_path", ""),
|
|
336
|
+
user_name=session.get("user_name", "unknown"),
|
|
288
337
|
machine=session.get("machine", "unknown"),
|
|
289
338
|
timestamp=timestamp,
|
|
290
339
|
score=100, # No relevance score for list
|
|
340
|
+
semantic_score=0.0, # Not applicable for list
|
|
341
|
+
is_own=session.get("user_name") == self.config.user_name,
|
|
342
|
+
is_current_repo=True, # We're filtering by repo
|
|
291
343
|
intent_preview="", # Not available in list
|
|
292
344
|
chunk_count=session.get("chunk_count", 0),
|
|
293
345
|
)
|
jacked/transcript.py
CHANGED
|
@@ -475,3 +475,342 @@ def _is_uuid_format(name: str) -> bool:
|
|
|
475
475
|
"""
|
|
476
476
|
uuid_pattern = r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$"
|
|
477
477
|
return bool(re.match(uuid_pattern, name.lower()))
|
|
478
|
+
|
|
479
|
+
|
|
480
|
+
# =============================================================================
|
|
481
|
+
# NEW: Extraction functions for subagent summaries, labels, and plans
|
|
482
|
+
# =============================================================================
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
@dataclass
class SummaryLabel:
    """Short "chapter title" recorded when a session is compacted.

    Attributes:
        label: The label text itself.
        leaf_uuid: Value of the record's ``leafUuid`` field, if it had one.
        timestamp: When the label was recorded, if a parseable timestamp
            was present.

    Examples:
        >>> label = SummaryLabel("Implementing auth flow", "abc123", None)
        >>> label.label
        'Implementing auth flow'
    """

    label: str
    leaf_uuid: Optional[str] = None
    timestamp: Optional[datetime] = None
|
|
497
|
+
|
|
498
|
+
|
|
499
|
+
@dataclass
class AgentSummary:
    """Final output of one subagent, kept as searchable context.

    Attributes:
        agent_id: Identifier of the subagent (taken from its file name).
        agent_type: Kind of agent, when known; may be None.
        summary_text: Text of the agent's last assistant message.
        timestamp: Timestamp associated with the summary, if available.

    Examples:
        >>> summary = AgentSummary("a4e75d5", "Explore", "## Summary\\n...", None)
        >>> summary.agent_id
        'a4e75d5'
    """

    agent_id: str
    agent_type: Optional[str]
    summary_text: str
    timestamp: Optional[datetime] = None
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
@dataclass
class PlanFile:
    """Plan document associated with a session through its slug.

    Attributes:
        slug: The session slug the plan is named after.
        path: Location of the plan file on disk.
        content: Full text of the plan file.

    Examples:
        >>> plan = PlanFile("hidden-finding-goose", Path("..."), "# Plan content")
        >>> plan.slug
        'hidden-finding-goose'
    """

    slug: str
    path: Path
    content: str
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def find_subagent_files(session_path: Path) -> list[Path]:
    """Find all subagent JSONL files for a session.

    Subagents are stored in ``{session-id}/subagents/agent-*.jsonl``, where
    the ``{session-id}/`` directory sits next to ``{session-id}.jsonl``.

    Args:
        session_path: Path to the main session JSONL file.

    Returns:
        Paths of the regular files matching ``agent-*.jsonl`` in the
        session's ``subagents`` directory, sorted by path. Empty when that
        directory does not exist.

    Examples:
        >>> # files = find_subagent_files(Path('session.jsonl'))  # doctest: +SKIP
    """
    subagents_dir = session_path.parent / session_path.stem / "subagents"

    # is_dir() rather than exists(): a stray regular file named "subagents"
    # is not a place to look for agent transcripts.
    if not subagents_dir.is_dir():
        return []

    # Keep only regular files so callers never try to open a directory that
    # merely matches the pattern. The result is a fully materialised, sorted
    # list, which gives callers a deterministic processing order.
    return sorted(
        candidate
        for candidate in subagents_dir.glob("agent-*.jsonl")
        if candidate.is_file()
    )
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
def extract_agent_summary(agent_file: Path, min_length: int = 200) -> Optional[AgentSummary]:
    """Extract the final summary text from an agent's session.

    The summary is the text content of the LAST record whose ``type`` is
    ``"assistant"``. Malformed lines and JSON values that are not objects
    are ignored.

    Args:
        agent_file: Path to an ``agent-*.jsonl`` file.
        min_length: Minimum number of characters the summary must have to be
            considered a real summary (default 200).

    Returns:
        AgentSummary, or None if the file cannot be read, contains no
        assistant record, or the final text is shorter than ``min_length``.
        ``timestamp`` is the most recent parseable timestamp seen on any
        assistant record.

    Examples:
        >>> # summary = extract_agent_summary(Path('agent-abc.jsonl'))  # doctest: +SKIP
    """
    # agent-a4e75d5.jsonl -> a4e75d5. removeprefix() strips only the leading
    # marker, so an id that itself contains "agent-" is left intact.
    agent_id = agent_file.stem.removeprefix("agent-")

    last_assistant_msg = None
    timestamp = None

    try:
        with open(agent_file, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    continue

                # A line can be valid JSON without being an object (list,
                # number, string); such values carry no "type" field.
                if not isinstance(data, dict) or data.get("type") != "assistant":
                    continue

                last_assistant_msg = data
                ts_str = data.get("timestamp")
                if isinstance(ts_str, str) and ts_str:
                    try:
                        # fromisoformat() on older Pythons rejects a "Z" suffix.
                        timestamp = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
                    except ValueError:
                        pass
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError, so it must be
        # listed explicitly for undecodable files to be skipped gracefully.
        logger.warning(f"Failed to read agent file {agent_file}: {e}")
        return None

    if not last_assistant_msg:
        return None

    # "message" may be missing or null; treat both as "no content".
    message = last_assistant_msg.get("message")
    content = message.get("content", []) if isinstance(message, dict) else []

    if isinstance(content, str):
        text_parts = [content]
    elif isinstance(content, list):
        # Only non-empty string payloads of "text" blocks contribute.
        text_parts = [
            block["text"]
            for block in content
            if isinstance(block, dict)
            and block.get("type") == "text"
            and isinstance(block.get("text"), str)
            and block["text"]
        ]
    else:
        text_parts = []

    summary_text = "\n".join(text_parts).strip()

    # Skip if too short (likely not a real summary)
    if len(summary_text) < min_length:
        logger.debug(f"Agent {agent_id} summary too short ({len(summary_text)} chars), skipping")
        return None

    return AgentSummary(
        agent_id=agent_id,
        # The agent type is not recorded in a form this function reads.
        agent_type=None,
        summary_text=summary_text,
        timestamp=timestamp,
    )
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
def extract_summary_labels(session_path: Path) -> list[SummaryLabel]:
    """Extract summary labels from a session JSONL file.

    These are the tiny "chapter titles" from compaction events: records
    whose ``type`` is ``"summary"`` and whose ``summary`` field is a
    non-empty string. Malformed lines and JSON values that are not objects
    are ignored.

    Args:
        session_path: Path to the main session JSONL file.

    Returns:
        List of SummaryLabel objects in file order. If the file cannot be
        read (or stops being readable part-way), a warning is logged and
        the labels collected so far are returned.

    Examples:
        >>> # labels = extract_summary_labels(Path('session.jsonl'))  # doctest: +SKIP
    """
    labels = []

    try:
        with open(session_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    continue

                # Valid JSON that is not an object has no "type" to inspect.
                if not isinstance(data, dict) or data.get("type") != "summary":
                    continue

                label_text = data.get("summary", "")
                if not isinstance(label_text, str) or not label_text:
                    continue

                timestamp = None
                ts_str = data.get("timestamp")
                if isinstance(ts_str, str) and ts_str:
                    try:
                        # fromisoformat() on older Pythons rejects a "Z" suffix.
                        timestamp = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
                    except ValueError:
                        pass

                labels.append(
                    SummaryLabel(
                        label=label_text,
                        leaf_uuid=data.get("leafUuid"),
                        timestamp=timestamp,
                    )
                )
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError; list it so an
        # undecodable file degrades to "no more labels" instead of crashing.
        logger.warning(f"Failed to read session file {session_path}: {e}")

    return labels
|
|
679
|
+
|
|
680
|
+
|
|
681
|
+
def extract_session_slug(session_path: Path) -> Optional[str]:
    """Extract the slug from a session JSONL file.

    The slug links the session to its plan file in ~/.claude/plans/.
    The first record that is a JSON object carrying a non-empty string
    ``slug`` wins; malformed lines, non-object JSON values and records with
    a null/empty/non-string slug are skipped.

    Args:
        session_path: Path to the main session JSONL file.

    Returns:
        The slug string, or None if no usable slug is found or the file
        cannot be read.

    Examples:
        >>> # slug = extract_session_slug(Path('session.jsonl'))  # doctest: +SKIP
    """
    try:
        with open(session_path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    data = json.loads(line)
                except json.JSONDecodeError:
                    continue

                # `"slug" in data` would also be true for a JSON string or
                # list containing "slug", and indexing those raises.
                if not isinstance(data, dict):
                    continue

                slug = data.get("slug")
                if isinstance(slug, str) and slug:
                    return slug
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError.
        logger.warning(f"Failed to read session file {session_path}: {e}")

    return None
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
def find_plan_file(slug: str, plans_dir: Optional[Path] = None) -> Optional[PlanFile]:
    """Find and read the plan file for a session slug.

    Args:
        slug: The session slug (e.g., "hidden-finding-goose"). Must be a
            bare file name: slugs containing path separators, or equal to
            "." / "..", are rejected so a crafted session record cannot
            make this function read files outside ``plans_dir``.
        plans_dir: Path to plans directory (default: ~/.claude/plans/).

    Returns:
        PlanFile, or None if the slug is rejected, the file does not exist,
        is larger than 100,000 bytes, smaller than 50 bytes, or cannot be
        read as UTF-8.

    Examples:
        >>> # plan = find_plan_file("hidden-finding-goose")  # doctest: +SKIP
    """
    max_bytes = 100_000  # sanity cap: skip huge files
    min_bytes = 50  # anything smaller is too small to be useful

    if plans_dir is None:
        plans_dir = Path.home() / ".claude" / "plans"

    # The slug originates from session data, so treat it as untrusted:
    # "../x" or "/abs/x" joined onto plans_dir would escape the directory.
    has_separator = any(ch in slug for ch in ("/", "\\", "\0"))
    if not slug or slug in (".", "..") or has_separator or Path(slug).name != slug:
        logger.warning(f"Ignoring invalid plan slug: {slug!r}")
        return None

    plan_path = plans_dir / f"{slug}.md"

    # is_file() rather than exists(): a directory named "<slug>.md" is not a plan.
    if not plan_path.is_file():
        return None

    try:
        file_size = plan_path.stat().st_size
        if file_size > max_bytes:
            logger.warning(f"Plan file too large ({file_size} bytes), skipping: {plan_path}")
            return None

        if file_size < min_bytes:
            logger.debug(f"Plan file too small ({file_size} bytes), skipping: {plan_path}")
            return None

        content = plan_path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as e:
        # UnicodeDecodeError is a ValueError, not an OSError; without it a
        # non-UTF-8 plan file would crash the caller instead of being skipped.
        logger.warning(f"Failed to read plan file {plan_path}: {e}")
        return None

    return PlanFile(
        slug=slug,
        path=plan_path,
        content=content,
    )
|
|
756
|
+
|
|
757
|
+
|
|
758
|
+
@dataclass
class EnrichedTranscript(ParsedTranscript):
    """A ParsedTranscript extended with extra extracted context.

    On top of the base transcript fields it carries the compaction summary
    labels, the subagent summaries, and the linked plan file with its slug.
    """

    # Compaction "chapter title" labels found in the session file.
    summary_labels: list[SummaryLabel] = field(default_factory=list)
    # One entry per subagent whose final output yielded a usable summary.
    agent_summaries: list[AgentSummary] = field(default_factory=list)
    # Plan file resolved from the slug, when one was found.
    plan: Optional[PlanFile] = None
    # Session slug used to look up the plan file, when present.
    slug: Optional[str] = None
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
def parse_jsonl_file_enriched(filepath: Path) -> EnrichedTranscript:
    """Parse a Claude session file together with its enrichment data.

    Beyond the base transcript (user and assistant messages) this gathers:
    - summary labels (compaction chapter titles)
    - subagent summaries (final outputs of the session's subagents)
    - the plan file, when the session carries a slug that resolves to one

    Args:
        filepath: Path to the .jsonl session file

    Returns:
        EnrichedTranscript combining the base transcript fields with the
        extracted labels, agent summaries, plan and slug

    Examples:
        >>> # transcript = parse_jsonl_file_enriched(Path('session.jsonl'))  # doctest: +SKIP
    """
    transcript = parse_jsonl_file(filepath)
    summary_labels = extract_summary_labels(filepath)

    # Keep only the subagent files that produced a usable summary.
    extracted = (extract_agent_summary(path) for path in find_subagent_files(filepath))
    summaries = [item for item in extracted if item]

    # The slug, when present, is the key used to look up the plan file.
    session_slug = extract_session_slug(filepath)
    plan_file = None
    if session_slug:
        plan_file = find_plan_file(session_slug)

    return EnrichedTranscript(
        session_id=transcript.session_id,
        messages=transcript.messages,
        user_messages=transcript.user_messages,
        full_text=transcript.full_text,
        intent_text=transcript.intent_text,
        timestamp=transcript.timestamp,
        summary_labels=summary_labels,
        agent_summaries=summaries,
        plan=plan_file,
        slug=session_slug,
    )
|