claude-jacked 0.2.7__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_jacked-0.3.0.dist-info/METADATA +667 -0
- claude_jacked-0.3.0.dist-info/RECORD +33 -0
- jacked/__init__.py +34 -14
- jacked/cli.py +513 -60
- jacked/client.py +78 -28
- jacked/data/agents/double-check-reviewer.md +42 -0
- jacked/data/commands/audit-rules.md +103 -0
- jacked/data/commands/dc.md +36 -3
- jacked/data/commands/learn.md +89 -0
- jacked/data/commands/redo.md +85 -0
- jacked/data/commands/techdebt.md +115 -0
- jacked/data/hooks/security_gatekeeper.py +415 -0
- jacked/data/rules/jacked_behaviors.md +11 -0
- jacked/index_write_tracker.py +227 -0
- jacked/indexer.py +189 -163
- jacked/searcher.py +4 -0
- claude_jacked-0.2.7.dist-info/METADATA +0 -580
- claude_jacked-0.2.7.dist-info/RECORD +0 -26
- {claude_jacked-0.2.7.dist-info → claude_jacked-0.3.0.dist-info}/WHEEL +0 -0
- {claude_jacked-0.2.7.dist-info → claude_jacked-0.3.0.dist-info}/entry_points.txt +0 -0
- {claude_jacked-0.2.7.dist-info → claude_jacked-0.3.0.dist-info}/licenses/LICENSE +0 -0
jacked/indexer.py
CHANGED
@@ -31,6 +31,7 @@ from jacked.transcript import (
     chunk_text,
     EnrichedTranscript,
 )
+from jacked.index_write_tracker import IndexWriteTracker
 
 
 logger = logging.getLogger(__name__)
@@ -69,6 +70,10 @@ class SessionIndexer:
         """
         self.config = config
         self.client = client or QdrantSessionClient(config)
+        # Config hash for detecting chunk_size/overlap changes
+        self._config_hash = content_hash(f"{config.chunk_size}:{config.chunk_overlap}")
+        # Write tracker for incremental indexing (NOT for retrieval!)
+        self._tracker = IndexWriteTracker(self._config_hash)
 
     def index_session(
         self,
@@ -77,23 +82,24 @@ class SessionIndexer:
         force: bool = False,
     ) -> dict:
         """
-        Index a single session to Qdrant with
+        Index a single session to Qdrant with incremental updates.
+
+        Uses local SQLite tracker to avoid re-pushing unchanged content.
+        Only indexes NEW or CHANGED points - much more efficient than
+        the old delete-all-and-replace approach.
 
         Args:
             session_path: Path to the .jsonl session file
             repo_path: Full path to the repository
-            force: If True, re-
+            force: If True, clear tracker and re-seed from Qdrant
 
         Returns:
             Dict with indexing results:
             - session_id: The session ID
-            - indexed: Whether
-            - skipped: Whether it was skipped (
-            -
-            - subagent_summaries
-            - summary_labels: Number of summary label points
-            - user_messages: Number of user message points
-            - chunks: Number of transcript chunk points
+            - indexed: Whether new content was indexed
+            - skipped: Whether it was skipped (no new content)
+            - new_points: Number of new/changed points indexed
+            - plans, subagent_summaries, etc.: Counts by content type
             - error: Error message if failed
 
         Examples:
@@ -104,6 +110,7 @@ class SessionIndexer:
             "session_id": session_path.stem,
             "indexed": False,
             "skipped": False,
+            "new_points": 0,
             "plans": 0,
             "subagent_summaries": 0,
             "summary_labels": 0,
@@ -118,36 +125,58 @@ class SessionIndexer:
 
         # Parse the transcript with enriched data
         transcript = parse_jsonl_file_enriched(session_path)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        session_id = transcript.session_id
+        result["session_id"] = session_id
+
+        # Check session metadata from tracker
+        meta = self._tracker.get_session_meta(session_id)
+
+        # Config changed? Clear and re-seed from Qdrant
+        if meta and meta["config_hash"] != self._config_hash:
+            logger.info(f"Config changed for session {session_id}, re-seeding from Qdrant")
+            self._tracker.clear_session(session_id)
+            meta = None
+
+        # Previous crash mid-indexing? Force re-index
+        if meta and meta["status"] == "indexing":
+            logger.info(f"Session {session_id} was interrupted mid-index, forcing re-seed")
+            force = True
+
+        # Cache miss or force? Seed from Qdrant (source of truth, THIS USER ONLY)
+        if meta is None or force:
+            self._tracker.clear_session(session_id)
+            self._tracker.seed_from_qdrant(session_id, self.client, self.config.user_name)
+
+        # Get what's already indexed
+        indexed = self._tracker.get_session_state(session_id)
+
+        # Mark as indexing BEFORE doing work (crash safety)
+        self._tracker.mark_indexing(session_id)
+
+        # Build only NEW/CHANGED points
+        points_to_index, points_metadata = self._build_incremental_points(
+            transcript, repo_path, indexed
+        )
+
+        if not points_to_index:
+            self._tracker.mark_complete(session_id)
+            result["skipped"] = True
+            logger.debug(f"Session {session_id}: no new content to index")
             return result
 
-        #
-        self.client.
+        # Upsert to Qdrant (no delete needed - deterministic IDs handle overwrites)
+        self.client.upsert_points(points_to_index)
 
-        #
-
+        # Record what we indexed in tracker
+        for content_type, idx, hash_val, point_id in points_metadata:
+            self._tracker.record_indexed(session_id, content_type, idx, hash_val, str(point_id))
+
+        self._tracker.mark_complete(session_id)
 
         # Count results by content_type
         result["indexed"] = True
-
-
-            content_type = payload.get("content_type", payload.get("type"))
+        result["new_points"] = len(points_to_index)
+        for content_type, _, _, _ in points_metadata:
             if content_type == "plan":
                 result["plans"] += 1
             elif content_type == "subagent_summary":
@@ -160,12 +189,13 @@ class SessionIndexer:
                 result["chunks"] += 1
 
         logger.info(
-            f"Indexed session {
+            f"Indexed session {session_id}: "
+            f"{result['new_points']} new points ("
             f"{result['plans']} plan, "
-            f"{result['subagent_summaries']}
+            f"{result['subagent_summaries']} summaries, "
             f"{result['summary_labels']} labels, "
-            f"{result['user_messages']}
-            f"{result['chunks']} chunks"
+            f"{result['user_messages']} msgs, "
+            f"{result['chunks']} chunks)"
         )
 
         return result
@@ -175,23 +205,6 @@ class SessionIndexer:
             result["error"] = str(e)
             return result
 
-    def _get_existing_hash(self, session_id: str) -> Optional[str]:
-        """
-        Get the content hash of an existing indexed session.
-
-        Args:
-            session_id: Session ID to check
-
-        Returns:
-            Content hash string or None if not found
-        """
-        # Look for the first user_message point using deterministic UUID
-        point_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"{session_id}:user_message:0"))
-        point = self.client.get_point_by_id(point_id)
-        if point and point.payload:
-            return point.payload.get("content_hash")
-        return None
-
     def _make_point_id(self, session_id: str, content_type: str, index: int) -> str:
         """Generate deterministic point ID.
 
@@ -205,29 +218,27 @@ class SessionIndexer:
         """
         return str(uuid.uuid5(uuid.NAMESPACE_DNS, f"{session_id}:{content_type}:{index}"))
 
-    def
+    def _build_incremental_points(
         self,
         transcript: EnrichedTranscript,
         repo_path: str,
-
+        indexed: dict,
+    ) -> tuple[list[models.PointStruct], list[tuple]]:
         """
-        Build
-
-        Creates points for:
-        - plan: Full implementation strategy (if exists)
-        - subagent_summary: Rich summaries from agent outputs
-        - summary_label: Tiny chapter titles from compaction
-        - user_message: First few user messages for intent matching
-        - chunk: Full transcript chunks for full retrieval
+        Build only NEW or CHANGED points by comparing against what's already indexed.
 
         Args:
            transcript: EnrichedTranscript with all extracted data
            repo_path: Full path to the repository
+           indexed: Dict mapping (content_type, index) -> content_hash from tracker
 
        Returns:
-
+           Tuple of (points_to_index, points_metadata) where points_metadata is
+           a list of (content_type, index, content_hash, point_id) tuples
        """
-
+        points_to_index = []
+        points_metadata = []  # (content_type, index, hash, point_id)
+
         repo_id = get_repo_id(repo_path)
         repo_name = get_repo_name(repo_path)
         full_hash = content_hash(transcript.full_text)
@@ -250,93 +261,106 @@ class SessionIndexer:
             "slug": transcript.slug,
         }
 
-        # 1. Plan
+        # 1. Plan - check hash
         if transcript.plan:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        for i, agent_summary in enumerate(transcript.agent_summaries):
-            point_id = self._make_point_id(transcript.session_id, "subagent_summary", i)
-            points.append(
-                models.PointStruct(
-                    id=point_id,
-                    vector=models.Document(
-                        text=agent_summary.summary_text[:8000],  # Limit for embedding
-                        model=INFERENCE_MODEL,
-                    ),
-                    payload={
-                        **base_payload,
-                        "type": "subagent_summary",
-                        "content_type": "subagent_summary",
-                        "content": agent_summary.summary_text,
-                        "agent_id": agent_summary.agent_id,
-                        "agent_type": agent_summary.agent_type,
-                        "chunk_index": i,
-                    },
+            plan_hash = content_hash(transcript.plan.content)
+            if indexed.get(("plan", 0)) != plan_hash:
+                point_id = self._make_point_id(transcript.session_id, "plan", 0)
+                points_to_index.append(
+                    models.PointStruct(
+                        id=point_id,
+                        vector=models.Document(
+                            text=transcript.plan.content[:8000],
+                            model=INFERENCE_MODEL,
+                        ),
+                        payload={
+                            **base_payload,
+                            "type": "plan",
+                            "content_type": "plan",
+                            "content": transcript.plan.content,
+                            "plan_path": str(transcript.plan.path),
+                            "chunk_index": 0,
+                        },
+                    )
                 )
-            )
-
-        # 3. Summary labels (chapter titles from compaction)
-        for i, label in enumerate(transcript.summary_labels):
-            point_id = self._make_point_id(transcript.session_id, "summary_label", i)
-            points.append(
-                models.PointStruct(
-                    id=point_id,
-                    vector=models.Document(
-                        text=label.label,
-                        model=INFERENCE_MODEL,
-                    ),
-                    payload={
-                        **base_payload,
-                        "type": "summary_label",
-                        "content_type": "summary_label",
-                        "content": label.label,
-                        "leaf_uuid": label.leaf_uuid,
-                        "chunk_index": i,
-                    },
-                )
-            )
+                points_metadata.append(("plan", 0, plan_hash, point_id))
 
-        #
+        # 2. User messages - compare by content hash
         max_user_messages = 5
         for i, msg in enumerate(transcript.user_messages[:max_user_messages]):
             if not msg.content or len(msg.content) < 20:
                 continue
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            msg_hash = content_hash(msg.content)
+            if indexed.get(("user_message", i)) != msg_hash:
+                point_id = self._make_point_id(transcript.session_id, "user_message", i)
+                points_to_index.append(
+                    models.PointStruct(
+                        id=point_id,
+                        vector=models.Document(
+                            text=msg.content[:2000],
+                            model=INFERENCE_MODEL,
+                        ),
+                        payload={
+                            **base_payload,
+                            "type": "user_message",
+                            "content_type": "user_message",
+                            "content": msg.content,
+                            "chunk_index": i,
+                        },
+                    )
                 )
-
+                points_metadata.append(("user_message", i, msg_hash, point_id))
 
-        #
+        # 3. Agent summaries - compare by hash
+        for i, agent_summary in enumerate(transcript.agent_summaries):
+            summary_hash = content_hash(agent_summary.summary_text)
+            if indexed.get(("subagent_summary", i)) != summary_hash:
+                point_id = self._make_point_id(transcript.session_id, "subagent_summary", i)
+                points_to_index.append(
+                    models.PointStruct(
+                        id=point_id,
+                        vector=models.Document(
+                            text=agent_summary.summary_text[:8000],
+                            model=INFERENCE_MODEL,
+                        ),
+                        payload={
+                            **base_payload,
+                            "type": "subagent_summary",
+                            "content_type": "subagent_summary",
+                            "content": agent_summary.summary_text,
+                            "agent_id": agent_summary.agent_id,
+                            "agent_type": agent_summary.agent_type,
+                            "chunk_index": i,
+                        },
+                    )
+                )
+                points_metadata.append(("subagent_summary", i, summary_hash, point_id))
+
+        # 4. Summary labels - compare by hash
+        for i, label in enumerate(transcript.summary_labels):
+            label_hash = content_hash(label.label)
+            if indexed.get(("summary_label", i)) != label_hash:
+                point_id = self._make_point_id(transcript.session_id, "summary_label", i)
+                points_to_index.append(
+                    models.PointStruct(
+                        id=point_id,
+                        vector=models.Document(
+                            text=label.label,
+                            model=INFERENCE_MODEL,
+                        ),
+                        payload={
+                            **base_payload,
+                            "type": "summary_label",
+                            "content_type": "summary_label",
+                            "content": label.label,
+                            "leaf_uuid": label.leaf_uuid,
+                            "chunk_index": i,
+                        },
+                    )
+                )
+                points_metadata.append(("summary_label", i, label_hash, point_id))
+
+        # 5. Chunks - compare by hash (handles boundary drift)
         transcript_chunks = chunk_text(
             transcript.full_text,
             chunk_size=self.config.chunk_size,
@@ -346,27 +370,29 @@ class SessionIndexer:
         for i, chunk in enumerate(transcript_chunks):
             if not chunk.strip():
                 continue
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            chunk_hash = content_hash(chunk)
+            if indexed.get(("chunk", i)) != chunk_hash:
+                point_id = self._make_point_id(transcript.session_id, "chunk", i)
+                points_to_index.append(
+                    models.PointStruct(
+                        id=point_id,
+                        vector=models.Document(
+                            text=chunk[:4000],
+                            model=INFERENCE_MODEL,
+                        ),
+                        payload={
+                            **base_payload,
+                            "type": "chunk",
+                            "content_type": "chunk",
+                            "content": chunk,
+                            "chunk_index": i,
+                            "total_chunks": len(transcript_chunks),
+                        },
+                    )
                 )
-
+                points_metadata.append(("chunk", i, chunk_hash, point_id))
 
-        return
+        return points_to_index, points_metadata
 
     def index_all_sessions(
         self,
jacked/searcher.py
CHANGED
@@ -333,9 +333,13 @@ class SessionSearcher:
                 session_id=session.get("session_id", ""),
                 repo_name=session.get("repo_name", "unknown"),
                 repo_path=session.get("repo_path", ""),
+                user_name=session.get("user_name", "unknown"),
                 machine=session.get("machine", "unknown"),
                 timestamp=timestamp,
                 score=100,  # No relevance score for list
+                semantic_score=0.0,  # Not applicable for list
+                is_own=session.get("user_name") == self.config.user_name,
+                is_current_repo=True,  # We're filtering by repo
                 intent_preview="",  # Not available in list
                 chunk_count=session.get("chunk_count", 0),
             )