claude-self-reflect 2.5.19 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -1
- package/package.json +7 -1
- package/scripts/import-conversations-unified.py +174 -3
package/README.md
CHANGED

@@ -1,6 +1,34 @@
  # Claude Self-Reflect

-
+ <div align="center">
+
+ [](https://www.npmjs.com/package/claude-self-reflect)
+ [](https://www.npmjs.com/package/claude-self-reflect)
+ [](https://opensource.org/licenses/MIT)
+ [](https://github.com/ramakay/claude-self-reflect/actions/workflows/ci.yml)
+
+ [](https://github.com/anthropics/claude-code)
+ [](https://modelcontextprotocol.io/)
+ [](https://www.docker.com/)
+ [](https://github.com/ramakay/claude-self-reflect)
+
+ [](https://github.com/ramakay/claude-self-reflect/stargazers)
+ [](https://github.com/ramakay/claude-self-reflect/issues)
+ [](https://github.com/ramakay/claude-self-reflect/pulls)
+
+ </div>
+
+ **Claude forgets everything. This fixes that.**
+
+ Give Claude perfect memory of all your conversations. Search past discussions instantly. Never lose context again.
+
+ **100% Local by Default** - Your conversations never leave your machine. No cloud services required, no API keys needed, complete privacy out of the box.
+
+ **Blazing Fast Search** - Semantic search across thousands of conversations in milliseconds. Find that discussion about database schemas from three weeks ago in seconds.
+
+ **Zero Configuration** - Works immediately after installation. Smart auto-detection handles everything. No manual setup, no environment variables, just install and use.
+
+ **Production Ready** - Battle-tested with 600+ conversations across 24 projects. Handles mixed embedding types automatically. Scales from personal use to team deployments.

  ## Table of Contents

package/package.json
CHANGED

@@ -1,6 +1,6 @@
  {
    "name": "claude-self-reflect",
-   "version": "2.5.19",
+   "version": "2.6.0",
    "description": "Give Claude perfect memory of all your conversations - Installation wizard for Python MCP server",
    "keywords": [
      "claude",

@@ -13,6 +13,12 @@
      "ai-memory",
      "claude-code"
    ],
+   "badges": {
+     "npm": "https://badge.fury.io/js/claude-self-reflect.svg",
+     "license": "https://img.shields.io/badge/License-MIT-yellow.svg",
+     "docker": "https://img.shields.io/badge/Docker-Required-blue.svg",
+     "claude": "https://img.shields.io/badge/Claude%20Code-Compatible-green.svg"
+   },
    "homepage": "https://github.com/ramakay/claude-self-reflect#readme",
    "bugs": {
      "url": "https://github.com/ramakay/claude-self-reflect/issues"
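The new badges map exposes the shield image URLs in the package metadata alongside the version bump. A minimal sketch of reading the field back, assuming the file is read from package/package.json in the current checkout; the path and the consumer code are illustrative, not part of the package:

```python
import json
from pathlib import Path

# Hypothetical consumer of the new "badges" field; the relative path is an assumption.
pkg = json.loads(Path("package/package.json").read_text())
print(pkg["version"])                  # "2.6.0" after this release
for name, url in pkg.get("badges", {}).items():
    print(f"{name}: {url}")            # npm, license, docker, claude
```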
package/scripts/import-conversations-unified.py
CHANGED

@@ -42,6 +42,20 @@ PREFER_LOCAL_EMBEDDINGS = os.getenv("PREFER_LOCAL_EMBEDDINGS", "false").lower()
  VOYAGE_API_KEY = os.getenv("VOYAGE_KEY")
  CURRENT_METADATA_VERSION = 2  # Version 2: Added tool output extraction

+ # Token limit configuration for Voyage AI
+ MAX_TOKENS_PER_BATCH = int(os.getenv("MAX_TOKENS_PER_BATCH", "100000"))  # Safe limit (120k - 20k buffer)
+ if MAX_TOKENS_PER_BATCH > 120000 or MAX_TOKENS_PER_BATCH < 1000:
+     logger.warning(f"MAX_TOKENS_PER_BATCH={MAX_TOKENS_PER_BATCH} outside safe range [1000, 120000], using 100000")
+     MAX_TOKENS_PER_BATCH = 100000
+
+ TOKEN_ESTIMATION_RATIO = int(os.getenv("TOKEN_ESTIMATION_RATIO", "3"))  # chars per token estimate
+ if TOKEN_ESTIMATION_RATIO < 2 or TOKEN_ESTIMATION_RATIO > 10:
+     logger.warning(f"TOKEN_ESTIMATION_RATIO={TOKEN_ESTIMATION_RATIO} outside normal range [2, 10], using 3")
+     TOKEN_ESTIMATION_RATIO = 3
+
+ USE_TOKEN_AWARE_BATCHING = os.getenv("USE_TOKEN_AWARE_BATCHING", "true").lower() == "true"
+ MAX_RECURSION_DEPTH = 10  # Maximum depth for recursive chunk splitting
+
  # Set up logging
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  logger = logging.getLogger(__name__)
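The two integer knobs are read from the environment and clamped back to their defaults when out of range. Worth noting: as ordered in this hunk, the clamp's logger.warning calls sit above the "# Set up logging" block that defines logger, so an out-of-range value would seemingly raise a NameError at import time rather than logging a warning. Below is a minimal sketch of the same clamp-to-default pattern as a standalone helper; clamped_int_env is hypothetical and not part of the package:

```python
import os


def clamped_int_env(name: str, default: int, lo: int, hi: int) -> int:
    """Read an integer env var, falling back to the default when out of range."""
    value = int(os.getenv(name, str(default)))
    if value < lo or value > hi:
        # The script logs a warning at this point; print keeps the sketch dependency-free.
        print(f"{name}={value} outside safe range [{lo}, {hi}], using {default}")
        return default
    return value


MAX_TOKENS_PER_BATCH = clamped_int_env("MAX_TOKENS_PER_BATCH", 100_000, 1_000, 120_000)
TOKEN_ESTIMATION_RATIO = clamped_int_env("TOKEN_ESTIMATION_RATIO", 3, 2, 10)
```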
@@ -381,6 +395,38 @@ def log_retry_state(retry_state):
  def embed_with_backoff(**kwargs):
      return voyage_client.embed(**kwargs)

+ def estimate_tokens(text: str) -> int:
+     """Estimate token count for text with content-aware heuristics.
+     Base estimate uses TOKEN_ESTIMATION_RATIO, adjusted for content type.
+     """
+     # Base estimate
+     base_tokens = len(text) // TOKEN_ESTIMATION_RATIO
+
+     # Adjust for code/JSON content (typically more tokens per char)
+     # Count indicators of structured content
+     structure_indicators = text.count('{') + text.count('[') + text.count('```')
+     if structure_indicators > 10:  # Likely JSON/code
+         base_tokens = int(base_tokens * 1.3)
+
+     # Add 10% safety margin
+     return int(base_tokens * 1.1)
+
+ def extract_message_content(msg: Dict[str, Any]) -> str:
+     """Extract text content from a message."""
+     content = msg.get("content", "")
+
+     if isinstance(content, list):
+         # Handle structured content
+         text_parts = []
+         for item in content:
+             if isinstance(item, dict) and item.get("type") == "text":
+                 text_parts.append(item.get("text", ""))
+             elif isinstance(item, str):
+                 text_parts.append(item)
+         content = " ".join(text_parts)
+
+     return content
+
  def generate_embeddings(texts: List[str]) -> List[List[float]]:
      """Generate embeddings for a list of texts."""
      if PREFER_LOCAL_EMBEDDINGS or not VOYAGE_API_KEY:
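A short usage sketch of the two helpers, assuming the definitions above are in scope and TOKEN_ESTIMATION_RATIO is the default 3; the sample strings are invented for illustration. Plain text is estimated at roughly one token per three characters plus a 10% margin, text with more than ten braces, brackets, or code-fence markers gets a further 1.3x adjustment, and extract_message_content flattens structured message content into a single string.

```python
# Illustrative inputs only; assumes estimate_tokens and extract_message_content
# from the hunk above are defined in the current module.
plain = "USER: how do I rebuild the search index for one project? " * 20
print(estimate_tokens(plain))        # ~ len(plain) // 3, plus the 10% safety margin

structured = '{"tool": "Bash", "output": ["line 1", "line 2"]}' * 30
print(estimate_tokens(structured))   # >10 '{'/'[' markers, so the 1.3x code/JSON bump applies

msg = {
    "role": "assistant",
    "content": [
        {"type": "text", "text": "Here is the plan:"},
        {"type": "tool_use", "name": "Bash"},   # non-text dict items are skipped
        "plus a trailing plain-string part",
    ],
}
print(extract_message_content(msg))  # "Here is the plan: plus a trailing plain-string part"
```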
@@ -432,6 +478,125 @@ def chunk_conversation(messages: List[Dict[str, Any]], chunk_size: int = 10) ->

      return chunks

+ def split_large_chunk(chunk: Dict[str, Any], max_tokens: int, depth: int = 0) -> List[Dict[str, Any]]:
+     """Split a large chunk into smaller pieces that fit token limit."""
+     # Check recursion depth to prevent stack overflow
+     if depth >= MAX_RECURSION_DEPTH:
+         logger.error(f"Max recursion depth {MAX_RECURSION_DEPTH} reached while splitting chunk")
+         # Force truncate as last resort
+         max_chars = max_tokens * TOKEN_ESTIMATION_RATIO
+         chunk["text"] = chunk["text"][:max_chars] + "\n[TRUNCATED - MAX DEPTH REACHED]"
+         chunk["was_truncated"] = True
+         return [chunk]
+
+     text = chunk["text"]
+     messages = chunk["messages"]
+
+     # First, check if we can split by messages
+     if len(messages) > 1:
+         # Try splitting messages into smaller groups
+         mid = len(messages) // 2
+         chunk1_messages = messages[:mid]
+         chunk2_messages = messages[mid:]
+
+         # Recreate text for each split
+         texts1 = []
+         texts2 = []
+
+         for msg in chunk1_messages:
+             role = msg.get("role", "unknown")
+             content = extract_message_content(msg)
+             if content:
+                 texts1.append(f"{role.upper()}: {content}")
+
+         for msg in chunk2_messages:
+             role = msg.get("role", "unknown")
+             content = extract_message_content(msg)
+             if content:
+                 texts2.append(f"{role.upper()}: {content}")
+
+         split_chunks = []
+         if texts1:
+             split_chunks.append({
+                 "text": "\n".join(texts1),
+                 "messages": chunk1_messages,
+                 "chunk_index": f"{chunk['chunk_index']}_a",
+                 "start_role": chunk["start_role"]
+             })
+         if texts2:
+             split_chunks.append({
+                 "text": "\n".join(texts2),
+                 "messages": chunk2_messages,
+                 "chunk_index": f"{chunk['chunk_index']}_b",
+                 "start_role": chunk2_messages[0].get("role", "unknown") if chunk2_messages else "unknown"
+             })
+
+         # Recursively split if still too large
+         result = []
+         for split_chunk in split_chunks:
+             if estimate_tokens(split_chunk["text"]) > max_tokens:
+                 result.extend(split_large_chunk(split_chunk, max_tokens, depth + 1))
+             else:
+                 result.append(split_chunk)
+         return result
+     else:
+         # Single message too large - truncate with warning
+         max_chars = max_tokens * TOKEN_ESTIMATION_RATIO
+         if len(text) > max_chars:
+             truncated_size = len(text) - max_chars
+             logger.warning(f"Single message exceeds token limit, truncating {truncated_size} chars from {len(text)} total")
+             chunk["text"] = text[:max_chars] + f"\n[TRUNCATED {truncated_size} CHARS]"
+             chunk["was_truncated"] = True
+             chunk["original_size"] = len(text)
+         return [chunk]
+
+ def create_token_aware_batches(chunks: List[Dict[str, Any]], max_tokens: int = MAX_TOKENS_PER_BATCH) -> List[List[Dict[str, Any]]]:
+     """Create batches that respect token limits."""
+     if not USE_TOKEN_AWARE_BATCHING:
+         # Fall back to old batching method
+         batches = []
+         for i in range(0, len(chunks), BATCH_SIZE):
+             batches.append(chunks[i:i + BATCH_SIZE])
+         return batches
+
+     batches = []
+     current_batch = []
+     current_tokens = 0
+
+     for chunk in chunks:
+         chunk_tokens = estimate_tokens(chunk["text"])
+
+         # If single chunk exceeds limit, split it
+         if chunk_tokens > max_tokens:
+             logger.warning(f"Chunk with {chunk_tokens} estimated tokens exceeds limit of {max_tokens}, splitting...")
+             split_chunks = split_large_chunk(chunk, max_tokens)
+             for split_chunk in split_chunks:
+                 split_tokens = estimate_tokens(split_chunk["text"])
+                 if split_tokens > max_tokens:
+                     logger.error(f"Split chunk still exceeds limit: {split_tokens} tokens")
+                 batches.append([split_chunk])
+         # If adding chunk would exceed limit, start new batch
+         elif current_tokens + chunk_tokens > max_tokens:
+             if current_batch:
+                 batches.append(current_batch)
+             current_batch = [chunk]
+             current_tokens = chunk_tokens
+         else:
+             current_batch.append(chunk)
+             current_tokens += chunk_tokens
+
+     if current_batch:
+         batches.append(current_batch)
+
+     # Log batch statistics
+     if batches:
+         batch_sizes = [len(batch) for batch in batches]
+         batch_tokens = [sum(estimate_tokens(chunk["text"]) for chunk in batch) for batch in batches]
+         logger.debug(f"Created {len(batches)} batches, chunk counts: min={min(batch_sizes)}, max={max(batch_sizes)}, "
+                      f"estimated tokens: min={min(batch_tokens)}, max={max(batch_tokens)}, avg={sum(batch_tokens)//len(batches)}")
+
+     return batches
+
  def import_project(project_path: Path, collection_name: str, state: dict) -> int:
      """Import all conversations from a project."""
      jsonl_files = list(project_path.glob("*.jsonl"))
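To see how these two functions interact, here is a rough demonstration, assuming the functions and constants from the hunks above are in scope and USE_TOKEN_AWARE_BATCHING is left at its default of true. The chunk dictionaries mirror the keys the script uses (text, messages, chunk_index, start_role), but the contents are made up: the oversized single-message chunk is routed through split_large_chunk, which truncates it because it cannot be split further, and it lands in its own batch, while the two small chunks share another.

```python
# Illustrative only: exercise create_token_aware_batches with fabricated chunks.
def fake_chunk(idx, text):
    return {
        "text": text,
        "messages": [{"role": "user", "content": text}],
        "chunk_index": idx,
        "start_role": "user",
    }

chunks = [
    fake_chunk(0, "short question about the Docker setup"),
    fake_chunk(1, "A" * 400_000),   # estimates far above the 100k-token batch limit
    fake_chunk(2, "follow-up about Qdrant collection names"),
]

for i, batch in enumerate(create_token_aware_batches(chunks, max_tokens=100_000)):
    est = sum(estimate_tokens(c["text"]) for c in batch)
    print(f"batch {i}: {len(batch)} chunk(s), ~{est} estimated tokens")
```

Note that the truncation target is max_tokens * TOKEN_ESTIMATION_RATIO characters while the estimator adds a 10% margin, so a truncated single-message chunk can still trip the "Split chunk still exceeds limit" error even though it is shipped as its own batch.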
@@ -524,11 +689,17 @@ def import_project(project_path: Path, collection_name: str, state: dict) -> int
          if not chunks:
              continue

-         # Process in batches
-
-
+         # Process in batches (token-aware if enabled)
+         token_aware_batches = create_token_aware_batches(chunks)
+
+         for batch_idx, batch in enumerate(token_aware_batches):
              texts = [chunk["text"] for chunk in batch]

+             # Log batch info for debugging
+             if USE_TOKEN_AWARE_BATCHING:
+                 total_tokens = sum(estimate_tokens(text) for text in texts)
+                 logger.debug(f"Batch {batch_idx + 1}/{len(token_aware_batches)}: {len(texts)} chunks, ~{total_tokens} estimated tokens")
+
              # Generate embeddings
              embeddings = generate_embeddings(texts)

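Since the import loop now feeds each conversation's chunks through create_token_aware_batches before calling generate_embeddings, the behavior is tuned entirely through the environment variables introduced at the top of the script. A hedged sketch of setting them before the importer starts; the values are examples, not recommendations:

```python
import os

# Variable names and clamp ranges come from the diff above; they are read at module import.
os.environ["MAX_TOKENS_PER_BATCH"] = "80000"      # accepted range is [1000, 120000]
os.environ["TOKEN_ESTIMATION_RATIO"] = "4"        # accepted range is [2, 10]
os.environ["USE_TOKEN_AWARE_BATCHING"] = "true"   # "false" falls back to fixed BATCH_SIZE batches

# With these set, each embedding batch is kept under ~80k estimated tokens,
# and the per-batch debug line reports chunk counts and token estimates.
```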