claude-self-reflect 2.5.19 → 2.6.0

This diff shows the changes between publicly available package versions as they appear in their respective public registries, and is provided for informational purposes only.
package/README.md CHANGED
@@ -1,6 +1,34 @@
  # Claude Self-Reflect

- Claude forgets everything. This fixes that.
+ <div align="center">
+
+ [![npm version](https://badge.fury.io/js/claude-self-reflect.svg)](https://www.npmjs.com/package/claude-self-reflect)
+ [![npm downloads](https://img.shields.io/npm/dm/claude-self-reflect.svg)](https://www.npmjs.com/package/claude-self-reflect)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+ [![GitHub CI](https://github.com/ramakay/claude-self-reflect/actions/workflows/ci.yml/badge.svg)](https://github.com/ramakay/claude-self-reflect/actions/workflows/ci.yml)
+
+ [![Claude Code](https://img.shields.io/badge/Claude%20Code-Compatible-6B4FBB)](https://github.com/anthropics/claude-code)
+ [![MCP Protocol](https://img.shields.io/badge/MCP-Enabled-FF6B6B)](https://modelcontextprotocol.io/)
+ [![Docker](https://img.shields.io/badge/Docker-Ready-2496ED?logo=docker&logoColor=white)](https://www.docker.com/)
+ [![Local First](https://img.shields.io/badge/Local%20First-Privacy-4A90E2)](https://github.com/ramakay/claude-self-reflect)
+
+ [![GitHub stars](https://img.shields.io/github/stars/ramakay/claude-self-reflect.svg?style=social)](https://github.com/ramakay/claude-self-reflect/stargazers)
+ [![GitHub issues](https://img.shields.io/github/issues/ramakay/claude-self-reflect.svg)](https://github.com/ramakay/claude-self-reflect/issues)
+ [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://github.com/ramakay/claude-self-reflect/pulls)
+
+ </div>
+
+ **Claude forgets everything. This fixes that.**
+
+ Give Claude perfect memory of all your conversations. Search past discussions instantly. Never lose context again.
+
+ **100% Local by Default** - Your conversations never leave your machine. No cloud services required, no API keys needed, complete privacy out of the box.
+
+ **Blazing Fast Search** - Semantic search across thousands of conversations in milliseconds. Find that discussion about database schemas from three weeks ago in seconds.
+
+ **Zero Configuration** - Works immediately after installation. Smart auto-detection handles everything. No manual setup, no environment variables, just install and use.
+
+ **Production Ready** - Battle-tested with 600+ conversations across 24 projects. Handles mixed embedding types automatically. Scales from personal use to team deployments.

  ## Table of Contents

package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "claude-self-reflect",
-   "version": "2.5.19",
+   "version": "2.6.0",
    "description": "Give Claude perfect memory of all your conversations - Installation wizard for Python MCP server",
    "keywords": [
      "claude",
@@ -13,6 +13,12 @@
      "ai-memory",
      "claude-code"
    ],
+   "badges": {
+     "npm": "https://badge.fury.io/js/claude-self-reflect.svg",
+     "license": "https://img.shields.io/badge/License-MIT-yellow.svg",
+     "docker": "https://img.shields.io/badge/Docker-Required-blue.svg",
+     "claude": "https://img.shields.io/badge/Claude%20Code-Compatible-green.svg"
+   },
    "homepage": "https://github.com/ramakay/claude-self-reflect#readme",
    "bugs": {
      "url": "https://github.com/ramakay/claude-self-reflect/issues"
@@ -42,6 +42,20 @@ PREFER_LOCAL_EMBEDDINGS = os.getenv("PREFER_LOCAL_EMBEDDINGS", "false").lower()
  VOYAGE_API_KEY = os.getenv("VOYAGE_KEY")
  CURRENT_METADATA_VERSION = 2  # Version 2: Added tool output extraction

+ # Token limit configuration for Voyage AI
+ MAX_TOKENS_PER_BATCH = int(os.getenv("MAX_TOKENS_PER_BATCH", "100000"))  # Safe limit (120k - 20k buffer)
+ if MAX_TOKENS_PER_BATCH > 120000 or MAX_TOKENS_PER_BATCH < 1000:
+     logger.warning(f"MAX_TOKENS_PER_BATCH={MAX_TOKENS_PER_BATCH} outside safe range [1000, 120000], using 100000")
+     MAX_TOKENS_PER_BATCH = 100000
+
+ TOKEN_ESTIMATION_RATIO = int(os.getenv("TOKEN_ESTIMATION_RATIO", "3"))  # chars per token estimate
+ if TOKEN_ESTIMATION_RATIO < 2 or TOKEN_ESTIMATION_RATIO > 10:
+     logger.warning(f"TOKEN_ESTIMATION_RATIO={TOKEN_ESTIMATION_RATIO} outside normal range [2, 10], using 3")
+     TOKEN_ESTIMATION_RATIO = 3
+
+ USE_TOKEN_AWARE_BATCHING = os.getenv("USE_TOKEN_AWARE_BATCHING", "true").lower() == "true"
+ MAX_RECURSION_DEPTH = 10  # Maximum depth for recursive chunk splitting
+
  # Set up logging
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  logger = logging.getLogger(__name__)
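The new configuration block above is driven entirely by environment variables and clamps out-of-range values back to safe defaults. Below is a minimal standalone sketch of the same read-and-clamp pattern; it is not the importer itself, the names simply mirror the env vars read above, and the sketch configures logging before emitting any warnings.

```python
import logging
import os

# Standalone sketch of the read-and-clamp pattern used above (not the importer itself).
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

MAX_TOKENS_PER_BATCH = int(os.getenv("MAX_TOKENS_PER_BATCH", "100000"))
if not 1000 <= MAX_TOKENS_PER_BATCH <= 120000:
    logger.warning("MAX_TOKENS_PER_BATCH=%s outside [1000, 120000], using 100000", MAX_TOKENS_PER_BATCH)
    MAX_TOKENS_PER_BATCH = 100000

TOKEN_ESTIMATION_RATIO = int(os.getenv("TOKEN_ESTIMATION_RATIO", "3"))  # rough chars-per-token ratio
if not 2 <= TOKEN_ESTIMATION_RATIO <= 10:
    logger.warning("TOKEN_ESTIMATION_RATIO=%s outside [2, 10], using 3", TOKEN_ESTIMATION_RATIO)
    TOKEN_ESTIMATION_RATIO = 3

USE_TOKEN_AWARE_BATCHING = os.getenv("USE_TOKEN_AWARE_BATCHING", "true").lower() == "true"
```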
@@ -381,6 +395,38 @@ def log_retry_state(retry_state):
  def embed_with_backoff(**kwargs):
      return voyage_client.embed(**kwargs)

+ def estimate_tokens(text: str) -> int:
+     """Estimate token count for text with content-aware heuristics.
+     Base estimate uses TOKEN_ESTIMATION_RATIO, adjusted for content type.
+     """
+     # Base estimate
+     base_tokens = len(text) // TOKEN_ESTIMATION_RATIO
+
+     # Adjust for code/JSON content (typically more tokens per char)
+     # Count indicators of structured content
+     structure_indicators = text.count('{') + text.count('[') + text.count('```')
+     if structure_indicators > 10:  # Likely JSON/code
+         base_tokens = int(base_tokens * 1.3)
+
+     # Add 10% safety margin
+     return int(base_tokens * 1.1)
+
+ def extract_message_content(msg: Dict[str, Any]) -> str:
+     """Extract text content from a message."""
+     content = msg.get("content", "")
+
+     if isinstance(content, list):
+         # Handle structured content
+         text_parts = []
+         for item in content:
+             if isinstance(item, dict) and item.get("type") == "text":
+                 text_parts.append(item.get("text", ""))
+             elif isinstance(item, str):
+                 text_parts.append(item)
+         content = " ".join(text_parts)
+
+     return content
+
  def generate_embeddings(texts: List[str]) -> List[List[float]]:
      """Generate embeddings for a list of texts."""
      if PREFER_LOCAL_EMBEDDINGS or not VOYAGE_API_KEY:
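The estimate is deliberately coarse: characters divided by `TOKEN_ESTIMATION_RATIO`, bumped by 1.3x when the text looks structured (more than ten `{`, `[`, or triple-backtick markers), then a 10% safety margin on top. A worked example of that arithmetic, assuming the default ratio of 3; the sample strings are illustrative only.

```python
# Worked arithmetic only; assumes TOKEN_ESTIMATION_RATIO = 3 (the default).
prose = "x" * 3000                      # plain prose, no structure markers
base = len(prose) // 3                  # 1000 base tokens
print(int(base * 1.1))                  # 1100 -> only the 10% safety margin applies

snippet = '{"key": [1, 2, 3]}\n' * 20   # bracket-heavy, trips the >10 indicator threshold
base = len(snippet) // 3
print(int(int(base * 1.3) * 1.1))       # 1.3x structured-content bump, then the 10% margin
```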
@@ -432,6 +478,125 @@ def chunk_conversation(messages: List[Dict[str, Any]], chunk_size: int = 10) ->

      return chunks

+ def split_large_chunk(chunk: Dict[str, Any], max_tokens: int, depth: int = 0) -> List[Dict[str, Any]]:
+     """Split a large chunk into smaller pieces that fit token limit."""
+     # Check recursion depth to prevent stack overflow
+     if depth >= MAX_RECURSION_DEPTH:
+         logger.error(f"Max recursion depth {MAX_RECURSION_DEPTH} reached while splitting chunk")
+         # Force truncate as last resort
+         max_chars = max_tokens * TOKEN_ESTIMATION_RATIO
+         chunk["text"] = chunk["text"][:max_chars] + "\n[TRUNCATED - MAX DEPTH REACHED]"
+         chunk["was_truncated"] = True
+         return [chunk]
+
+     text = chunk["text"]
+     messages = chunk["messages"]
+
+     # First, check if we can split by messages
+     if len(messages) > 1:
+         # Try splitting messages into smaller groups
+         mid = len(messages) // 2
+         chunk1_messages = messages[:mid]
+         chunk2_messages = messages[mid:]
+
+         # Recreate text for each split
+         texts1 = []
+         texts2 = []
+
+         for msg in chunk1_messages:
+             role = msg.get("role", "unknown")
+             content = extract_message_content(msg)
+             if content:
+                 texts1.append(f"{role.upper()}: {content}")
+
+         for msg in chunk2_messages:
+             role = msg.get("role", "unknown")
+             content = extract_message_content(msg)
+             if content:
+                 texts2.append(f"{role.upper()}: {content}")
+
+         split_chunks = []
+         if texts1:
+             split_chunks.append({
+                 "text": "\n".join(texts1),
+                 "messages": chunk1_messages,
+                 "chunk_index": f"{chunk['chunk_index']}_a",
+                 "start_role": chunk["start_role"]
+             })
+         if texts2:
+             split_chunks.append({
+                 "text": "\n".join(texts2),
+                 "messages": chunk2_messages,
+                 "chunk_index": f"{chunk['chunk_index']}_b",
+                 "start_role": chunk2_messages[0].get("role", "unknown") if chunk2_messages else "unknown"
+             })
+
+         # Recursively split if still too large
+         result = []
+         for split_chunk in split_chunks:
+             if estimate_tokens(split_chunk["text"]) > max_tokens:
+                 result.extend(split_large_chunk(split_chunk, max_tokens, depth + 1))
+             else:
+                 result.append(split_chunk)
+         return result
+     else:
+         # Single message too large - truncate with warning
+         max_chars = max_tokens * TOKEN_ESTIMATION_RATIO
+         if len(text) > max_chars:
+             truncated_size = len(text) - max_chars
+             logger.warning(f"Single message exceeds token limit, truncating {truncated_size} chars from {len(text)} total")
+             chunk["text"] = text[:max_chars] + f"\n[TRUNCATED {truncated_size} CHARS]"
+             chunk["was_truncated"] = True
+             chunk["original_size"] = len(text)
+         return [chunk]
+
+ def create_token_aware_batches(chunks: List[Dict[str, Any]], max_tokens: int = MAX_TOKENS_PER_BATCH) -> List[List[Dict[str, Any]]]:
+     """Create batches that respect token limits."""
+     if not USE_TOKEN_AWARE_BATCHING:
+         # Fall back to old batching method
+         batches = []
+         for i in range(0, len(chunks), BATCH_SIZE):
+             batches.append(chunks[i:i + BATCH_SIZE])
+         return batches
+
+     batches = []
+     current_batch = []
+     current_tokens = 0
+
+     for chunk in chunks:
+         chunk_tokens = estimate_tokens(chunk["text"])
+
+         # If single chunk exceeds limit, split it
+         if chunk_tokens > max_tokens:
+             logger.warning(f"Chunk with {chunk_tokens} estimated tokens exceeds limit of {max_tokens}, splitting...")
+             split_chunks = split_large_chunk(chunk, max_tokens)
+             for split_chunk in split_chunks:
+                 split_tokens = estimate_tokens(split_chunk["text"])
+                 if split_tokens > max_tokens:
+                     logger.error(f"Split chunk still exceeds limit: {split_tokens} tokens")
+                 batches.append([split_chunk])
+         # If adding chunk would exceed limit, start new batch
+         elif current_tokens + chunk_tokens > max_tokens:
+             if current_batch:
+                 batches.append(current_batch)
+             current_batch = [chunk]
+             current_tokens = chunk_tokens
+         else:
+             current_batch.append(chunk)
+             current_tokens += chunk_tokens
+
+     if current_batch:
+         batches.append(current_batch)
+
+     # Log batch statistics
+     if batches:
+         batch_sizes = [len(batch) for batch in batches]
+         batch_tokens = [sum(estimate_tokens(chunk["text"]) for chunk in batch) for batch in batches]
+         logger.debug(f"Created {len(batches)} batches, chunk counts: min={min(batch_sizes)}, max={max(batch_sizes)}, "
+                      f"estimated tokens: min={min(batch_tokens)}, max={max(batch_tokens)}, avg={sum(batch_tokens)//len(batches)}")
+
+     return batches
+
  def import_project(project_path: Path, collection_name: str, state: dict) -> int:
      """Import all conversations from a project."""
      jsonl_files = list(project_path.glob("*.jsonl"))
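Two sketches may help make the hunk above concrete. First, the splitter halves the chunk's message list and recurses, tagging the halves `_a`/`_b` on the chunk index, and truncates only when a single message is still too large or `MAX_RECURSION_DEPTH` is reached. A stripped-down stand-in showing that recursive halving on plain strings (for illustration, not the importer's actual function):

```python
from typing import List, Tuple

MAX_DEPTH = 10  # mirrors MAX_RECURSION_DEPTH above

def halve_until_fits(messages: List[str], max_chars: int,
                     index: str = "0", depth: int = 0) -> List[Tuple[str, str]]:
    """Recursively halve a message list until each joined piece fits max_chars."""
    text = "\n".join(messages)
    if len(text) <= max_chars or len(messages) <= 1 or depth >= MAX_DEPTH:
        return [(index, text[:max_chars])]  # truncation only as a last resort
    mid = len(messages) // 2
    return (halve_until_fits(messages[:mid], max_chars, index + "_a", depth + 1)
            + halve_until_fits(messages[mid:], max_chars, index + "_b", depth + 1))

pieces = halve_until_fits([f"MSG {i}: " + "x" * 500 for i in range(8)], max_chars=1200)
print([(idx, len(text)) for idx, text in pieces])  # four ~1015-char pieces: 0_a_a, 0_a_b, 0_b_a, 0_b_b
```

Second, batching is a greedy accumulation: chunks join the current batch until the estimated token total would exceed the limit, and oversized chunks are split first. A self-contained sketch of that greedy grouping on toy chunk dicts in the same shape the importer builds; the helper is a simplified stand-in that skips the splitting step:

```python
from typing import Any, Dict, List

def rough_tokens(text: str, ratio: int = 3) -> int:
    """Stand-in for estimate_tokens: chars // ratio plus a 10% margin."""
    return int((len(text) // ratio) * 1.1)

def greedy_batches(chunks: List[Dict[str, Any]], max_tokens: int) -> List[List[Dict[str, Any]]]:
    """Greedy grouping in the spirit of create_token_aware_batches (no chunk splitting)."""
    batches: List[List[Dict[str, Any]]] = []
    current: List[Dict[str, Any]] = []
    used = 0
    for chunk in chunks:
        cost = rough_tokens(chunk["text"])
        if current and used + cost > max_tokens:
            batches.append(current)
            current, used = [], 0
        current.append(chunk)
        used += cost
    if current:
        batches.append(current)
    return batches

chunks = [{"text": "USER: hi\nASSISTANT: hello" * 200, "messages": [],
           "chunk_index": i, "start_role": "user"} for i in range(5)]
print([len(b) for b in greedy_batches(chunks, max_tokens=4000)])  # e.g. [2, 2, 1]
```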
@@ -524,11 +689,17 @@ def import_project(project_path: Path, collection_name: str, state: dict) -> int
          if not chunks:
              continue

-         # Process in batches
-         for batch_start in range(0, len(chunks), BATCH_SIZE):
-             batch = chunks[batch_start:batch_start + BATCH_SIZE]
+         # Process in batches (token-aware if enabled)
+         token_aware_batches = create_token_aware_batches(chunks)
+
+         for batch_idx, batch in enumerate(token_aware_batches):
              texts = [chunk["text"] for chunk in batch]

+             # Log batch info for debugging
+             if USE_TOKEN_AWARE_BATCHING:
+                 total_tokens = sum(estimate_tokens(text) for text in texts)
+                 logger.debug(f"Batch {batch_idx + 1}/{len(token_aware_batches)}: {len(texts)} chunks, ~{total_tokens} estimated tokens")
+
              # Generate embeddings
              embeddings = generate_embeddings(texts)
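When `USE_TOKEN_AWARE_BATCHING` is false, `create_token_aware_batches` degrades to the same fixed-size slicing the removed loop performed, so the importer's groupings are unchanged in that mode. A small sketch of that fallback, using a stand-in `BATCH_SIZE` value purely for illustration (the real constant is defined elsewhere in the importer):

```python
from typing import Any, Dict, List

BATCH_SIZE = 10  # stand-in value for illustration; the importer defines its own

def fixed_size_batches(chunks: List[Dict[str, Any]]) -> List[List[Dict[str, Any]]]:
    """The fallback path: slice chunks into fixed-size groups, ignoring token counts."""
    return [chunks[i:i + BATCH_SIZE] for i in range(0, len(chunks), BATCH_SIZE)]

chunks = [{"text": f"chunk {i}"} for i in range(23)]
print([len(b) for b in fixed_size_batches(chunks)])  # [10, 10, 3]
```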