claude-self-reflect 3.3.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -147,7 +147,15 @@ def generate_embeddings(texts: List[str]) -> List[List[float]]:
     """Generate embeddings for texts."""
     # Use the global embedding_provider which gets updated by command-line args
     if PREFER_LOCAL_EMBEDDINGS:
-        embeddings = list(embedding_provider.passage_embed(texts))
+        # FastEmbed uses 'embed' method, not 'passage_embed'
+        # Try 'embed' first, fall back to 'passage_embed' for compatibility
+        if hasattr(embedding_provider, 'embed'):
+            embeddings = list(embedding_provider.embed(texts))
+        elif hasattr(embedding_provider, 'passage_embed'):
+            # Fallback for older versions (shouldn't exist but kept for safety)
+            embeddings = list(embedding_provider.passage_embed(texts))
+        else:
+            raise AttributeError("FastEmbed provider has neither 'embed' nor 'passage_embed' method")
         return [emb.tolist() if hasattr(emb, 'tolist') else emb for emb in embeddings]
     else:
         response = embedding_provider.embed(texts, model="voyage-3")
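The 3.x code called passage_embed(), which FastEmbed's TextEmbedding class does not expose, so local-embedding imports crashed; 4.0.0 probes for embed() first. A minimal standalone sketch of the same dispatch, not taken from the package (the model name is just one entry from fastembed's supported list):

# Sketch: the hasattr dispatch against fastembed's TextEmbedding, which
# exposes embed() but no passage_embed().
from fastembed import TextEmbedding

provider = TextEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
texts = ["hello world", "goodbye world"]

if hasattr(provider, 'embed'):
    vectors = list(provider.embed(texts))          # current fastembed path
elif hasattr(provider, 'passage_embed'):
    vectors = list(provider.passage_embed(texts))  # legacy fallback, normally unreachable
else:
    raise AttributeError("no embedding method available")

print(len(vectors), vectors[0].shape)  # 2 vectors, 384 dimensions for this model

Wrapping the call in list() matters: embed() returns a lazy generator, and materializing it up front is what feeds the per-vector tolist() conversion on the function's return path.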
@@ -327,22 +335,27 @@ def extract_metadata_single_pass(file_path: str) -> tuple[Dict[str, Any], str, int]:
         "concepts": [],
         "ast_elements": [],
         "has_code_blocks": False,
-        "total_messages": 0
+        "total_messages": 0,
+        "project_path": None  # Add project path from cwd
     }
-
+
     first_timestamp = None
     message_count = 0
     all_text = []
-
+
     try:
         with open(file_path, 'r', encoding='utf-8') as f:
             for line in f:
                 if not line.strip():
                     continue
-
+
                 try:
                     data = json.loads(line)
-
+
+                    # Extract cwd (current working directory) as project path
+                    if metadata["project_path"] is None and 'cwd' in data:
+                        metadata["project_path"] = data.get('cwd')
+
                     # Get timestamp from first valid entry
                     if first_timestamp is None and 'timestamp' in data:
                         first_timestamp = data.get('timestamp')
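The metadata pass now records the conversation's working directory from the first JSONL record that carries a cwd field. A standalone sketch of the record shape this relies on (the field names are the ones the importer reads; the values are invented):

import json

# Hypothetical transcript line; only 'cwd' and 'timestamp' matter here.
line = '{"cwd": "/home/user/projects/demo", "timestamp": "2025-01-01T12:00:00Z"}'

metadata = {"project_path": None}
data = json.loads(line)
if metadata["project_path"] is None and 'cwd' in data:
    metadata["project_path"] = data.get('cwd')

print(metadata["project_path"])  # /home/user/projects/demo

Only the first cwd wins, so a conversation that changes directories keeps the project it started in.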
@@ -368,7 +381,8 @@ def extract_metadata_single_pass(file_path: str) -> tuple[Dict[str, Any], str, int]:
                    # Extract code for AST analysis with bounds checking
                    if len(metadata['ast_elements']) < MAX_AST_ELEMENTS:
                        # Fix: More permissive regex to handle various fence formats
-                       code_blocks = re.findall(r'```[^`]*?\n(.*?)```', item.get('text', ''), re.DOTALL)
+                       # Handles both ```\n and ```python\n cases, with optional newline
+                       code_blocks = re.findall(r'```[^`\n]*\n?(.*?)```', item.get('text', ''), re.DOTALL)
                        for code_block in code_blocks[:MAX_CODE_BLOCKS]:  # Use defined constant
                            if len(metadata['ast_elements']) >= MAX_AST_ELEMENTS:
                                break
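The old pattern demanded a literal newline after the opening fence, so a block written on one line was never counted; the new pattern confines the info string to the fence line and makes the newline optional. A quick standalone comparison (sample strings invented):

import re

old = r'```[^`]*?\n(.*?)```'
new = r'```[^`\n]*\n?(.*?)```'

samples = [
    "```python\nprint('hi')\n```",  # fence with a language tag
    "```\nprint('hi')\n```",        # bare fence
    "```print('hi')```",            # no newline after the fence
]
for s in samples:
    print(re.findall(old, s, re.DOTALL), re.findall(new, s, re.DOTALL))
# First two samples: both patterns capture "print('hi')\n".
# Last sample: the old pattern finds nothing; the new one still matches,
# though its body capture is empty because everything sits on the fence line.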
@@ -376,7 +390,11 @@ def extract_metadata_single_pass(file_path: str) -> tuple[Dict[str, Any], str, int]:
                            for elem in list(ast_elems)[:MAX_ELEMENTS_PER_BLOCK]:  # Use defined constant
                                if elem not in metadata['ast_elements'] and len(metadata['ast_elements']) < MAX_AST_ELEMENTS:
                                    metadata['ast_elements'].append(elem)
-
+
+                elif item.get('type') == 'thinking':
+                    # Also include thinking content in metadata extraction
+                    text_content += item.get('thinking', '')
+
                elif item.get('type') == 'tool_use':
                    tool_name = item.get('name', '')
                    if tool_name and tool_name not in metadata['tools_used']:
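4.0.0 starts folding thinking blocks into the text that feeds concept extraction, alongside the existing text and tool_use branches. A standalone sketch of that dispatch; the item shapes mirror the types the diff handles, and the sample values are invented:

items = [
    {"type": "text", "text": "Refactored the importer. "},
    {"type": "thinking", "thinking": "The regex misses fences without newlines."},
    {"type": "tool_use", "name": "Edit"},
]

text_content = ""
tools_used = []
for item in items:
    if item.get('type') == 'text':
        text_content += item.get('text', '')
    elif item.get('type') == 'thinking':
        # New in 4.0.0: thinking text also feeds concept extraction
        text_content += item.get('thinking', '')
    elif item.get('type') == 'tool_use':
        tool_name = item.get('name', '')
        if tool_name and tool_name not in tools_used:
            tools_used.append(tool_name)

print(text_content)  # text plus thinking content, concatenated
print(tools_used)    # ['Edit']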
@@ -423,39 +441,77 @@ def extract_metadata_single_pass(file_path: str) -> tuple[Dict[str, Any], str, int]:
     if all_text:
         combined_text = ' '.join(all_text[:MAX_CONCEPT_MESSAGES])  # Limit messages for concept extraction
         metadata['concepts'] = extract_concepts(combined_text)
-
+
+    # MANDATORY: AST-GREP Pattern Analysis
+    # Analyze code quality for files mentioned in conversation
+    pattern_quality = {}
+    avg_quality_score = 0.0
+
+    try:
+        # Update patterns first (uses 24h cache, <100ms)
+        from update_patterns import check_and_update_patterns
+        check_and_update_patterns()
+
+        # Import analyzer
+        from ast_grep_final_analyzer import FinalASTGrepAnalyzer
+        analyzer = FinalASTGrepAnalyzer()
+
+        # Analyze edited and analyzed files
+        files_to_analyze = list(set(metadata['files_edited'] + metadata['files_analyzed'][:10]))
+        quality_scores = []
+
+        for file_path in files_to_analyze:
+            # Only analyze code files
+            if file_path and any(file_path.endswith(ext) for ext in ['.py', '.ts', '.js', '.tsx', '.jsx']):
+                try:
+                    # Check if file exists and is accessible
+                    if os.path.exists(file_path):
+                        result = analyzer.analyze_file(file_path)
+                        metrics = result['quality_metrics']
+                        pattern_quality[file_path] = {
+                            'score': metrics['quality_score'],
+                            'good_patterns': metrics['good_patterns_found'],
+                            'bad_patterns': metrics['bad_patterns_found'],
+                            'issues': metrics['total_issues']
+                        }
+                        quality_scores.append(metrics['quality_score'])
+                except Exception as e:
+                    logger.debug(f"Could not analyze {file_path}: {e}")
+
+        # Calculate average quality
+        if quality_scores:
+            avg_quality_score = sum(quality_scores) / len(quality_scores)
+
+    except Exception as e:
+        logger.debug(f"AST analysis not available: {e}")
+
+    # Add pattern analysis to metadata
+    metadata['pattern_analysis'] = pattern_quality
+    metadata['avg_quality_score'] = round(avg_quality_score, 3)
+
     # Set total messages
     metadata['total_messages'] = message_count
-
+
     # Limit arrays
     metadata['files_analyzed'] = metadata['files_analyzed'][:MAX_FILES_ANALYZED]
    metadata['files_edited'] = metadata['files_edited'][:MAX_FILES_EDITED]
    metadata['tools_used'] = metadata['tools_used'][:MAX_TOOLS_USED]
    metadata['ast_elements'] = metadata['ast_elements'][:MAX_AST_ELEMENTS]
-
+
    return metadata, first_timestamp or datetime.now().isoformat(), message_count
 
 def stream_import_file(jsonl_file: Path, collection_name: str, project_path: Path) -> int:
     """Stream import a single JSONL file without loading it into memory."""
     logger.info(f"Streaming import of {jsonl_file.name}")
-
-    # Delete existing points for this conversation to prevent stale data
+
+    # Extract conversation ID
     conversation_id = jsonl_file.stem
-    try:
-        from qdrant_client.models import Filter, FieldCondition, MatchValue
-        client.delete(
-            collection_name=collection_name,
-            points_selector=Filter(
-                must=[FieldCondition(key="conversation_id", match=MatchValue(value=conversation_id))]
-            ),
-            wait=True
-        )
-        logger.info(f"Deleted existing points for conversation {conversation_id}")
-    except Exception as e:
-        logger.warning(f"Could not delete existing points for {conversation_id}: {e}")
-
+
     # Extract metadata in first pass (lightweight)
     metadata, created_at, total_messages = extract_metadata_single_pass(str(jsonl_file))
+
+    # Track whether we should delete old points (only after successful import)
+    should_delete_old = False
 
     # Reset counters for each conversation (critical for correct indexing)
     current_message_index = 0  # Must be reset before processing each conversation
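In the new AST-grep pass above, per-file results land in pattern_quality keyed by path, and avg_quality_score is the plain mean of the individual scores. A standalone sketch of that rollup with invented metric values (the inner keys are the ones the importer writes into pattern_analysis):

pattern_quality = {
    "scripts/importer.py": {'score': 0.82, 'good_patterns': 7, 'bad_patterns': 2, 'issues': 3},
    "src/server.ts": {'score': 0.64, 'good_patterns': 3, 'bad_patterns': 4, 'issues': 6},
}

quality_scores = [entry['score'] for entry in pattern_quality.values()]
avg_quality_score = sum(quality_scores) / len(quality_scores) if quality_scores else 0.0
print(round(avg_quality_score, 3))  # 0.73

Files that fail to analyze simply drop out of the mean, so an avg_quality_score of 0.0 signals no analyzable files or an unavailable analyzer, not bad code.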
@@ -493,6 +549,11 @@ def stream_import_file(jsonl_file: Path, collection_name: str, project_path: Path) -> int:
                 item_type = item.get('type', '')
                 if item_type == 'text':
                     text_parts.append(item.get('text', ''))
+                elif item_type == 'thinking':
+                    # Include thinking content (from Claude's thinking blocks)
+                    thinking_content = item.get('thinking', '')
+                    if thinking_content:
+                        text_parts.append(f"[Thinking] {thinking_content[:1000]}")  # Limit size
                 elif item_type == 'tool_use':
                     # Include tool use information
                     tool_name = item.get('name', 'unknown')
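In the streaming pass, thinking text is tagged and capped before it joins the chunk text. The cap arithmetic, with an invented sample string:

thinking_content = "step 1: inspect the failing import... " * 50  # ~1900 chars
text_parts = []
if thinking_content:
    text_parts.append(f"[Thinking] {thinking_content[:1000]}")  # Limit size

print(len(text_parts[0]))  # 1011: len("[Thinking] ") == 11, plus the 1000-char cap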
@@ -594,10 +655,35 @@ def stream_import_file(jsonl_file: Path, collection_name: str, project_path: Path) -> int:
            created_at, metadata, collection_name, project_path, total_messages
        )
        total_chunks += chunks
-
+
+        # Only delete old points after successful import verification
+        if total_chunks > 0:
+            try:
+                from qdrant_client.models import Filter, FieldCondition, MatchValue
+                # Count old points before deletion for verification
+                old_count_filter = Filter(
+                    must=[FieldCondition(key="conversation_id", match=MatchValue(value=conversation_id))]
+                )
+                old_points = client.scroll(
+                    collection_name=collection_name,
+                    scroll_filter=old_count_filter,
+                    limit=1
+                )[0]
+
+                if len(old_points) > total_chunks + 5:  # Allow some tolerance
+                    # Only delete if we have significantly more old points than new
+                    client.delete(
+                        collection_name=collection_name,
+                        points_selector=old_count_filter,
+                        wait=True
+                    )
+                    logger.info(f"Deleted old points for conversation {conversation_id} after verifying new import")
+            except Exception as e:
+                logger.warning(f"Could not clean up old points for {conversation_id}: {e}")
+
        logger.info(f"Imported {total_chunks} chunks from {jsonl_file.name}")
        return total_chunks
-
+
    except Exception as e:
        logger.error(f"Failed to import {jsonl_file}: {e}")
        return 0
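The release inverts the cleanup order: 3.3.0 deleted a conversation's points before importing, 4.0.0 deletes them only after new chunks are written and an old-point check passes. That check uses scroll(limit=1), which returns at most one record, so the "significantly more old points" comparison is more directly expressed with an exact count. A sketch of that variant, not from the package, using qdrant-client's count() (collection name, URL, and conversation ID are invented):

from qdrant_client import QdrantClient
from qdrant_client.models import Filter, FieldCondition, MatchValue

client = QdrantClient(url="http://localhost:6333")  # assumes a local Qdrant
conversation_filter = Filter(
    must=[FieldCondition(key="conversation_id", match=MatchValue(value="abc123"))]
)

# Exact count of points already stored for this conversation
old_count = client.count(
    collection_name="conv_demo",
    count_filter=conversation_filter,
    exact=True,
).count

total_chunks = 40  # chunks just written for this conversation
if old_count > total_chunks + 5:  # same tolerance as the diff
    client.delete(
        collection_name="conv_demo",
        points_selector=conversation_filter,
        wait=True,
    )

Deleting after a verified import trades a window of duplicate points for the guarantee that a failed import never leaves the conversation unsearchable.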