code-finder 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. claude_context/__init__.py +33 -0
  2. claude_context/agentic_integration.py +309 -0
  3. claude_context/ast_chunker.py +646 -0
  4. claude_context/config.py +239 -0
  5. claude_context/context_manager.py +627 -0
  6. claude_context/embeddings.py +307 -0
  7. claude_context/embeddings_interface.py +226 -0
  8. claude_context/enhanced_ast_chunker.py +1129 -0
  9. claude_context/explorer.py +951 -0
  10. claude_context/explorer_with_context.py +1008 -0
  11. claude_context/indexer.py +893 -0
  12. claude_context/markdown_chunker.py +421 -0
  13. claude_context/mode_handler.py +1774 -0
  14. claude_context/query_metrics.py +164 -0
  15. claude_context/question_generator.py +800 -0
  16. claude_context/readme_extractor.py +485 -0
  17. claude_context/repository_adapter.py +399 -0
  18. claude_context/search.py +493 -0
  19. claude_context/skills/__init__.py +11 -0
  20. claude_context/skills/_cli_common.py +74 -0
  21. claude_context/skills/_index_manager.py +98 -0
  22. claude_context/skills/api_surface.py +219 -0
  23. claude_context/skills/evidence_retrieval.py +151 -0
  24. claude_context/skills/grounded_review.py +212 -0
  25. claude_context/synthesis/__init__.py +8 -0
  26. claude_context/synthesis/editor_agent.py +391 -0
  27. claude_context/synthesis/llm_synthesizer.py +153 -0
  28. claude_context/synthesis/logic_explainer.py +235 -0
  29. claude_context/synthesis/multi_review_pipeline.py +717 -0
  30. claude_context/synthesis/prompt_builder.py +439 -0
  31. claude_context/synthesis/providers.py +115 -0
  32. claude_context/synthesis/validators.py +458 -0
  33. code_finder-0.1.0.dist-info/METADATA +823 -0
  34. code_finder-0.1.0.dist-info/RECORD +37 -0
  35. code_finder-0.1.0.dist-info/WHEEL +5 -0
  36. code_finder-0.1.0.dist-info/entry_points.txt +4 -0
  37. code_finder-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,627 @@
1
+ """
2
+ External Context Manager for Claude Context
3
+
4
+ Manages external documentation, requirements, tickets, and other context
5
+ that helps explain the "why" behind code decisions.
6
+ """
7
+
8
+ import logging
9
+ import json
10
+ import re
11
+ from datetime import datetime
12
+ from pathlib import Path
13
+ from typing import List, Dict, Any, Optional, Tuple
14
+ from dataclasses import dataclass, field
15
+ import hashlib
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
@dataclass
class ContextItem:
    """Represents a single piece of external context.

    Attributes:
        id: Identifier assigned by the manager that stores the item.
        type: Context category the item is filed under.
        content: The raw text of the context.
        metadata: Free-form extra information (e.g. ``title``, ``ticket_id``).
        source: Where the content came from.
        added_at: Creation timestamp (naive local time from ``datetime.now``).
        embedding: Placeholder for a future semantic-search vector.
    """
    id: str
    type: str  # requirements, tickets, design_docs, decisions, custom
    content: str
    metadata: Dict[str, Any] = field(default_factory=dict)
    source: str = ""  # file path, URL, or API source
    added_at: datetime = field(default_factory=datetime.now)
    embedding: Optional[Any] = None  # For future semantic search

    def get_summary(self, max_length: int = 100) -> str:
        """Get a brief, single-line summary of the context.

        Args:
            max_length: Maximum number of characters taken from the content
                line before "..." is appended. Not applied to a title.

        Returns:
            The metadata ``title`` when one is set and non-empty (converted to
            ``str``); otherwise the first non-blank line of the content,
            stripped and truncated to ``max_length`` characters. Empty string
            when the content has no non-blank line.
        """
        title = self.metadata.get('title') if self.metadata else None
        if title:
            # Titles are not guaranteed to be strings (metadata is free-form).
            return str(title)

        # Skip leading blank lines: content read from files often starts with
        # one, which would otherwise produce an empty summary.
        first_line = next(
            (line.strip() for line in self.content.splitlines() if line.strip()),
            "",
        )
        if len(first_line) > max_length:
            return first_line[:max_length] + "..."
        return first_line
40
+
41
+
42
class ExternalContextManager:
    """
    Manages all external context (requirements, tickets, docs, etc.)
    Follows fail-fast principles with clear logging.

    State:
        contexts: maps each supported context type to the list of its items.
        context_index: maps context ID to item for quick lookup.
        total_items: number of items currently stored.
    """

    # Markdown fenced code block. Group 1 is the optional language tag
    # (allowing tags such as "c++" or "objective-c"); group 2 is the body and
    # does not participate for an empty block. The body group is lazily
    # optional so an empty block cannot pair with a later block's fence.
    _CODE_FENCE_RE = re.compile(
        r'```([\w+#.\-]+)?[ \t]*\n(?:(.*?)\n)??[ \t]*```',
        re.DOTALL,
    )

    # A "how to use it" heading (level 2 or deeper) and the text up to the
    # next line that starts with "##" or the end of the document.
    _USAGE_SECTION_RE = re.compile(
        r'^[ \t]{0,3}#{2,6}[ \t]+'
        r'(Usage|Quickstart|Quick Start|Examples?|Getting Started)[ \t]*\n'
        r'(.*?)(?=^[ \t]{0,3}##|\Z)',
        re.DOTALL | re.IGNORECASE | re.MULTILINE,
    )

    # "<tool> install [flags] <package>": group 1 holds the option flags,
    # group 2 the first positional token (the package name).
    _PIP_INSTALL_RE = re.compile(
        r'pip3? install((?:[ \t]+-\S+)*)[ \t]+([A-Za-z0-9_][\w\-\[\]\.]*)',
        re.IGNORECASE,
    )
    _NPM_INSTALL_RE = re.compile(
        r'npm install((?:[ \t]+-\S+)*)[ \t]+([@A-Za-z0-9_][\w\-@/\.]*)',
        re.IGNORECASE,
    )
    # Flags whose next token is the flag's value, not a package name.
    _VALUE_FLAGS = frozenset({
        "-r", "--requirement", "-e", "--editable", "-c", "--constraint",
        "-i", "--index-url", "--extra-index-url", "-f", "--find-links",
        "-t", "--target", "--prefix", "--root", "--registry",
    })

    _PYTHON_VERSION_RE = re.compile(r'Python (\d+\.\d+)\+', re.IGNORECASE)
    # Accepts "Node 16.14", "Node.js 16.14+" and "Node 18+"; a bare integer is
    # only accepted when followed by "+" to avoid matching ordinary prose.
    _NODE_VERSION_RE = re.compile(
        r'Node(?:\.js)? (\d+\.\d+|\d+(?=\+))\+?',
        re.IGNORECASE,
    )

    # Monotonic counter used in generated IDs. The class-level default keeps
    # instances usable even when a subclass skips __init__.
    _id_counter = 0

    def __init__(self):
        """Initialize the context manager with empty storage for every type."""
        self.contexts = {
            "requirements": [],
            "tickets": [],
            "design_docs": [],
            "decisions": [],
            "custom": []
        }
        self.context_index = {}  # Quick lookup by ID
        self.total_items = 0
        self._id_counter = 0
        logger.info("ExternalContextManager initialized")

    def _new_context_id(self, context_type: str, content: str) -> str:
        """Return an unused ID of the form ``<type>_<counter>_<content hash>``."""
        content_hash = hashlib.md5(content.encode()).hexdigest()[:8]
        while True:
            candidate = f"{context_type}_{self._id_counter}_{content_hash}"
            self._id_counter += 1
            # IDs restored from a file may already occupy a counter value.
            if candidate not in self.context_index:
                return candidate

    def add_context(
        self,
        context_type: str,
        content: str,
        metadata: Optional[Dict[str, Any]] = None,
        source: str = ""
    ) -> str:
        """
        Add a piece of external context.

        Args:
            context_type: Type of context (requirements, tickets, etc.)
            content: The actual content text
            metadata: Additional metadata (title, ticket_id, etc.). A shallow
                copy is stored, so later changes to the caller's dict do not
                affect the stored item.
            source: Where this came from (file path, URL, etc.)

        Returns:
            Context ID for reference

        Raises:
            ValueError: If context_type is invalid or content is empty
        """
        # Validate inputs
        if context_type not in self.contexts:
            logger.error(f"Invalid context type: {context_type}")
            raise ValueError(f"Context type must be one of: {list(self.contexts.keys())}")

        if not isinstance(content, str) or not content.strip():
            logger.error("Attempted to add empty context")
            raise ValueError("Context content cannot be empty")

        context_id = self._new_context_id(context_type, content)

        context_item = ContextItem(
            id=context_id,
            type=context_type,
            content=content,
            metadata=dict(metadata) if metadata else {},
            source=source
        )

        # Store in both structures
        self.contexts[context_type].append(context_item)
        self.context_index[context_id] = context_item
        self.total_items += 1

        logger.info(f"Added {context_type}: {context_item.get_summary()}")
        logger.debug(f"Context ID: {context_id}, Source: {source}")

        return context_id

    def add_file(self, file_path: str, context_type: str = "custom") -> str:
        """
        Add context from a file.

        The file is read as UTF-8. For markdown files the first level-1
        heading outside a fenced code block is stored as the ``title``.

        Args:
            file_path: Path to the file
            context_type: Type of context

        Returns:
            Context ID

        Raises:
            FileNotFoundError: If file doesn't exist
            RuntimeError: If the file cannot be read or decoded
            ValueError: If file is empty or context_type is invalid
        """
        path = Path(file_path)
        if not path.exists():
            logger.error(f"File not found: {file_path}")
            raise FileNotFoundError(f"File not found: {file_path}")

        try:
            content = path.read_text(encoding='utf-8')
            file_size = path.stat().st_size
        except (OSError, UnicodeDecodeError) as e:
            logger.error(f"Failed to read file {file_path}: {e}")
            raise RuntimeError(f"Failed to read file: {e}") from e

        metadata = {
            "filename": path.name,
            "file_path": str(path.absolute()),
            "file_size": file_size
        }

        # Try to extract title from markdown
        if path.suffix.lower() in ('.md', '.markdown'):
            in_fence = False
            for line in content.splitlines():
                if line.lstrip().startswith('```'):
                    # "# ..." inside a code block is a comment, not a heading.
                    in_fence = not in_fence
                elif not in_fence and line.startswith('# '):
                    heading = line[2:].strip()
                    if heading:
                        metadata['title'] = heading
                        break

        return self.add_context(
            context_type=context_type,
            content=content,
            metadata=metadata,
            source=str(path)
        )

    def add_jira_ticket(
        self,
        ticket_id: str,
        summary: str,
        description: str,
        additional_fields: Optional[Dict] = None
    ) -> str:
        """
        Add a Jira ticket as context.

        Args:
            ticket_id: Jira ticket ID (e.g., PROJ-123)
            summary: Ticket summary/title
            description: Full ticket description
            additional_fields: Other Jira fields (priority, components, etc.).
                ``acceptance_criteria`` and ``components`` are also rendered
                into the content; ``components`` may be one string or an
                iterable of values. All fields are merged into the metadata
                and override the built-in keys on a name clash.

        Returns:
            Context ID
        """
        extra = additional_fields or {}

        # Format ticket content
        content = (
            f"Jira Ticket: {ticket_id}\n"
            f"Summary: {summary}\n"
            f"\n"
            f"Description:\n"
            f"{description}"
        )

        acceptance_criteria = extra.get('acceptance_criteria')
        if acceptance_criteria:
            content += f"\n\nAcceptance Criteria:\n{acceptance_criteria}"

        components = extra.get('components')
        if components:
            if isinstance(components, str):
                # A bare string would otherwise be joined character by character.
                components = [components]
            content += f"\n\nComponents: {', '.join(str(c) for c in components)}"

        metadata = {
            "ticket_id": ticket_id,
            "title": summary,
            "type": "jira",
            **extra
        }

        return self.add_context(
            context_type="tickets",
            content=content,
            metadata=metadata,
            source=f"jira:{ticket_id}"
        )

    def search_context(
        self,
        query: str,
        context_types: Optional[List[str]] = None,
        limit: int = 5
    ) -> List[Tuple["ContextItem", float]]:
        """
        Search external context for relevant information.

        Uses simple keyword matching: an item scores 1.0 when the whole query
        occurs in its content, otherwise the fraction of query words found in
        the content. A query match in the ``title`` adds 0.5 and in the
        ``ticket_id`` adds 0.3. Items scoring 0 are dropped.

        Args:
            query: Search query; a blank query returns no results
            context_types: Types to search (None or empty = all). Unknown
                types are skipped with a warning.
            limit: Maximum results to return

        Returns:
            List of (ContextItem, relevance_score) tuples, best first
        """
        query_lower = query.strip().lower() if isinstance(query, str) else ""
        if not query_lower:
            # An empty string is a substring of everything; treat as no query.
            return []
        query_words = set(query_lower.split())

        if isinstance(context_types, str):
            context_types = [context_types]
        # dict.fromkeys drops duplicate types while keeping the caller's order.
        types_to_search = list(dict.fromkeys(context_types or self.contexts))

        results = []
        for context_type in types_to_search:
            items = self.contexts.get(context_type)
            if items is None:
                logger.warning(f"Unknown context type: {context_type}")
                continue

            for item in items:
                content_lower = item.content.lower()

                # Exact match scores highest
                if query_lower in content_lower:
                    score = 1.0
                else:
                    # Word overlap scoring
                    overlap = len(query_words & set(content_lower.split()))
                    score = overlap / len(query_words)

                # Boost score if query matches metadata. Values are free-form,
                # so convert to str before comparing.
                metadata = item.metadata or {}
                title = metadata.get('title')
                if title is not None and query_lower in str(title).lower():
                    score += 0.5
                ticket_id = metadata.get('ticket_id')
                if ticket_id is not None and query_lower in str(ticket_id).lower():
                    score += 0.3

                if score > 0:
                    results.append((item, score))

        # Sort by relevance and limit
        results.sort(key=lambda pair: pair[1], reverse=True)
        return results[:max(limit, 0)]

    def get_context_by_id(self, context_id: str) -> Optional["ContextItem"]:
        """Get a specific context item by ID, or None when it is unknown."""
        return self.context_index.get(context_id)

    def get_all_context(self, context_type: Optional[str] = None) -> List["ContextItem"]:
        """
        Get all context items, optionally filtered by type.

        Args:
            context_type: Type to filter by (None = all)

        Returns:
            A new list of context items; empty for an unknown type
        """
        if context_type:
            if context_type not in self.contexts:
                logger.warning(f"Unknown context type: {context_type}")
                return []
            return self.contexts[context_type].copy()

        # Return all contexts, grouped by type in declaration order
        return [item for items in self.contexts.values() for item in items]

    def get_summary(self) -> Dict[str, Any]:
        """
        Get a summary of all loaded context.

        Returns:
            {"total_items": int, "by_type": {type: {"count": int,
            "items": [summary of up to the first 3 items]}}}; types without
            items are omitted from "by_type".
        """
        summary = {
            "total_items": self.total_items,
            "by_type": {}
        }

        for context_type, items in self.contexts.items():
            if items:
                summary["by_type"][context_type] = {
                    "count": len(items),
                    "items": [item.get_summary() for item in items[:3]]  # First 3
                }

        return summary

    def clear(self, context_type: Optional[str] = None):
        """
        Clear context, optionally by type.

        Args:
            context_type: Type to clear (None = all). An unknown type is
                ignored with a warning.
        """
        if context_type:
            removed = self.contexts.get(context_type)
            if removed is None:
                logger.warning(f"Unknown context type: {context_type}")
                return
            # Remove from index
            for item in removed:
                self.context_index.pop(item.id, None)
            # Keep the running total in step with what is actually stored.
            self.total_items = max(0, self.total_items - len(removed))
            self.contexts[context_type] = []
            logger.info(f"Cleared {context_type} context")
        else:
            # Clear everything
            for key in self.contexts:
                self.contexts[key] = []
            self.context_index = {}
            self.total_items = 0
            self._id_counter = 0
            logger.info("Cleared all context")

    def save_to_file(self, file_path: str):
        """
        Save context to a JSON file for later reuse.

        Missing parent directories are created.

        Args:
            file_path: Path to save to

        Raises:
            TypeError: If an item's metadata is not JSON-serializable. Raised
                before the file is touched.
        """
        data = {
            "saved_at": datetime.now().isoformat(),
            "total_items": self.total_items,
            "contexts": {
                context_type: [
                    {
                        "id": item.id,
                        "content": item.content,
                        "metadata": item.metadata,
                        "source": item.source,
                        "added_at": item.added_at.isoformat()
                    }
                    for item in items
                ]
                for context_type, items in self.contexts.items()
            }
        }

        # Serialize first so a failure cannot leave a truncated file behind.
        payload = json.dumps(data, indent=2)

        path = Path(file_path)
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(payload, encoding='utf-8')

        logger.info(f"Saved context to {file_path}")

    def _item_from_saved(self, context_type: str, entry: Any) -> "ContextItem":
        """Validate one saved entry and rebuild its ContextItem (ID may be empty)."""
        if not isinstance(entry, dict):
            raise ValueError(
                f"Invalid context entry for type {context_type!r}: expected an object"
            )

        content = entry.get("content")
        if not isinstance(content, str) or not content.strip():
            raise ValueError(
                f"Invalid context entry for type {context_type!r}: missing or empty content"
            )

        metadata = entry.get("metadata") or {}
        if not isinstance(metadata, dict):
            raise ValueError(
                f"Invalid context entry for type {context_type!r}: metadata must be an object"
            )

        saved_id = entry.get("id")
        item_fields = {
            "id": saved_id if isinstance(saved_id, str) else "",
            "type": context_type,
            "content": content,
            "metadata": dict(metadata),
            "source": str(entry.get("source") or ""),
        }
        try:
            item_fields["added_at"] = datetime.fromisoformat(entry["added_at"])
        except (KeyError, TypeError, ValueError):
            # No usable timestamp: fall back to the dataclass default (now).
            pass

        return ContextItem(**item_fields)

    def load_from_file(self, file_path: str):
        """
        Load context from a JSON file, replacing everything currently held.

        The whole file is validated before existing context is cleared, so a
        bad file leaves the manager unchanged. Saved IDs and timestamps are
        restored when present; entries of unknown types are skipped.

        Args:
            file_path: Path to load from

        Raises:
            FileNotFoundError: If file doesn't exist
            ValueError: If file format is invalid
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"Context file not found: {file_path}")

        try:
            with open(path, encoding='utf-8') as f:
                data = json.load(f)
        except json.JSONDecodeError as e:
            raise ValueError(f"Invalid JSON in context file: {e}") from e

        stored = data.get("contexts", {}) if isinstance(data, dict) else None
        if not isinstance(stored, dict):
            raise ValueError("Invalid context file: 'contexts' must be a JSON object")

        pending = []
        for context_type, entries in stored.items():
            if context_type not in self.contexts:
                logger.warning(f"Unknown context type in file: {context_type}")
                continue
            if not isinstance(entries, list):
                raise ValueError(
                    f"Invalid context file: entries for {context_type!r} must be a list"
                )
            pending.extend(self._item_from_saved(context_type, entry) for entry in entries)

        # Clear existing context only once the file is known to be usable
        self.clear()

        for item in pending:
            if not item.id or item.id in self.context_index:
                item.id = self._new_context_id(item.type, item.content)
            self.contexts[item.type].append(item)
            self.context_index[item.id] = item
            self.total_items += 1

        # Continue numbering after the restored items.
        self._id_counter = max(self._id_counter, self.total_items)

        logger.info(f"Loaded {self.total_items} context items from {file_path}")

    def extract_code_blocks(self, source_filter: Optional[str] = None) -> List[Dict[str, Any]]:
        """
        Extract all fenced markdown code blocks from context items.

        Useful for getting exact installation commands, usage examples, etc.

        Args:
            source_filter: Optional case-insensitive substring that an item's
                source must contain (e.g., "readme" to only get README code)

        Returns:
            List of {"language": str, "code": str, "source": str,
            "context": str, "length": int}, where "language" is "text" for an
            untagged block and "length" is the number of lines in "code".
            Empty blocks are skipped.
        """
        wanted = source_filter.lower() if source_filter else None
        code_blocks = []

        for item in self.get_all_context():
            # Apply source filter
            if wanted and wanted not in item.source.lower():
                continue

            for match in self._CODE_FENCE_RE.finditer(item.content):
                code = (match.group(2) or "").strip()
                if not code:
                    continue

                # Pass the block's own offset so identical snippets in
                # different places each get their own surrounding text.
                context_text = self._get_code_block_context(
                    item.content, code, position=match.start()
                )

                code_blocks.append({
                    "language": match.group(1) or "text",
                    "code": code,
                    "source": item.source,
                    "context": context_text,
                    "length": len(code.split('\n'))
                })

        logger.debug(f"Extracted {len(code_blocks)} code blocks")
        return code_blocks

    def _get_code_block_context(
        self,
        full_content: str,
        code_snippet: str,
        lines_before: int = 3,
        position: Optional[int] = None
    ) -> str:
        """
        Get text context before a code block (usually a heading or description).

        Walks backwards from the block, collecting non-blank lines until a
        heading is reached (inclusive), ``lines_before`` lines are collected,
        or another code fence is hit.

        Args:
            full_content: The document containing the code block
            code_snippet: The code, used to locate the block when ``position``
                is not given (first occurrence wins)
            lines_before: Maximum number of text lines to collect
            position: Offset of the block's opening fence in ``full_content``

        Returns:
            The collected lines joined by single spaces, or "" when the
            snippet is not found or nothing precedes it.
        """
        if lines_before <= 0:
            return ""

        idx = full_content.find(code_snippet) if position is None else position
        if idx < 0:
            return ""

        # When located by snippet text, the nearest non-blank line above is
        # the block's own opening fence ("```lang"), which is not context.
        skip_own_fence = position is None

        context_lines = []
        for raw_line in reversed(full_content[:idx].split('\n')):
            text = raw_line.strip()
            if not text:
                continue
            if text.startswith('```'):
                if skip_own_fence:
                    skip_own_fence = False
                    continue
                # A fence here closes an earlier code block; stop before it.
                break
            skip_own_fence = False
            context_lines.insert(0, text)
            # Stop at heading
            if text.startswith('#') or len(context_lines) >= lines_before:
                break

        return ' '.join(context_lines)

    def extract_installation_info(self) -> Dict[str, Any]:
        """
        Extract installation information from all loaded context.

        Scans item content for ``pip install`` / ``npm install`` commands and
        for Python / Node.js version requirements. Option flags are skipped,
        and commands that install from a requirements file, path or URL
        (e.g. ``pip install -r requirements.txt``) are ignored.

        Returns:
            {
                "commands": package names found after an install command,
                "package_managers": ["pip", ...],
                "requirements": ["Python 3.7+", ...],
                "examples": [{"command": "pip install X", "source": ..., "type": "pip"}]
            }
            "commands", "package_managers" and "requirements" are
            de-duplicated in first-seen order.
        """
        packages = []
        managers = []
        requirements = []
        examples = []

        install_patterns = (
            ("pip", self._PIP_INSTALL_RE),
            ("npm", self._NPM_INSTALL_RE),
        )
        version_patterns = (
            ("Python", self._PYTHON_VERSION_RE),
            ("Node.js", self._NODE_VERSION_RE),
        )

        for item in self.get_all_context():
            content = item.content

            for manager, pattern in install_patterns:
                for match in pattern.finditer(content):
                    if self._VALUE_FLAGS.intersection(match.group(1).split()):
                        # The captured token is the flag's value, not a package.
                        continue
                    # A trailing "." is sentence punctuation, not part of the name.
                    package = match.group(2).rstrip('.')
                    packages.append(package)
                    managers.append(manager)
                    examples.append({
                        "command": f"{manager} install {package}",
                        "source": item.source,
                        "type": manager
                    })

            for runtime, pattern in version_patterns:
                for version in pattern.findall(content):
                    requirements.append(f"{runtime} {version}+")

        install_info = {
            # dict.fromkeys de-duplicates while keeping a deterministic order
            "commands": list(dict.fromkeys(packages)),
            "package_managers": list(dict.fromkeys(managers)),
            "requirements": list(dict.fromkeys(requirements)),
            "examples": examples
        }

        logger.debug(f"Extracted installation info: {len(install_info['commands'])} packages")
        return install_info

    def extract_quickstart_example(self) -> Optional[Dict[str, str]]:
        """
        Extract the main quickstart/usage example from README.

        Only items whose source contains "readme" (case-insensitive) are
        considered. Looks for the first substantial code block (more than two
        lines) under a Usage, Quickstart, Quick Start, Example(s) or
        Getting Started heading.

        Returns:
            {"language": str, "code": str, "section": str, "source": str} or None
        """
        for item in self.get_all_context():
            if "readme" not in item.source.lower():
                continue

            for section in self._USAGE_SECTION_RE.finditer(item.content):
                section_name, section_content = section.group(1), section.group(2)

                for block in self._CODE_FENCE_RE.finditer(section_content):
                    code = (block.group(2) or "").strip()
                    # Skip one- and two-line snippets
                    if len(code.split('\n')) > 2:
                        return {
                            "language": block.group(1) or "text",
                            "code": code,
                            "section": section_name,
                            "source": item.source
                        }

        logger.debug("No quickstart example found in README")
        return None

    def has_type(self, context_type: str) -> bool:
        """Check if context manager has items of a specific type."""
        return bool(self.contexts.get(context_type))
584
+
585
+
586
if __name__ == "__main__":
    # Manual smoke test: exercises add, search and summary on a fresh manager
    # and prints the results for eyeballing.
    print("Testing ExternalContextManager")
    print("=" * 50)

    manager = ExternalContextManager()

    # One requirement, given positionally: type, content, metadata, source.
    requirement_id = manager.add_context(
        "requirements",
        "The system must support horizontal scaling with Redis-based sessions",
        {"title": "Scaling Requirements"},
        "requirements.md",
    )
    print(f"Added requirement: {requirement_id}")

    # One Jira ticket with a couple of extra fields.
    extra_fields = {
        "acceptance_criteria": "Sessions work across servers",
        "priority": "High",
    }
    jira_context_id = manager.add_jira_ticket(
        "PROJ-123",
        "Implement distributed session management",
        "We need to support multiple server instances",
        extra_fields,
    )
    print(f"Added ticket: {jira_context_id}")

    # Keyword search over everything that was added.
    print("\nSearching for 'Redis':")
    for hit, relevance in manager.search_context("Redis"):
        print(f" - {hit.get_summary()} (score: {relevance:.2f})")

    # Per-type overview of what the manager now holds.
    print("\nContext Summary:")
    overview = manager.get_summary()
    print(f"Total items: {overview['total_items']}")
    for type_name in overview["by_type"]:
        print(f" {type_name}: {overview['by_type'][type_name]['count']} items")

    print("\n✅ Context manager test complete!")