code-finder 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. claude_context/__init__.py +33 -0
  2. claude_context/agentic_integration.py +309 -0
  3. claude_context/ast_chunker.py +646 -0
  4. claude_context/config.py +239 -0
  5. claude_context/context_manager.py +627 -0
  6. claude_context/embeddings.py +307 -0
  7. claude_context/embeddings_interface.py +226 -0
  8. claude_context/enhanced_ast_chunker.py +1129 -0
  9. claude_context/explorer.py +951 -0
  10. claude_context/explorer_with_context.py +1008 -0
  11. claude_context/indexer.py +893 -0
  12. claude_context/markdown_chunker.py +421 -0
  13. claude_context/mode_handler.py +1774 -0
  14. claude_context/query_metrics.py +164 -0
  15. claude_context/question_generator.py +800 -0
  16. claude_context/readme_extractor.py +485 -0
  17. claude_context/repository_adapter.py +399 -0
  18. claude_context/search.py +493 -0
  19. claude_context/skills/__init__.py +11 -0
  20. claude_context/skills/_cli_common.py +74 -0
  21. claude_context/skills/_index_manager.py +98 -0
  22. claude_context/skills/api_surface.py +219 -0
  23. claude_context/skills/evidence_retrieval.py +151 -0
  24. claude_context/skills/grounded_review.py +212 -0
  25. claude_context/synthesis/__init__.py +8 -0
  26. claude_context/synthesis/editor_agent.py +391 -0
  27. claude_context/synthesis/llm_synthesizer.py +153 -0
  28. claude_context/synthesis/logic_explainer.py +235 -0
  29. claude_context/synthesis/multi_review_pipeline.py +717 -0
  30. claude_context/synthesis/prompt_builder.py +439 -0
  31. claude_context/synthesis/providers.py +115 -0
  32. claude_context/synthesis/validators.py +458 -0
  33. code_finder-0.1.0.dist-info/METADATA +823 -0
  34. code_finder-0.1.0.dist-info/RECORD +37 -0
  35. code_finder-0.1.0.dist-info/WHEEL +5 -0
  36. code_finder-0.1.0.dist-info/entry_points.txt +4 -0
  37. code_finder-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,485 @@
1
+ """
2
+ README Extraction Module
3
+
4
+ Extracts essential user-facing content from README files:
5
+ - Installation commands
6
+ - Quickstart code examples
7
+ - Authentication setup
8
+ - Configuration examples
9
+
10
+ This is the CRITICAL missing piece for evidence-first documentation.
11
+ """
12
+
13
+ import re
14
+ import logging
15
+ from typing import Dict, List, Optional, Tuple
16
+ from dataclasses import dataclass
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
@dataclass
class InstallationInfo:
    """Installation details recovered from a README."""

    # Install command text, e.g. "pip install my-project".
    command: str
    # Requirement descriptions, e.g. ["Python 3.8+"]; may be empty.
    requirements: List[str]
    # Origin of the information; defaults to the README itself.
    source: str = "README"
27
+
28
+
29
@dataclass
class CodeExample:
    """A code snippet recovered from a README."""

    # Snippet text (the lines between the fence markers, joined by "\n").
    code: str
    # Language tag taken from the opening fence.
    language: str
    # What this example demonstrates.
    context: str
    # Origin of the snippet; defaults to the README itself.
    source: str = "README"
36
+
37
+
38
class ReadmeExtractor:
    """
    Extracts essential content from README files.

    Uses line-based markdown heuristics to find:
    - Installation commands (pip, npm, cargo, etc.)
    - Quickstart code examples
    - Authentication setup

    Only triple-backtick fences are recognised as code blocks. Lines inside
    a fenced block are never interpreted as markdown headers, so a shell
    comment such as "# install the package" neither starts nor ends a section.
    """

    # Section headers that typically contain installation info
    INSTALL_SECTION_PATTERNS = [
        r"^#{1,3}\s*installation",
        r"^#{1,3}\s*install",
        r"^#{1,3}\s*getting\s+started",
        r"^#{1,3}\s*quick\s+start",
        r"^#{1,3}\s*setup",
    ]

    # Installation command patterns
    INSTALL_COMMAND_PATTERNS = [
        r"pip\s+install\s+[\w\-\[\]]+",
        r"npm\s+install\s+[\w\-@/]+",
        r"yarn\s+add\s+[\w\-@/]+",
        r"cargo\s+install\s+[\w\-]+",
        r"gem\s+install\s+[\w\-]+",
        r"go\s+get\s+[\w\-./]+",
        r"conda\s+install\s+[\w\-]+",
    ]

    # Quickstart section patterns
    QUICKSTART_SECTION_PATTERNS = [
        r"^#{1,3}\s*quick\s+start",
        r"^#{1,3}\s*quickstart",
        r"^#{1,3}\s*getting\s+started",
        r"^#{1,3}\s*usage",
        r"^#{1,3}\s*example",
    ]

    # Auth-related section patterns
    AUTH_SECTION_PATTERNS = [
        r"^#{1,3}\s*authentication",
        r"^#{1,3}\s*auth",
        r"^#{1,3}\s*api\s+key",
        r"^#{1,3}\s*credentials",
    ]

    # Python version patterns (handles markdown formatting like **Python:** 3.10)
    # Matches: "Python 3.10", "Python: 3.10", "**Python:** 3.10", "python >= 3.8"
    _PYTHON_VERSION_PATTERNS = [
        r'python[:\s*]+(\d+\.\d+)(?:\s*(?:or\s+)?(?:higher|\+|>=))?',
        r'python\s*>=?\s*(\d+\.\d+)',
        r'python\s+(\d+\.\d+)[\+\w]*',
    ]

    # Node.js version patterns. The last one also accepts the ".js" suffix
    # before a comparison operator, e.g. "Node.js >= 18".
    _NODE_VERSION_PATTERNS = [
        r'node[:\s*]+(\d+(?:\.\d+)?)',
        r'node\.?js[:\s*]+(\d+(?:\.\d+)?)',
        r'node(?:\.?js)?\s*>=?\s*(\d+(?:\.\d+)?)',
    ]

    # Substrings that mark a line as authentication-related (matched lowercase)
    _AUTH_KEYWORDS = ('api_key', 'apikey', 'token', 'auth', 'credential')

    # Fence languages and commands that identify an install-only shell block
    _SHELL_LANGUAGES = ('bash', 'sh', 'shell', 'console')
    _INSTALL_MARKERS = ('pip install', 'npm install', 'yarn add')

    _FENCE = '```'
    _HEADER_RE = re.compile(r'^(#{1,6})\s+')

    def __init__(self, readme_content: str):
        """Initialize with README content"""
        self.content = readme_content
        self.lines = readme_content.split('\n')
        logger.debug(f"ReadmeExtractor initialized with {len(self.lines)} lines")

    def extract_installation_info(self) -> Optional[InstallationInfo]:
        """
        Extract installation command from README.

        Returns:
            InstallationInfo with command and requirements, or None if not found
        """
        install_section = self._find_section(self.INSTALL_SECTION_PATTERNS)

        if not install_section:
            logger.warning("No installation section found in README")
            return None

        for line in install_section:
            # Fence markers carry no command text; the lines between them are
            # scanned like every other line of the section.
            if line.strip().startswith(self._FENCE):
                continue

            for pattern in self.INSTALL_COMMAND_PATTERNS:
                match = re.search(pattern, line, re.IGNORECASE)
                if match:
                    command = match.group(0)
                    logger.info(f"Found installation command: {command}")

                    # Extract requirements if mentioned
                    requirements = self._extract_requirements(install_section)

                    return InstallationInfo(
                        command=command,
                        requirements=requirements,
                        source="README"
                    )

        logger.warning("Installation section found but no install command extracted")
        return None

    def extract_quickstart_example(self) -> Optional[CodeExample]:
        """
        Extract the first substantial code example from README.

        Prioritizes:
        1. The first substantial code block in a quickstart/usage section
        2. Otherwise the first substantial code block anywhere in the README,
           skipping short shell blocks that only contain an install command

        Returns:
            CodeExample with code, language, and context, or None if not found
        """
        quickstart_section = self._find_section(self.QUICKSTART_SECTION_PATTERNS)

        if quickstart_section:
            code_example = self._extract_first_substantial_code(quickstart_section)
            if code_example:
                code_example.context = "Quickstart example from README"
                logger.info(f"Found quickstart example: {len(code_example.code)} chars, language: {code_example.language}")
                return code_example

        # Fallback: Find first substantial code block (skip install commands)
        code_example = self._extract_first_substantial_code(self.lines, skip_install=True)
        if code_example:
            code_example.context = "First code example from README"
            logger.info(f"Found code example: {len(code_example.code)} chars")
            return code_example

        logger.warning("No code examples found in README")
        return None

    def extract_authentication_example(self) -> Optional[CodeExample]:
        """
        Extract authentication setup code from README.

        If an authentication section exists, its first code block is used and
        no further search happens. Otherwise every line containing an auth
        keyword is examined, and the first substantial code block that either
        contains that line or lies completely within 10 lines of it is returned.

        Returns:
            CodeExample showing how to authenticate, or None
        """
        auth_section = self._find_section(self.AUTH_SECTION_PATTERNS)

        if auth_section:
            code_example = self._extract_first_code_block(auth_section)
            if code_example:
                code_example.context = "Authentication setup from README"
                logger.info("Found authentication example in auth section")
                return code_example
            return None

        # Fences are parsed once over the whole document. Parsing only a
        # window around the keyword can start in the middle of a block, which
        # inverts open/close fences and returns prose as if it were code.
        blocks = self._code_blocks(self.lines)
        if not blocks:
            return None

        for i, line in enumerate(self.lines):
            lowered = line.lower()
            if not any(keyword in lowered for keyword in self._AUTH_KEYWORDS):
                continue

            window_start = max(0, i - 10)
            window_end = min(len(self.lines), i + 10)  # exclusive
            for open_index, close_index, language, code in blocks:
                inside_window = open_index >= window_start and close_index < window_end
                contains_keyword_line = open_index <= i <= close_index
                if (inside_window or contains_keyword_line) and len(code) > 20:
                    logger.info("Found authentication example via keyword search")
                    return CodeExample(
                        code=code,
                        language=language,
                        context="Authentication setup",
                        source="README"
                    )

        return None

    def extract_all_code_examples(self, limit: int = 5) -> List[CodeExample]:
        """
        Extract all code examples from README.

        Args:
            limit: Maximum number of examples to extract; zero or a negative
                value yields an empty list

        Returns:
            List of CodeExample objects, in document order
        """
        examples = []

        if limit > 0:
            for open_index, _close_index, language, code in self._code_blocks(self.lines):
                if len(code) <= 20:  # Not substantial enough
                    continue

                examples.append(CodeExample(
                    code=code,
                    language=language,
                    context=self._get_context_before_line(open_index),
                    source="README"
                ))

                if len(examples) >= limit:
                    break

        logger.info(f"Extracted {len(examples)} code examples from README")
        return examples

    def _find_section(self, header_patterns: List[str]) -> Optional[List[str]]:
        """
        Find a section by header patterns.

        Among all headers matching any pattern, the deepest one (most '#')
        wins; ties go to the first occurrence. The section runs until the next
        header of the same or a higher level. Lines inside fenced code blocks
        are never treated as headers.

        Returns:
            List of lines in the section (header excluded), or None if not found
        """
        best_match = None
        best_match_level = 0

        # First pass: find all matching headers and pick the most specific one
        in_fence = False
        for index, line in enumerate(self.lines):
            stripped = line.strip()
            if stripped.startswith(self._FENCE):
                in_fence = not in_fence
                continue
            if in_fence:
                continue

            level_match = self._HEADER_RE.match(stripped)
            if not level_match:
                continue
            level = len(level_match.group(1))

            # Prefer more specific (deeper) sections
            # e.g., "### Installation" (level 3) over "## Getting Started" (level 2)
            if level <= best_match_level:
                continue

            for pattern in header_patterns:
                if re.match(pattern, stripped, re.IGNORECASE):
                    best_match = (index, pattern, level)
                    best_match_level = level
                    break

        if best_match is None:
            return None

        start, pattern, matched_level = best_match

        # Second pass: collect content, stopping at same or higher level header
        section_lines = []
        in_fence = False
        for next_line in self.lines[start + 1:]:
            if next_line.strip().startswith(self._FENCE):
                in_fence = not in_fence
            elif not in_fence:
                header_match = self._HEADER_RE.match(next_line)
                # Subsections (more # than the matched level) stay included
                if header_match and len(header_match.group(1)) <= matched_level:
                    break
            section_lines.append(next_line)

        logger.debug(f"Found section matching '{pattern}' at level {matched_level}: {len(section_lines)} lines")
        return section_lines

    def _code_blocks(self, lines: List[str]) -> List[Tuple[int, int, str, str]]:
        """
        Parse every closed triple-backtick block in lines.

        The language is the first word after the opening fence ("text" when
        absent). A block whose closing fence is missing is dropped.

        Returns:
            List of (open_index, close_index, language, code) tuples, where
            the indices are positions of the fence lines within lines
        """
        blocks = []
        open_index = None
        language = ""
        body = []

        for index, line in enumerate(lines):
            stripped = line.strip()
            if not stripped.startswith(self._FENCE):
                if open_index is not None:
                    body.append(line)
                continue

            if open_index is None:
                open_index = index
                info = stripped[3:].strip()
                language = info.split()[0] if info else "text"
                body = []
            else:
                blocks.append((open_index, index, language, '\n'.join(body)))
                open_index = None

        return blocks

    def _extract_first_code_block(self, lines: List[str]) -> Optional[CodeExample]:
        """Extract the first code block longer than 20 characters from lines (any language)"""
        for _open_index, _close_index, language, code in self._code_blocks(lines):
            if len(code) > 20:
                return CodeExample(
                    code=code,
                    language=language,
                    context="",
                    source="README"
                )

        return None

    def _extract_first_substantial_code(self, lines: List[str], skip_install: bool = False) -> Optional[CodeExample]:
        """
        Extract first substantial code block, preferring actual code over install commands.

        Args:
            lines: Lines to search
            skip_install: If True, skip shell blocks under 100 characters that
                contain an install command

        Returns:
            CodeExample for the first block longer than 30 characters, or None
        """
        for _open_index, _close_index, language, code in self._code_blocks(lines):
            if skip_install:
                install_only = (
                    language.lower() in self._SHELL_LANGUAGES
                    and len(code) < 100
                    and any(marker in code for marker in self._INSTALL_MARKERS)
                )
                if install_only:
                    continue

            # Return if substantial (>30 chars for quickstart)
            if len(code) > 30:
                return CodeExample(
                    code=code,
                    language=language,
                    context="",
                    source="README"
                )

        return None

    def _extract_requirements(self, section_lines: List[str]) -> List[str]:
        """
        Extract requirements from installation section.

        Each line contributes at most one Python and one Node.js requirement,
        formatted as "Python X.Y+" / "Node.js X+"; duplicates are dropped.
        """
        requirements = []
        runtimes = (
            ("Python", self._PYTHON_VERSION_PATTERNS),
            ("Node.js", self._NODE_VERSION_PATTERNS),
        )

        for line in section_lines:
            for label, patterns in runtimes:
                for pattern in patterns:
                    match = re.search(pattern, line, re.IGNORECASE)
                    if match:
                        req = f"{label} {match.group(1)}+"
                        if req not in requirements:
                            requirements.append(req)
                        # First matching pattern decides for this runtime
                        break

        return requirements

    def _get_context_before_line(self, line_num: int, lookback: int = 3) -> str:
        """Get context from the non-empty, non-header lines just before a code block"""
        start = max(0, line_num - lookback)
        candidates = (self.lines[i].strip() for i in range(start, line_num))
        context_lines = [text for text in candidates if text and not text.startswith('#')]

        return ' '.join(context_lines) if context_lines else "Code example"
420
+
421
+
422
def extract_from_readme(readme_content: str) -> Dict:
    """
    Main entry point for README extraction.

    Builds one ReadmeExtractor for the given text and runs each extraction
    in turn.

    Returns a dictionary with the keys:
    - installation: InstallationInfo or None
    - quickstart: CodeExample or None
    - authentication: CodeExample or None
    - examples: List[CodeExample] (at most 3)
    """
    extractor = ReadmeExtractor(readme_content)

    evidence: Dict = {}
    evidence["installation"] = extractor.extract_installation_info()
    evidence["quickstart"] = extractor.extract_quickstart_example()
    evidence["authentication"] = extractor.extract_authentication_example()
    evidence["examples"] = extractor.extract_all_code_examples(limit=3)
    return evidence
440
+
441
+
442
+ # Example usage
443
if __name__ == "__main__":
    # Run the extractor against a small in-memory README and print a summary
    sample_readme = """
# My Project

A cool project that does things.

## Installation

```bash
pip install my-project
```

Requirements:
- Python 3.8+
- NumPy

## Quick Start

```python
from my_project import Client

client = Client(api_key="your-key")
result = client.do_something()
print(result)
```

## Authentication

Set your API key:

```python
import os
os.environ["API_KEY"] = "your-key-here"
```
"""

    result = extract_from_readme(sample_readme)
    print("Extraction Results:")
    summary_rows = (
        ("Installation", "installation"),
        ("Quickstart", "quickstart"),
        ("Auth", "authentication"),
    )
    for label, key in summary_rows:
        print(f"{label}: {result[key]}")
    print(f"Examples: {len(result['examples'])} found")