code-finder 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. claude_context/__init__.py +33 -0
  2. claude_context/agentic_integration.py +309 -0
  3. claude_context/ast_chunker.py +646 -0
  4. claude_context/config.py +239 -0
  5. claude_context/context_manager.py +627 -0
  6. claude_context/embeddings.py +307 -0
  7. claude_context/embeddings_interface.py +226 -0
  8. claude_context/enhanced_ast_chunker.py +1129 -0
  9. claude_context/explorer.py +951 -0
  10. claude_context/explorer_with_context.py +1008 -0
  11. claude_context/indexer.py +893 -0
  12. claude_context/markdown_chunker.py +421 -0
  13. claude_context/mode_handler.py +1774 -0
  14. claude_context/query_metrics.py +164 -0
  15. claude_context/question_generator.py +800 -0
  16. claude_context/readme_extractor.py +485 -0
  17. claude_context/repository_adapter.py +399 -0
  18. claude_context/search.py +493 -0
  19. claude_context/skills/__init__.py +11 -0
  20. claude_context/skills/_cli_common.py +74 -0
  21. claude_context/skills/_index_manager.py +98 -0
  22. claude_context/skills/api_surface.py +219 -0
  23. claude_context/skills/evidence_retrieval.py +151 -0
  24. claude_context/skills/grounded_review.py +212 -0
  25. claude_context/synthesis/__init__.py +8 -0
  26. claude_context/synthesis/editor_agent.py +391 -0
  27. claude_context/synthesis/llm_synthesizer.py +153 -0
  28. claude_context/synthesis/logic_explainer.py +235 -0
  29. claude_context/synthesis/multi_review_pipeline.py +717 -0
  30. claude_context/synthesis/prompt_builder.py +439 -0
  31. claude_context/synthesis/providers.py +115 -0
  32. claude_context/synthesis/validators.py +458 -0
  33. code_finder-0.1.0.dist-info/METADATA +823 -0
  34. code_finder-0.1.0.dist-info/RECORD +37 -0
  35. code_finder-0.1.0.dist-info/WHEEL +5 -0
  36. code_finder-0.1.0.dist-info/entry_points.txt +4 -0
  37. code_finder-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,439 @@
1
+ from typing import List, Dict, Any, Optional
2
+
3
+
4
+ def _format_docstring_evidence_block(docstring_evidence: List[Dict]) -> str:
5
+ """
6
+ Format extracted docstrings as high-priority evidence (TIER 2.5).
7
+
8
+ Docstrings represent the author's documented intent and often contain:
9
+ - Parameter descriptions with types and defaults
10
+ - Algorithm explanations
11
+ - Academic paper references (arXiv, etc.)
12
+ - Trade-off discussions
13
+ """
14
+ if not docstring_evidence:
15
+ return ""
16
+
17
+ lines = ["=" * 70]
18
+ lines.append("CODE DOCSTRINGS & DESCRIPTIONS (High Priority - Author's Intent):")
19
+ lines.append("=" * 70)
20
+ lines.append("")
21
+ lines.append("These docstrings represent the author's documented explanations.")
22
+ lines.append("Use them for parameter descriptions, algorithms, and trade-offs.")
23
+ lines.append("")
24
+
25
+ for doc in docstring_evidence[:10]: # Limit to top 10
26
+ source = doc.get("source", "unknown")
27
+ chunk_type = doc.get("chunk_type", "code")
28
+ name = doc.get("name", "unnamed")
29
+ docstring = doc.get("docstring", "")
30
+
31
+ lines.append(f"[{source}] ({chunk_type}: {name}):")
32
+ # Truncate long docstrings to 600 chars
33
+ if len(docstring) > 600:
34
+ lines.append(docstring[:600] + "...")
35
+ else:
36
+ lines.append(docstring)
37
+ lines.append("")
38
+
39
+ lines.append("⚠️ INSTRUCTIONS FOR DOCSTRINGS:")
40
+ lines.append(" • Use docstring descriptions for parameters and return values")
41
+ lines.append(" • Include any paper references mentioned (arXiv, DOI, etc.)")
42
+ lines.append(" • Cite as: [CITE:filepath] or [CITE:docstring]")
43
+ lines.append("")
44
+
45
+ return "\n".join(lines)
46
+
47
+
48
+ def _format_qa_insights_block(qa_insights: List[Dict]) -> str:
49
+ """
50
+ Format Q&A insights for prompt (TIER 2 - High Priority).
51
+ These represent user-identified topics and questions.
52
+ """
53
+ if not qa_insights or len(qa_insights) == 0:
54
+ return ""
55
+
56
+ lines = []
57
+ lines.append("=" * 70)
58
+ lines.append("USER Q&A INSIGHTS (High Priority - User-Identified Topics):")
59
+ lines.append("=" * 70)
60
+ lines.append("")
61
+ lines.append("The user explored these topics during interactive session.")
62
+ lines.append("These indicate what users find important/confusing.")
63
+ lines.append("")
64
+
65
+ for i, qa in enumerate(qa_insights[:5], 1): # Limit to top 5
66
+ conf = qa.get("confidence", 0)
67
+ conf_label = "HIGH" if conf > 0.7 else "MEDIUM"
68
+
69
+ lines.append(f"{i}. Q: {qa['question']}")
70
+ lines.append(f" A: {qa['answer'][:300]}..." if len(qa['answer']) > 300 else f" A: {qa['answer']}")
71
+ lines.append(f" Confidence: {conf_label} ({conf:.0%})")
72
+ if qa.get("context_used"):
73
+ lines.append(f" [Based on external context + code analysis]")
74
+ lines.append("")
75
+
76
+ lines.append("⚠️ INSTRUCTIONS FOR Q&A:")
77
+ lines.append(" • Incorporate these insights where relevant to the section")
78
+ lines.append(" • Cite Q&A as: [CITE:Q&A Session]")
79
+ lines.append(" • Focus on high-confidence insights")
80
+ lines.append(" • Address questions that users actually asked")
81
+ lines.append("")
82
+
83
+ return "\n".join(lines)
84
+
85
+
86
+ def _format_essentials_block(essentials: Dict[str, Any]) -> str:
87
+ """
88
+ Format essential content (installation, quickstart) as MANDATORY block.
89
+ This takes priority in the prompt.
90
+ """
91
+ if not essentials:
92
+ return ""
93
+
94
+ lines = ["=" * 70]
95
+ lines.append("MANDATORY CONTENT (include these FIRST in your output):")
96
+ lines.append("=" * 70)
97
+ lines.append("")
98
+
99
+ # Installation
100
+ if essentials.get("installation"):
101
+ inst = essentials["installation"]
102
+ lines.append("1. INSTALLATION (include EXACTLY as shown):")
103
+ lines.append(f" Command: {inst['command']}")
104
+ if inst.get("requirements"):
105
+ lines.append(f" Requirements: {', '.join(inst['requirements'])}")
106
+ lines.append(f" → Include in a code block in your output")
107
+ lines.append("")
108
+
109
+ # Quickstart
110
+ if essentials.get("quickstart"):
111
+ qs = essentials["quickstart"]
112
+ lines.append("2. QUICKSTART EXAMPLE (include EXACTLY as shown):")
113
+ lines.append(f" Language: {qs['language']}")
114
+ lines.append(f" Code ({len(qs['code'].split(chr(10)))} lines):")
115
+ lines.append(f"```{qs['language']}")
116
+ lines.append(qs['code'][:500] + "..." if len(qs['code']) > 500 else qs['code'])
117
+ lines.append("```")
118
+ lines.append(f" → Include this COMPLETE code block in your output")
119
+ lines.append("")
120
+
121
+ # Authentication
122
+ if essentials.get("authentication"):
123
+ auth = essentials["authentication"]
124
+ lines.append("3. AUTHENTICATION (include if relevant):")
125
+ lines.append(f"```{auth['language']}")
126
+ lines.append(auth['code'][:300])
127
+ lines.append("```")
128
+ lines.append("")
129
+
130
+ # Overview/Description
131
+ if essentials.get("overview"):
132
+ lines.append("4. PROJECT OVERVIEW:")
133
+ lines.append(f" {essentials['overview'][:500]}")
134
+ lines.append(" → Use this to introduce the project")
135
+ lines.append("")
136
+
137
+ # Feature list
138
+ if essentials.get("features"):
139
+ features = essentials["features"]
140
+ lines.append(f"5. KEY FEATURES (from '{features.get('section', 'Features')}' section):")
141
+ for item in features.get("items", [])[:10]:
142
+ lines.append(f" • {item}")
143
+ lines.append(" → Include these when describing capabilities")
144
+ lines.append("")
145
+
146
+ lines.append("")
147
+ lines.append("⚠️ CRITICAL REQUIREMENTS:")
148
+ lines.append(" • You MUST include ALL mandatory content above")
149
+ lines.append(" • Include commands and code EXACTLY as shown (no paraphrasing)")
150
+ lines.append(" • Mandatory content comes FIRST, before architecture/internals")
151
+ lines.append(" • If you skip or paraphrase mandatory content, validation WILL FAIL")
152
+ lines.append("=" * 70)
153
+ lines.append("")
154
+
155
+ return "\n".join(lines)
156
+
157
+
158
+ def _format_evidence_block(title: str, items: List[Dict]) -> str:
159
+ if not items:
160
+ return f"{title}:\n (none)\n"
161
+ lines = [f"{title}:"]
162
+ for i, it in enumerate(items, 1):
163
+ src = it.get("source", f"item-{i}")
164
+ snip = (it.get("snippet") or "").strip()
165
+ lines.append(f"- [{src}]")
166
+ if snip:
167
+ lines.append(f" {snip[:800]}")
168
+ return "\n".join(lines) + "\n"
169
+
170
+
171
+ def _format_rationale_block(rationale: Dict[str, Any]) -> str:
172
+ if not rationale:
173
+ return ""
174
+
175
+ logic = rationale.get("logic", [])
176
+ decisions = rationale.get("decisions", [])
177
+ qa_items = rationale.get("qa", [])
178
+
179
+ if not (logic or decisions or qa_items):
180
+ return ""
181
+
182
+ lines: List[str] = ["=" * 70]
183
+ lines.append("RATIONALE & DECISIONS (Explain the WHY):")
184
+ lines.append("=" * 70)
185
+ lines.append("")
186
+
187
+ if logic:
188
+ lines.append("LogicExplainer summaries (use these to explain intent):")
189
+ for entry in logic[:3]:
190
+ lines.append(f"• {entry.get('unit_id')}: {entry.get('rationale', '')[:220]}")
191
+ trade_offs = entry.get("trade_offs") or []
192
+ if trade_offs:
193
+ lines.append(f" Trade-offs: {', '.join(trade_offs[:2])}")
194
+ lines.append(" Cite as [CITE:rationale]")
195
+ lines.append("")
196
+
197
+ if decisions:
198
+ lines.append("Documented decisions / ADRs:")
199
+ for decision in decisions[:3]:
200
+ lines.append(f"• {decision.get('type','decision').upper()}: {decision.get('summary','')[:200]}")
201
+ if decision.get("source"):
202
+ lines.append(f" Source: {decision['source']}")
203
+ lines.append("")
204
+
205
+ if qa_items:
206
+ lines.append("Relevant Q&A insights about rationale:")
207
+ for qa in qa_items[:3]:
208
+ lines.append(f"• Q: {qa.get('question','')[:150]}")
209
+ lines.append(f" A: {qa.get('answer','')[:200]}")
210
+ lines.append("")
211
+
212
+ lines.append("⚠️ Explain decisions, trade-offs, and impacts.")
213
+ lines.append(" • Tie rationale to specific code or context.")
214
+ lines.append(" • Use the provided citations when referencing rationale.")
215
+ lines.append("")
216
+
217
+ return "\n".join(lines)
218
+
219
+
220
def _render_structured_evidence(structured_evidence: Dict[str, Any]) -> str:
    """Render tiered evidence (essentials, usage, rationale, implementation) as prompt text."""
    banner = "=" * 70
    parts: List[str] = []

    def fenced(example: Dict[str, Any]) -> str:
        # Code fence with the example's language tag; code capped at 300 chars.
        language = example.get("language") or ""
        code = (example.get("code") or "")[:300]
        return f"```{language}\n{code}\n```"

    # Add essentials block (MANDATORY - TIER 1)
    essentials_block = _format_essentials_block(structured_evidence.get("essentials", {}))
    if essentials_block:
        parts.append(essentials_block)

    usage = structured_evidence.get("usage") or {}

    # Add Q&A insights (HIGH PRIORITY - TIER 2)
    if usage.get("qa_insights"):
        qa_block = _format_qa_insights_block(usage["qa_insights"])
        if qa_block:
            parts.append(qa_block)

    # Add docstring evidence (HIGH PRIORITY - TIER 2.5)
    if usage.get("docstrings"):
        docstring_block = _format_docstring_evidence_block(usage["docstrings"])
        if docstring_block:
            parts.append(docstring_block)

    # Add usage examples (HIGH PRIORITY - TIER 2)
    if usage.get("readme_examples"):
        parts.append("USAGE EXAMPLES FROM README (include if relevant):")
        for i, ex in enumerate(usage["readme_examples"][:3], 1):
            parts.append(f"{i}. {ex.get('context') or ''}:")
            parts.append(fenced(ex))
        parts.append("")

    # Add tables (compatibility matrices, status tables) - TIER 2
    if usage.get("tables"):
        parts.extend([
            banner,
            "TABLES FROM README (include EXACTLY as shown if relevant):",
            banner,
            "",
        ])
        for table in usage["tables"][:3]:
            status_note = (
                " [CONTAINS STATUS INDICATORS - preserve exactly]"
                if table.get("has_status_indicators", False)
                else ""
            )
            parts.append(f"Table from '{table.get('section', 'table')}'{status_note}:")
            parts.append(table.get("raw_content", ""))
            parts.append("")
        parts.extend([
            "⚠️ IMPORTANT: Include these tables EXACTLY as shown.",
            " • Do NOT summarize or paraphrase tables",
            " • Preserve all status indicators (✅/❌/⏳)",
            " • Include full table structure with headers",
            "",
        ])

    # Add callouts (coming soon, experimental, warnings) - TIER 2
    if usage.get("callouts"):
        parts.extend([
            banner,
            "STATUS NOTICES AND CALLOUTS (must be reflected in docs):",
            banner,
            "",
        ])
        for callout in usage["callouts"][:10]:
            # Untyped callouts get a neutral label rather than raising KeyError.
            ctype = str(callout.get("callout_type") or "notice").upper().replace("_", " ")
            parts.append(f"[{ctype}]: {callout.get('text', '')}")
        parts.extend([
            "",
            "⚠️ CRITICAL: These notices affect documentation accuracy:",
            " • 'COMING SOON' features must NOT be documented as available",
            " • 'EXPERIMENTAL' features need appropriate warnings",
            " • 'DEPRECATED' features should be clearly marked",
            " • Reflect these statuses accurately in your output",
            "",
        ])

    # Add module READMEs (for How To Guides section) - TIER 2
    if usage.get("module_readmes"):
        parts.extend([
            banner,
            "MODULE-SPECIFIC DOCUMENTATION (from subdirectory READMEs):",
            banner,
            "",
            "Use this evidence for detailed how-to guides and module-specific examples.",
            "These provide usage patterns for specific subsystems/components.",
            "",
        ])

        for module in usage["module_readmes"][:10]:  # Limit to 10 modules
            parts.append(f"### Module: {module.get('path', 'unknown')}")

            if module.get("overview"):
                overview_text = module["overview"][:800]  # Limit overview length
                fallback_note = " [raw extraction]" if module.get("raw_fallback") else ""
                parts.append(f"Overview{fallback_note}: {overview_text}")
                parts.append("")

            if module.get("features"):
                features = module["features"]
                parts.append(f"Features ({features.get('section', 'Features')}):")
                # Limit to 5 features
                parts.extend(f" • {item}" for item in features.get("items", [])[:5])
                parts.append("")

            if module.get("examples"):
                parts.append("Code Examples:")
                # Limit to 2 examples per module
                parts.extend(fenced(ex) for ex in module["examples"][:2])
                parts.append("")

            if module.get("tables"):
                for table in module["tables"][:1]:  # Limit to 1 table per module
                    parts.append(f"Table ({table.get('section', 'table')}):")
                    parts.append(table.get("raw_content", "")[:500])
                    parts.append("")

            parts.append("---")
            parts.append("")

        parts.extend([
            "⚠️ INSTRUCTION: Incorporate module-specific examples into 'How To Guides'.",
            " • Use module paths as context (e.g., 'examples/quantization/')",
            " • Reference module READMEs when documenting specific features",
            " • Don't duplicate root README content already in Getting Started",
            "",
        ])

    rationale_block = _format_rationale_block(structured_evidence.get("rationale", {}))
    if rationale_block:
        parts.append(rationale_block)

    # Add implementation details (SUPPLEMENTARY - TIER 3)
    impl = structured_evidence.get("implementation") or {}
    if impl.get("code_patterns"):
        parts.append("CODE IMPLEMENTATION DETAILS (optional, use for depth):")
        for pattern in impl["code_patterns"][:3]:
            parts.append(f"- [{pattern.get('source', 'unknown')}]")
            parts.append(f" {(pattern.get('snippet') or '')[:400]}")
        parts.append("")

    return "\n".join(parts)


def _render_rule_lines(section_name: str,
                       rules: Dict,
                       max_words: int,
                       citation_style: str,
                       min_citations: Any,
                       has_essentials: bool) -> List[str]:
    """Build the numbered rule lines that follow the instructions in the prompt."""
    rule_lines = [
        f"Section: {section_name}",
        "Rules:",
        "1) CITATION REQUIREMENT (MANDATORY - output will be rejected without citations):",
        f" • Every factual claim MUST cite its source using: {citation_style}",
        f" • Minimum {min_citations} citation(s) required in this section",
        " • Format: [CITE:filepath:line-range] or [CITE:README] or [CITE:source_name]",
        " • Example: 'The default value is 5 [CITE:src/config.py:42-45]'",
    ]

    if rules.get("mark_inference", True):
        rule_lines.append("2) Mark any speculation with [INFERENCE]")
    else:
        rule_lines.append("2) Do not mark speculation")

    rule_lines.append("3) Follow required elements exactly:")
    rule_lines.extend(f" - {req}" for req in rules.get("required_elements") or [])
    rule_lines.append(f"4) Maximum {max_words} words.")

    # Mandatory-content rule only applies when essentials were supplied.
    if has_essentials:
        rule_lines.extend([
            "5) EVIDENCE-FIRST REQUIREMENT (CRITICAL - FAILURE TO COMPLY = VALIDATION FAILURE):",
            " • If MANDATORY CONTENT appears above, it MUST be in your output",
            " • Installation commands: Copy EXACTLY, use code blocks",
            " • Quickstart code: Include COMPLETE example, not paraphrased",
            " • Order: Mandatory content FIRST, then optional details",
            " • Validation will REJECT output missing mandatory content",
        ])

    lower_name = section_name.lower()
    if "usage" in lower_name or "api" in lower_name:
        rule_lines.extend([
            "6) RATIONALE REQUIREMENT:",
            " • Explain at least one design choice, trade-off, or side effect drawn from the rationale block.",
            " • Cite that explanation using [CITE:rationale] (this citation is mandatory).",
        ])

    return rule_lines


def build_section_prompt(section_name: str,
                         instructions: str,
                         code_evidence: List[Dict],
                         context_evidence: List[Dict],
                         rules: Dict,
                         max_words: int = 250,
                         structured_evidence: Optional[Dict[str, Any]] = None) -> str:
    """
    Build an evidence-based prompt for one documentation section.

    Evidence-first approach:
    - If structured_evidence is provided, essentials are formatted as
      MANDATORY content ahead of the other tiers, and the legacy
      code/context evidence arguments are not used.
    - Otherwise the legacy "CODE EVIDENCE" / "CONTEXT EVIDENCE" blocks are
      emitted.

    Args:
        section_name: Name of the section. Names containing "usage" or
            "api" (case-insensitive) also get the rationale rule.
        instructions: Template instructions (whitespace-stripped).
        code_evidence: Code snippets (legacy format)
        context_evidence: Context snippets (legacy format)
        rules: Validation rules. Keys read here: "citation_style",
            "min_citations", "mark_inference", "required_elements",
            "require_citations". None is treated as an empty dict.
        max_words: Word limit
        structured_evidence: Tiered evidence ("essentials", "usage",
            "rationale", "implementation").

    Returns:
        The full prompt text: evidence, instructions, rules and, unless
        rules["require_citations"] is falsy, a closing citation reminder.
    """
    rules = rules or {}

    if structured_evidence:
        evidence_text = _render_structured_evidence(structured_evidence)
    else:
        # Legacy format (backward compatibility)
        evidence_text = "\n".join([
            _format_evidence_block("CODE EVIDENCE", code_evidence),
            _format_evidence_block("CONTEXT EVIDENCE", context_evidence),
        ])

    citation_style = rules.get("citation_style") or "[CITE:source]"
    min_citations = rules.get("min_citations", 1)

    rule_lines = _render_rule_lines(
        section_name,
        rules,
        max_words,
        citation_style,
        min_citations,
        has_essentials=bool(structured_evidence and structured_evidence.get("essentials")),
    )

    # Build final prompt with citation reminder
    citation_reminder = ""
    if rules.get("require_citations", True):
        citation_reminder = (
            "\n\n"
            f"REMINDER: Your output MUST include at least {min_citations} citation(s) "
            f"in the format {citation_style}.\n"
            "Citations should reference the evidence provided above. "
            "Without citations, your output will be rejected."
        )

    return "\n".join([
        "Based on the following evidence, write the requested section.",
        "",
        evidence_text,
        "",
        "Instructions:",
        (instructions or "").strip(),
        "",
        "\n".join(rule_lines),
        citation_reminder,
    ])
438
+
439
+
@@ -0,0 +1,115 @@
1
+ import os
2
+ import logging
3
+ from typing import Optional
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
class TextGenerator:
    """Common interface for text-generation backends; subclasses override ``generate``."""

    def generate(
        self,
        system_prompt: str,
        user_prompt: str,
        model: Optional[str] = None,
        temperature: float = 0.2,
        max_tokens: int = 1200,
    ) -> str:
        """Return generated text for the given prompts.

        The base class provides no backend and always raises
        NotImplementedError.
        """
        raise NotImplementedError()
11
+
12
+
13
class OpenAITextGenerator(TextGenerator):
    """TextGenerator backed by the OpenAI chat-completions API."""

    def __init__(self, api_key: Optional[str] = None, default_model: str = "gpt-4o-mini"):
        """Create the OpenAI client.

        Args:
            api_key: API key; falls back to the OPENAI_API_KEY environment
                variable when not given.
            default_model: Model used when ``generate`` gets no ``model``.

        Raises:
            RuntimeError: If the ``openai`` package cannot be imported or no
                API key is available.
        """
        # Imported lazily so the module loads without the SDK installed.
        try:
            from openai import OpenAI  # type: ignore
        except Exception as e:
            raise RuntimeError("OpenAI SDK not installed. pip install openai") from e
        self._OpenAI = OpenAI
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            raise RuntimeError("OPENAI_API_KEY is required for OpenAI provider")
        self.client = self._OpenAI(api_key=self.api_key)
        self.default_model = default_model

    def generate(self, system_prompt: str, user_prompt: str, model: Optional[str] = None, temperature: float = 0.2, max_tokens: int = 1200) -> str:
        """Send one system + user message pair and return the reply text.

        Args:
            system_prompt: Content of the "system" message.
            user_prompt: Content of the "user" message.
            model: Model name; ``self.default_model`` when omitted.
            temperature: Sampling temperature passed to the API.
            max_tokens: Completion token limit passed to the API.

        Raises:
            RuntimeError: If the response has no choices or the first
                choice's content is empty or whitespace-only.
        """
        resp = self.client.chat.completions.create(
            model=model or self.default_model,
            temperature=temperature,
            max_tokens=max_tokens,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        # An empty choices list is reported like empty content rather than
        # surfacing as an IndexError.
        choices = getattr(resp, "choices", None) or []
        content = (choices[0].message.content if choices else "") or ""
        if not content.strip():
            raise RuntimeError("OpenAI returned empty content")
        return content
41
+
42
+
43
class AnthropicTextGenerator(TextGenerator):
    """TextGenerator backed by the Anthropic Messages API."""

    def __init__(self, api_key: Optional[str] = None, default_model: str = "claude-sonnet-4-6"):
        """Create the Anthropic client.

        Args:
            api_key: API key; falls back to the ANTHROPIC_API_KEY
                environment variable when not given.
            default_model: Model used when ``generate`` gets no ``model``.

        Raises:
            RuntimeError: If the ``anthropic`` package cannot be imported or
                no API key is available.
        """
        # Imported lazily so the module loads without the SDK installed.
        try:
            import anthropic  # type: ignore
        except Exception as e:
            raise RuntimeError("Anthropic SDK not installed. pip install anthropic") from e
        self._anthropic = anthropic
        self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
        if not self.api_key:
            raise RuntimeError("ANTHROPIC_API_KEY is required for Anthropic provider")
        self.client = anthropic.Anthropic(api_key=self.api_key)
        self.default_model = default_model

    def generate(self, system_prompt: str, user_prompt: str, model: Optional[str] = None, temperature: float = 0.2, max_tokens: int = 1200) -> str:
        """Send one user message with a system prompt and return the reply text.

        The text of every response part whose ``type`` is "text" is
        concatenated in order; other parts are ignored.

        Args:
            system_prompt: Passed as the ``system`` argument.
            user_prompt: Content of the single "user" message.
            model: Model name; ``self.default_model`` when omitted.
            temperature: Sampling temperature passed to the API.
            max_tokens: Token limit passed to the API.

        Raises:
            RuntimeError: If the collected text is empty or whitespace-only.
        """
        msg = self.client.messages.create(
            model=model or self.default_model,
            max_tokens=max_tokens,
            temperature=temperature,
            system=system_prompt,
            messages=[{"role": "user", "content": user_prompt}],
        )
        # `or []` also covers a content attribute that is present but None.
        parts = getattr(msg, "content", None) or []
        text = "".join(
            getattr(part, "text", "") or ""
            for part in parts
            if getattr(part, "type", "") == "text"
        )
        if not text.strip():
            raise RuntimeError("Anthropic returned empty content")
        return text
73
+
74
+
75
def create_generator(provider: Optional[str] = None, model: Optional[str] = None) -> TextGenerator:
    """
    Provider selection with Anthropic-first detection when unspecified.
    Order:
    1) Explicit provider arg if provided
    2) V2D_LLM_PROVIDER env
    3) If ANTHROPIC_API_KEY set -> Anthropic
    4) Else if OPENAI_API_KEY set -> OpenAI
    5) Else fail fast

    Model selection order:
    1) Explicit model arg if provided
    2) VIBE2DOC_MODEL env variable
    3) Provider's default model

    Provider names are matched case-insensitively after trimming
    whitespace; "anthropic" and "openai" are the recognised values.

    Raises:
        RuntimeError: If no provider can be determined, if the requested
            provider is not recognised, or if the chosen generator fails
            to initialise (missing SDK or API key).
    """
    prov = (provider or os.getenv("V2D_LLM_PROVIDER") or "").lower().strip()

    if not prov:
        if os.getenv("ANTHROPIC_API_KEY"):
            prov = "anthropic"
        elif os.getenv("OPENAI_API_KEY"):
            prov = "openai"

    if prov == "anthropic":
        gen = AnthropicTextGenerator()
    elif prov == "openai":
        gen = OpenAITextGenerator()
    elif prov:
        # A provider was requested but is not one we know; say so instead of
        # claiming that nothing was configured.
        raise RuntimeError(
            f"Unsupported LLM provider '{prov}'. Supported providers: anthropic, openai"
        )
    else:
        raise RuntimeError("No LLM provider configured. Set V2D_LLM_PROVIDER or ANTHROPIC_API_KEY/OPENAI_API_KEY")

    # Read model from environment if not explicitly provided
    model_to_use = model or os.getenv("VIBE2DOC_MODEL")
    if model_to_use:
        gen.default_model = model_to_use
    return gen
113
+
114
+
115
+