code-finder 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_context/__init__.py +33 -0
- claude_context/agentic_integration.py +309 -0
- claude_context/ast_chunker.py +646 -0
- claude_context/config.py +239 -0
- claude_context/context_manager.py +627 -0
- claude_context/embeddings.py +307 -0
- claude_context/embeddings_interface.py +226 -0
- claude_context/enhanced_ast_chunker.py +1129 -0
- claude_context/explorer.py +951 -0
- claude_context/explorer_with_context.py +1008 -0
- claude_context/indexer.py +893 -0
- claude_context/markdown_chunker.py +421 -0
- claude_context/mode_handler.py +1774 -0
- claude_context/query_metrics.py +164 -0
- claude_context/question_generator.py +800 -0
- claude_context/readme_extractor.py +485 -0
- claude_context/repository_adapter.py +399 -0
- claude_context/search.py +493 -0
- claude_context/skills/__init__.py +11 -0
- claude_context/skills/_cli_common.py +74 -0
- claude_context/skills/_index_manager.py +98 -0
- claude_context/skills/api_surface.py +219 -0
- claude_context/skills/evidence_retrieval.py +151 -0
- claude_context/skills/grounded_review.py +212 -0
- claude_context/synthesis/__init__.py +8 -0
- claude_context/synthesis/editor_agent.py +391 -0
- claude_context/synthesis/llm_synthesizer.py +153 -0
- claude_context/synthesis/logic_explainer.py +235 -0
- claude_context/synthesis/multi_review_pipeline.py +717 -0
- claude_context/synthesis/prompt_builder.py +439 -0
- claude_context/synthesis/providers.py +115 -0
- claude_context/synthesis/validators.py +458 -0
- code_finder-0.1.0.dist-info/METADATA +823 -0
- code_finder-0.1.0.dist-info/RECORD +37 -0
- code_finder-0.1.0.dist-info/WHEEL +5 -0
- code_finder-0.1.0.dist-info/entry_points.txt +4 -0
- code_finder-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,439 @@
|
|
|
1
|
+
from typing import List, Dict, Any, Optional
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def _format_docstring_evidence_block(docstring_evidence: List[Dict]) -> str:
|
|
5
|
+
"""
|
|
6
|
+
Format extracted docstrings as high-priority evidence (TIER 2.5).
|
|
7
|
+
|
|
8
|
+
Docstrings represent the author's documented intent and often contain:
|
|
9
|
+
- Parameter descriptions with types and defaults
|
|
10
|
+
- Algorithm explanations
|
|
11
|
+
- Academic paper references (arXiv, etc.)
|
|
12
|
+
- Trade-off discussions
|
|
13
|
+
"""
|
|
14
|
+
if not docstring_evidence:
|
|
15
|
+
return ""
|
|
16
|
+
|
|
17
|
+
lines = ["=" * 70]
|
|
18
|
+
lines.append("CODE DOCSTRINGS & DESCRIPTIONS (High Priority - Author's Intent):")
|
|
19
|
+
lines.append("=" * 70)
|
|
20
|
+
lines.append("")
|
|
21
|
+
lines.append("These docstrings represent the author's documented explanations.")
|
|
22
|
+
lines.append("Use them for parameter descriptions, algorithms, and trade-offs.")
|
|
23
|
+
lines.append("")
|
|
24
|
+
|
|
25
|
+
for doc in docstring_evidence[:10]: # Limit to top 10
|
|
26
|
+
source = doc.get("source", "unknown")
|
|
27
|
+
chunk_type = doc.get("chunk_type", "code")
|
|
28
|
+
name = doc.get("name", "unnamed")
|
|
29
|
+
docstring = doc.get("docstring", "")
|
|
30
|
+
|
|
31
|
+
lines.append(f"[{source}] ({chunk_type}: {name}):")
|
|
32
|
+
# Truncate long docstrings to 600 chars
|
|
33
|
+
if len(docstring) > 600:
|
|
34
|
+
lines.append(docstring[:600] + "...")
|
|
35
|
+
else:
|
|
36
|
+
lines.append(docstring)
|
|
37
|
+
lines.append("")
|
|
38
|
+
|
|
39
|
+
lines.append("⚠️ INSTRUCTIONS FOR DOCSTRINGS:")
|
|
40
|
+
lines.append(" • Use docstring descriptions for parameters and return values")
|
|
41
|
+
lines.append(" • Include any paper references mentioned (arXiv, DOI, etc.)")
|
|
42
|
+
lines.append(" • Cite as: [CITE:filepath] or [CITE:docstring]")
|
|
43
|
+
lines.append("")
|
|
44
|
+
|
|
45
|
+
return "\n".join(lines)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _format_qa_insights_block(qa_insights: List[Dict]) -> str:
|
|
49
|
+
"""
|
|
50
|
+
Format Q&A insights for prompt (TIER 2 - High Priority).
|
|
51
|
+
These represent user-identified topics and questions.
|
|
52
|
+
"""
|
|
53
|
+
if not qa_insights or len(qa_insights) == 0:
|
|
54
|
+
return ""
|
|
55
|
+
|
|
56
|
+
lines = []
|
|
57
|
+
lines.append("=" * 70)
|
|
58
|
+
lines.append("USER Q&A INSIGHTS (High Priority - User-Identified Topics):")
|
|
59
|
+
lines.append("=" * 70)
|
|
60
|
+
lines.append("")
|
|
61
|
+
lines.append("The user explored these topics during interactive session.")
|
|
62
|
+
lines.append("These indicate what users find important/confusing.")
|
|
63
|
+
lines.append("")
|
|
64
|
+
|
|
65
|
+
for i, qa in enumerate(qa_insights[:5], 1): # Limit to top 5
|
|
66
|
+
conf = qa.get("confidence", 0)
|
|
67
|
+
conf_label = "HIGH" if conf > 0.7 else "MEDIUM"
|
|
68
|
+
|
|
69
|
+
lines.append(f"{i}. Q: {qa['question']}")
|
|
70
|
+
lines.append(f" A: {qa['answer'][:300]}..." if len(qa['answer']) > 300 else f" A: {qa['answer']}")
|
|
71
|
+
lines.append(f" Confidence: {conf_label} ({conf:.0%})")
|
|
72
|
+
if qa.get("context_used"):
|
|
73
|
+
lines.append(f" [Based on external context + code analysis]")
|
|
74
|
+
lines.append("")
|
|
75
|
+
|
|
76
|
+
lines.append("⚠️ INSTRUCTIONS FOR Q&A:")
|
|
77
|
+
lines.append(" • Incorporate these insights where relevant to the section")
|
|
78
|
+
lines.append(" • Cite Q&A as: [CITE:Q&A Session]")
|
|
79
|
+
lines.append(" • Focus on high-confidence insights")
|
|
80
|
+
lines.append(" • Address questions that users actually asked")
|
|
81
|
+
lines.append("")
|
|
82
|
+
|
|
83
|
+
return "\n".join(lines)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _format_essentials_block(essentials: Dict[str, Any]) -> str:
|
|
87
|
+
"""
|
|
88
|
+
Format essential content (installation, quickstart) as MANDATORY block.
|
|
89
|
+
This takes priority in the prompt.
|
|
90
|
+
"""
|
|
91
|
+
if not essentials:
|
|
92
|
+
return ""
|
|
93
|
+
|
|
94
|
+
lines = ["=" * 70]
|
|
95
|
+
lines.append("MANDATORY CONTENT (include these FIRST in your output):")
|
|
96
|
+
lines.append("=" * 70)
|
|
97
|
+
lines.append("")
|
|
98
|
+
|
|
99
|
+
# Installation
|
|
100
|
+
if essentials.get("installation"):
|
|
101
|
+
inst = essentials["installation"]
|
|
102
|
+
lines.append("1. INSTALLATION (include EXACTLY as shown):")
|
|
103
|
+
lines.append(f" Command: {inst['command']}")
|
|
104
|
+
if inst.get("requirements"):
|
|
105
|
+
lines.append(f" Requirements: {', '.join(inst['requirements'])}")
|
|
106
|
+
lines.append(f" → Include in a code block in your output")
|
|
107
|
+
lines.append("")
|
|
108
|
+
|
|
109
|
+
# Quickstart
|
|
110
|
+
if essentials.get("quickstart"):
|
|
111
|
+
qs = essentials["quickstart"]
|
|
112
|
+
lines.append("2. QUICKSTART EXAMPLE (include EXACTLY as shown):")
|
|
113
|
+
lines.append(f" Language: {qs['language']}")
|
|
114
|
+
lines.append(f" Code ({len(qs['code'].split(chr(10)))} lines):")
|
|
115
|
+
lines.append(f"```{qs['language']}")
|
|
116
|
+
lines.append(qs['code'][:500] + "..." if len(qs['code']) > 500 else qs['code'])
|
|
117
|
+
lines.append("```")
|
|
118
|
+
lines.append(f" → Include this COMPLETE code block in your output")
|
|
119
|
+
lines.append("")
|
|
120
|
+
|
|
121
|
+
# Authentication
|
|
122
|
+
if essentials.get("authentication"):
|
|
123
|
+
auth = essentials["authentication"]
|
|
124
|
+
lines.append("3. AUTHENTICATION (include if relevant):")
|
|
125
|
+
lines.append(f"```{auth['language']}")
|
|
126
|
+
lines.append(auth['code'][:300])
|
|
127
|
+
lines.append("```")
|
|
128
|
+
lines.append("")
|
|
129
|
+
|
|
130
|
+
# Overview/Description
|
|
131
|
+
if essentials.get("overview"):
|
|
132
|
+
lines.append("4. PROJECT OVERVIEW:")
|
|
133
|
+
lines.append(f" {essentials['overview'][:500]}")
|
|
134
|
+
lines.append(" → Use this to introduce the project")
|
|
135
|
+
lines.append("")
|
|
136
|
+
|
|
137
|
+
# Feature list
|
|
138
|
+
if essentials.get("features"):
|
|
139
|
+
features = essentials["features"]
|
|
140
|
+
lines.append(f"5. KEY FEATURES (from '{features.get('section', 'Features')}' section):")
|
|
141
|
+
for item in features.get("items", [])[:10]:
|
|
142
|
+
lines.append(f" • {item}")
|
|
143
|
+
lines.append(" → Include these when describing capabilities")
|
|
144
|
+
lines.append("")
|
|
145
|
+
|
|
146
|
+
lines.append("")
|
|
147
|
+
lines.append("⚠️ CRITICAL REQUIREMENTS:")
|
|
148
|
+
lines.append(" • You MUST include ALL mandatory content above")
|
|
149
|
+
lines.append(" • Include commands and code EXACTLY as shown (no paraphrasing)")
|
|
150
|
+
lines.append(" • Mandatory content comes FIRST, before architecture/internals")
|
|
151
|
+
lines.append(" • If you skip or paraphrase mandatory content, validation WILL FAIL")
|
|
152
|
+
lines.append("=" * 70)
|
|
153
|
+
lines.append("")
|
|
154
|
+
|
|
155
|
+
return "\n".join(lines)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _format_evidence_block(title: str, items: List[Dict]) -> str:
|
|
159
|
+
if not items:
|
|
160
|
+
return f"{title}:\n (none)\n"
|
|
161
|
+
lines = [f"{title}:"]
|
|
162
|
+
for i, it in enumerate(items, 1):
|
|
163
|
+
src = it.get("source", f"item-{i}")
|
|
164
|
+
snip = (it.get("snippet") or "").strip()
|
|
165
|
+
lines.append(f"- [{src}]")
|
|
166
|
+
if snip:
|
|
167
|
+
lines.append(f" {snip[:800]}")
|
|
168
|
+
return "\n".join(lines) + "\n"
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _format_rationale_block(rationale: Dict[str, Any]) -> str:
|
|
172
|
+
if not rationale:
|
|
173
|
+
return ""
|
|
174
|
+
|
|
175
|
+
logic = rationale.get("logic", [])
|
|
176
|
+
decisions = rationale.get("decisions", [])
|
|
177
|
+
qa_items = rationale.get("qa", [])
|
|
178
|
+
|
|
179
|
+
if not (logic or decisions or qa_items):
|
|
180
|
+
return ""
|
|
181
|
+
|
|
182
|
+
lines: List[str] = ["=" * 70]
|
|
183
|
+
lines.append("RATIONALE & DECISIONS (Explain the WHY):")
|
|
184
|
+
lines.append("=" * 70)
|
|
185
|
+
lines.append("")
|
|
186
|
+
|
|
187
|
+
if logic:
|
|
188
|
+
lines.append("LogicExplainer summaries (use these to explain intent):")
|
|
189
|
+
for entry in logic[:3]:
|
|
190
|
+
lines.append(f"• {entry.get('unit_id')}: {entry.get('rationale', '')[:220]}")
|
|
191
|
+
trade_offs = entry.get("trade_offs") or []
|
|
192
|
+
if trade_offs:
|
|
193
|
+
lines.append(f" Trade-offs: {', '.join(trade_offs[:2])}")
|
|
194
|
+
lines.append(" Cite as [CITE:rationale]")
|
|
195
|
+
lines.append("")
|
|
196
|
+
|
|
197
|
+
if decisions:
|
|
198
|
+
lines.append("Documented decisions / ADRs:")
|
|
199
|
+
for decision in decisions[:3]:
|
|
200
|
+
lines.append(f"• {decision.get('type','decision').upper()}: {decision.get('summary','')[:200]}")
|
|
201
|
+
if decision.get("source"):
|
|
202
|
+
lines.append(f" Source: {decision['source']}")
|
|
203
|
+
lines.append("")
|
|
204
|
+
|
|
205
|
+
if qa_items:
|
|
206
|
+
lines.append("Relevant Q&A insights about rationale:")
|
|
207
|
+
for qa in qa_items[:3]:
|
|
208
|
+
lines.append(f"• Q: {qa.get('question','')[:150]}")
|
|
209
|
+
lines.append(f" A: {qa.get('answer','')[:200]}")
|
|
210
|
+
lines.append("")
|
|
211
|
+
|
|
212
|
+
lines.append("⚠️ Explain decisions, trade-offs, and impacts.")
|
|
213
|
+
lines.append(" • Tie rationale to specific code or context.")
|
|
214
|
+
lines.append(" • Use the provided citations when referencing rationale.")
|
|
215
|
+
lines.append("")
|
|
216
|
+
|
|
217
|
+
return "\n".join(lines)
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def build_section_prompt(section_name: str,
                         instructions: str,
                         code_evidence: List[Dict],
                         context_evidence: List[Dict],
                         rules: Dict,
                         max_words: int = 250,
                         structured_evidence: Optional[Dict[str, Any]] = None) -> str:
    """
    Build an evidence-based prompt for one documentation section.

    When ``structured_evidence`` is truthy the evidence part is assembled
    from its ``essentials``, ``usage``, ``rationale`` and ``implementation``
    entries, in that order; otherwise the legacy ``code_evidence`` /
    ``context_evidence`` lists are rendered.

    Args:
        section_name: Name of the section
        instructions: Template instructions (stripped before use)
        code_evidence: Code snippets (legacy format)
        context_evidence: Context snippets (legacy format)
        rules: Validation rules; reads ``citation_style``, ``min_citations``,
            ``mark_inference``, ``required_elements`` and ``require_citations``
        max_words: Word limit
        structured_evidence: Tiered evidence (essentials, usage, implementation)

    Returns:
        The full prompt text: evidence, instructions, numbered rules and an
        optional citation reminder.
    """
    banner = "=" * 70

    if structured_evidence:
        # NOTE(review): loop nesting below is reconstructed; confirm where the
        # blank-separator appends sit relative to each loop upstream.
        parts: List[str] = []

        # TIER 1: mandatory essentials
        mandatory = _format_essentials_block(structured_evidence.get("essentials", {}))
        if mandatory:
            parts.append(mandatory)

        usage = structured_evidence.get("usage", {})

        # TIER 2: Q&A insights
        if usage.get("qa_insights"):
            qa_text = _format_qa_insights_block(usage["qa_insights"])
            if qa_text:
                parts.append(qa_text)

        # TIER 2.5: docstring evidence
        if usage.get("docstrings"):
            docstring_text = _format_docstring_evidence_block(usage["docstrings"])
            if docstring_text:
                parts.append(docstring_text)

        # TIER 2: README usage examples (first 3)
        if usage.get("readme_examples"):
            parts.append("USAGE EXAMPLES FROM README (include if relevant):")
            for number, example in enumerate(usage["readme_examples"][:3], 1):
                parts.append(f"{number}. {example['context']}:")
                parts.append(f"```{example['language']}\n{example['code'][:300]}\n```")
            parts.append("")

        # TIER 2: README tables (first 3)
        if usage.get("tables"):
            parts.extend([
                banner,
                "TABLES FROM README (include EXACTLY as shown if relevant):",
                banner,
                "",
            ])
            for table in usage["tables"][:3]:
                if table.get("has_status_indicators", False):
                    status_note = " [CONTAINS STATUS INDICATORS - preserve exactly]"
                else:
                    status_note = ""
                parts.append(f"Table from '{table['section']}'{status_note}:")
                parts.append(table["raw_content"])
                parts.append("")
            parts.extend([
                "⚠️ IMPORTANT: Include these tables EXACTLY as shown.",
                " • Do NOT summarize or paraphrase tables",
                " • Preserve all status indicators (✅/❌/⏳)",
                " • Include full table structure with headers",
                "",
            ])

        # TIER 2: callouts (first 10)
        if usage.get("callouts"):
            parts.extend([
                banner,
                "STATUS NOTICES AND CALLOUTS (must be reflected in docs):",
                banner,
                "",
            ])
            for callout in usage["callouts"][:10]:
                callout_kind = callout["callout_type"].upper().replace("_", " ")
                parts.append(f"[{callout_kind}]: {callout['text']}")
            parts.extend([
                "",
                "⚠️ CRITICAL: These notices affect documentation accuracy:",
                " • 'COMING SOON' features must NOT be documented as available",
                " • 'EXPERIMENTAL' features need appropriate warnings",
                " • 'DEPRECATED' features should be clearly marked",
                " • Reflect these statuses accurately in your output",
                "",
            ])

        # TIER 2: module READMEs (first 10 modules)
        if usage.get("module_readmes"):
            parts.extend([
                banner,
                "MODULE-SPECIFIC DOCUMENTATION (from subdirectory READMEs):",
                banner,
                "",
                "Use this evidence for detailed how-to guides and module-specific examples.",
                "These provide usage patterns for specific subsystems/components.",
                "",
            ])

            for module in usage["module_readmes"][:10]:
                parts.append(f"### Module: {module.get('path', 'unknown')}")

                if module.get('overview'):
                    overview_text = module['overview'][:800]
                    fallback_note = " [raw extraction]" if module.get('raw_fallback') else ""
                    parts.extend([f"Overview{fallback_note}: {overview_text}", ""])

                if module.get('features'):
                    module_features = module['features']
                    parts.append(f"Features ({module_features.get('section', 'Features')}):")
                    parts.extend(f" • {item}" for item in module_features.get('items', [])[:5])
                    parts.append("")

                if module.get('examples'):
                    parts.append("Code Examples:")
                    for example in module['examples'][:2]:
                        parts.append(f"```{example['language']}\n{example['code'][:300]}\n```")
                    parts.append("")

                if module.get('tables'):
                    for table in module['tables'][:1]:
                        parts.append(f"Table ({table.get('section', 'table')}):")
                        parts.append(table.get('raw_content', '')[:500])
                        parts.append("")

                parts.extend(["---", ""])

            parts.extend([
                "⚠️ INSTRUCTION: Incorporate module-specific examples into 'How To Guides'.",
                " • Use module paths as context (e.g., 'examples/quantization/')",
                " • Reference module READMEs when documenting specific features",
                " • Don't duplicate root README content already in Getting Started",
                "",
            ])

        rationale_text = _format_rationale_block(structured_evidence.get("rationale", {}))
        if rationale_text:
            parts.append(rationale_text)

        # TIER 3: implementation details (first 3 patterns)
        implementation = structured_evidence.get("implementation", {})
        if implementation.get("code_patterns"):
            parts.append("CODE IMPLEMENTATION DETAILS (optional, use for depth):")
            for pattern in implementation["code_patterns"][:3]:
                parts.append(f"- [{pattern['source']}]")
                parts.append(f" {pattern['snippet'][:400]}")
            parts.append("")

        evidence_text = "\n".join(parts)
    else:
        # Legacy format (backward compatibility)
        evidence_text = "\n".join([
            _format_evidence_block("CODE EVIDENCE", code_evidence),
            _format_evidence_block("CONTEXT EVIDENCE", context_evidence),
        ])

    citation_style = rules.get("citation_style") or "[CITE:source]"
    min_citations = rules.get("min_citations", 1)

    # Rule lines are joined with newlines and no blank separators.
    rule_lines = [
        f"Section: {section_name}",
        "Rules:",
        "1) CITATION REQUIREMENT (MANDATORY - output will be rejected without citations):",
        f" • Every factual claim MUST cite its source using: {citation_style}",
        f" • Minimum {min_citations} citation(s) required in this section",
        " • Format: [CITE:filepath:line-range] or [CITE:README] or [CITE:source_name]",
        " • Example: 'The default value is 5 [CITE:src/config.py:42-45]'",
    ]
    if rules.get("mark_inference", True):
        rule_lines.append("2) Mark any speculation with [INFERENCE]")
    else:
        rule_lines.append("2) Do not mark speculation")
    rule_lines.append("3) Follow required elements exactly:")
    rule_lines.extend(f" - {req}" for req in rules.get("required_elements", []))
    rule_lines.append(f"4) Maximum {max_words} words.")

    # Mandatory-content rule only applies when essentials were supplied
    if structured_evidence and structured_evidence.get("essentials"):
        rule_lines.extend([
            "5) EVIDENCE-FIRST REQUIREMENT (CRITICAL - FAILURE TO COMPLY = VALIDATION FAILURE):",
            " • If MANDATORY CONTENT appears above, it MUST be in your output",
            " • Installation commands: Copy EXACTLY, use code blocks",
            " • Quickstart code: Include COMPLETE example, not paraphrased",
            " • Order: Mandatory content FIRST, then optional details",
            " • Validation will REJECT output missing mandatory content",
        ])

    lowered = section_name.lower()
    if "usage" in lowered or "api" in lowered:
        rule_lines.extend([
            "6) RATIONALE REQUIREMENT:",
            " • Explain at least one design choice, trade-off, or side effect drawn from the rationale block.",
            " • Cite that explanation using [CITE:rationale] (this citation is mandatory).",
        ])

    # Citation reminder closes the prompt unless explicitly disabled
    if rules.get("require_citations", True):
        citation_reminder = (
            f"\n\nREMINDER: Your output MUST include at least {min_citations} "
            f"citation(s) in the format {citation_style}.\n"
            "Citations should reference the evidence provided above. "
            "Without citations, your output will be rejected."
        )
    else:
        citation_reminder = ""

    return "\n".join([
        "Based on the following evidence, write the requested section.",
        "",
        evidence_text,
        "",
        "Instructions:",
        instructions.strip(),
        "",
        "\n".join(rule_lines),
        citation_reminder,
    ])
|
|
438
|
+
|
|
439
|
+
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import logging
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
logger = logging.getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TextGenerator:
    """Base interface for text-generation backends; subclasses override ``generate``."""

    def generate(
        self,
        system_prompt: str,
        user_prompt: str,
        model: Optional[str] = None,
        temperature: float = 0.2,
        max_tokens: int = 1200,
    ) -> str:
        """
        Produce text for the given system and user prompts.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class OpenAITextGenerator(TextGenerator):
    """Text generator that calls the OpenAI chat-completions endpoint."""

    def __init__(self, api_key: Optional[str] = None, default_model: str = "gpt-4o-mini"):
        """
        Create the OpenAI client.

        Args:
            api_key: API key; falls back to the ``OPENAI_API_KEY`` env variable.
            default_model: Model used when ``generate`` is called without one.

        Raises:
            RuntimeError: If the ``openai`` package cannot be imported or no
                API key is available.
        """
        try:
            from openai import OpenAI  # type: ignore
        except Exception as e:
            raise RuntimeError("OpenAI SDK not installed. pip install openai") from e
        self._OpenAI = OpenAI
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        if not self.api_key:
            raise RuntimeError("OPENAI_API_KEY is required for OpenAI provider")
        self.client = self._OpenAI(api_key=self.api_key)
        self.default_model = default_model

    def generate(self, system_prompt: str, user_prompt: str, model: Optional[str] = None, temperature: float = 0.2, max_tokens: int = 1200) -> str:
        """
        Send one system + user exchange and return the first choice's text.

        Args:
            system_prompt: Content of the system message.
            user_prompt: Content of the user message.
            model: Model override; ``self.default_model`` is used when falsy.
            temperature: Sampling temperature passed to the API.
            max_tokens: Completion token limit passed to the API.

        Raises:
            RuntimeError: If the response has no choices or only blank content.
        """
        mdl = model or self.default_model
        resp = self.client.chat.completions.create(
            model=mdl,
            temperature=temperature,
            max_tokens=max_tokens,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        # An empty choices list is reported like blank content, so callers
        # see one RuntimeError instead of an IndexError.
        choices = resp.choices or []
        content = (choices[0].message.content or "") if choices else ""
        if not content.strip():
            raise RuntimeError("OpenAI returned empty content")
        return content
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class AnthropicTextGenerator(TextGenerator):
    """Text generator that calls the Anthropic messages endpoint."""

    def __init__(self, api_key: Optional[str] = None, default_model: str = "claude-sonnet-4-6"):
        """
        Create the Anthropic client.

        Args:
            api_key: API key; falls back to the ``ANTHROPIC_API_KEY`` env variable.
            default_model: Model used when ``generate`` is called without one.

        Raises:
            RuntimeError: If the ``anthropic`` package cannot be imported or
                no API key is available.
        """
        try:
            import anthropic  # type: ignore
        except Exception as e:
            raise RuntimeError("Anthropic SDK not installed. pip install anthropic") from e
        self._anthropic = anthropic

        resolved_key = api_key or os.getenv("ANTHROPIC_API_KEY")
        self.api_key = resolved_key
        if not resolved_key:
            raise RuntimeError("ANTHROPIC_API_KEY is required for Anthropic provider")

        self.client = self._anthropic.Anthropic(api_key=resolved_key)
        self.default_model = default_model

    def generate(self, system_prompt: str, user_prompt: str, model: Optional[str] = None, temperature: float = 0.2, max_tokens: int = 1200) -> str:
        """
        Send one user message and return the concatenated text parts.

        Args:
            system_prompt: Passed as the ``system`` argument.
            user_prompt: Content of the single user message.
            model: Model override; ``self.default_model`` is used when falsy.
            temperature: Sampling temperature passed to the API.
            max_tokens: Token limit passed to the API.

        Raises:
            RuntimeError: If the concatenated text is blank.
        """
        response = self.client.messages.create(
            model=model or self.default_model,
            max_tokens=max_tokens,
            temperature=temperature,
            system=system_prompt,
            messages=[{"role": "user", "content": user_prompt}],
        )
        # Only parts whose type is "text" contribute to the result
        text = "".join(
            getattr(part, "text", "")
            for part in getattr(response, "content", [])
            if getattr(part, "type", "") == "text"
        )
        if not text.strip():
            raise RuntimeError("Anthropic returned empty content")
        return text
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def create_generator(provider: Optional[str] = None, model: Optional[str] = None) -> TextGenerator:
    """
    Provider selection with Anthropic-first detection when unspecified.
    Order:
    1) Explicit provider arg if provided
    2) V2D_LLM_PROVIDER env
    3) If ANTHROPIC_API_KEY set -> Anthropic
    4) Else if OPENAI_API_KEY set -> OpenAI
    5) Else fail fast

    Model selection order:
    1) Explicit model arg if provided
    2) VIBE2DOC_MODEL env variable
    3) Provider's default model

    Raises:
        RuntimeError: If no provider can be determined, or the requested
            provider is neither "anthropic" nor "openai".
    """
    prov = (provider or os.getenv("V2D_LLM_PROVIDER") or "").lower().strip()

    if not prov:
        if os.getenv("ANTHROPIC_API_KEY"):
            prov = "anthropic"
        elif os.getenv("OPENAI_API_KEY"):
            prov = "openai"
        else:
            raise RuntimeError("No LLM provider configured. Set V2D_LLM_PROVIDER or ANTHROPIC_API_KEY/OPENAI_API_KEY")

    if prov == "anthropic":
        gen = AnthropicTextGenerator()
    elif prov == "openai":
        gen = OpenAITextGenerator()
    else:
        # A provider was named but is not supported; report that name rather
        # than claiming nothing was configured.
        raise RuntimeError(f"Unsupported LLM provider {prov!r}. Use 'anthropic' or 'openai'.")

    # Read model from environment if not explicitly provided
    model_to_use = model or os.getenv("VIBE2DOC_MODEL")
    if model_to_use:
        gen.default_model = model_to_use
    return gen
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
|