code-finder 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_context/__init__.py +33 -0
- claude_context/agentic_integration.py +309 -0
- claude_context/ast_chunker.py +646 -0
- claude_context/config.py +239 -0
- claude_context/context_manager.py +627 -0
- claude_context/embeddings.py +307 -0
- claude_context/embeddings_interface.py +226 -0
- claude_context/enhanced_ast_chunker.py +1129 -0
- claude_context/explorer.py +951 -0
- claude_context/explorer_with_context.py +1008 -0
- claude_context/indexer.py +893 -0
- claude_context/markdown_chunker.py +421 -0
- claude_context/mode_handler.py +1774 -0
- claude_context/query_metrics.py +164 -0
- claude_context/question_generator.py +800 -0
- claude_context/readme_extractor.py +485 -0
- claude_context/repository_adapter.py +399 -0
- claude_context/search.py +493 -0
- claude_context/skills/__init__.py +11 -0
- claude_context/skills/_cli_common.py +74 -0
- claude_context/skills/_index_manager.py +98 -0
- claude_context/skills/api_surface.py +219 -0
- claude_context/skills/evidence_retrieval.py +151 -0
- claude_context/skills/grounded_review.py +212 -0
- claude_context/synthesis/__init__.py +8 -0
- claude_context/synthesis/editor_agent.py +391 -0
- claude_context/synthesis/llm_synthesizer.py +153 -0
- claude_context/synthesis/logic_explainer.py +235 -0
- claude_context/synthesis/multi_review_pipeline.py +717 -0
- claude_context/synthesis/prompt_builder.py +439 -0
- claude_context/synthesis/providers.py +115 -0
- claude_context/synthesis/validators.py +458 -0
- code_finder-0.1.0.dist-info/METADATA +823 -0
- code_finder-0.1.0.dist-info/RECORD +37 -0
- code_finder-0.1.0.dist-info/WHEEL +5 -0
- code_finder-0.1.0.dist-info/entry_points.txt +4 -0
- code_finder-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,800 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Question Generator: Auto-generate context-seeking questions from synthesis templates.
|
|
3
|
+
|
|
4
|
+
This module bridges the gap between what templates need and what questions should be asked
|
|
5
|
+
in interactive mode to gather that information.
|
|
6
|
+
|
|
7
|
+
Key features:
|
|
8
|
+
- Template-driven question generation
|
|
9
|
+
- Project type detection (API client, ML framework, CLI tool, etc.)
|
|
10
|
+
- Adaptive questions based on codebase structure
|
|
11
|
+
- README-aware question customization
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import List, Dict, Optional, Tuple, Set
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from enum import Enum
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
import re
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class QuestionPriority(Enum):
    """How early a question belongs in the discovery flow."""

    # Must be asked first (purpose, users).
    CRITICAL = "critical"
    # Architecture and key features.
    HIGH = "high"
    # Implementation patterns.
    MEDIUM = "medium"
    # Nice-to-have details.
    LOW = "low"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ProjectType(Enum):
    """Kinds of project that codebase analysis can report."""

    # HTTP client libraries.
    API_CLIENT = "api_client"
    # Machine learning frameworks.
    ML_FRAMEWORK = "ml_framework"
    # Web servers / APIs.
    WEB_FRAMEWORK = "web_framework"
    # Command-line tools.
    CLI_TOOL = "cli_tool"
    # Data processing / ETL.
    DATA_PIPELINE = "data_pipeline"
    # General purpose library.
    LIBRARY = "library"
    # Standalone application.
    APPLICATION = "application"
    # Type could not be determined.
    UNKNOWN = "unknown"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class QuestionFocus(Enum):
    """Whether questions lean user-facing or toward a technical deep-dive."""

    # Quickstart, install, examples.
    USER_FOCUSED = "user_focused"
    # Architecture, internals, patterns.
    TECHNICAL = "technical"
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass
class GeneratedQuestion:
    """One question derived from what a synthesis template needs."""

    # The question wording to ask.
    text: str
    # Position of the question in the discovery ordering.
    priority: QuestionPriority
    # Template section this question helps populate.
    section_target: str
    # Where to look for answers (README, code, etc.).
    evidence_sources: List[str]
    # Why this question matters.
    rationale: str
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def detect_project_type(repository_path: str, readme_content: Optional[str] = None) -> ProjectType:
    """
    Detect project type from repository structure and README content.

    Each project type accumulates a score from three signals, and every
    signal counts at most once per type:

    1. README keywords: +5 if any of the type's phrases occurs in the README
    2. File structure: +2 if any characteristic file pattern matches
    3. Project directory name: +1 for a naming hint

    The highest-scoring type wins; on a tie the type declared first in
    ``ProjectType`` is returned.

    Args:
        repository_path: Path to repository root
        readme_content: Optional README content (if already loaded)

    Returns:
        Detected ProjectType, or ``ProjectType.UNKNOWN`` when no signal matched
    """
    repo_path = Path(repository_path)

    # Signal scores for each type
    scores: Dict[ProjectType, int] = {ptype: 0 for ptype in ProjectType}

    # 1. Analyze README (highest weight: 5 points per matching type)
    if readme_content:
        readme_lower = readme_content.lower()
        readme_phrases = {
            ProjectType.API_CLIENT: [
                "api client", "client library", "rest client", "http client",
                "client for", "sdk for", "provides access to",
            ],
            ProjectType.ML_FRAMEWORK: [
                "machine learning", "deep learning", "neural network",
                "training", "inference", "model", "dataset", "pytorch", "tensorflow",
            ],
            ProjectType.WEB_FRAMEWORK: [
                "web framework", "rest api", "web server", "http server",
                "api endpoints", "routes", "fastapi", "flask", "django",
            ],
            ProjectType.CLI_TOOL: [
                "command line", "cli tool", "command-line interface",
                "terminal", "shell", "commands",
            ],
            ProjectType.DATA_PIPELINE: [
                "data pipeline", "etl", "data processing", "workflow",
                "data transformation", "airflow", "dagster",
            ],
        }
        for proj_type, phrases in readme_phrases.items():
            if any(phrase in readme_lower for phrase in phrases):
                scores[proj_type] += 5

    # 2. Analyze file structure (medium weight: 2 points per matching type)
    if repo_path.is_dir():
        file_patterns = {
            ProjectType.API_CLIENT: [
                "**/client.py", "**/api_client.py", "**/*_client.py",
                "**/http_client.py", "**/rest_client.py",
            ],
            ProjectType.ML_FRAMEWORK: [
                "**/model.py", "**/models.py", "**/trainer.py",
                "**/training.py", "**/dataset.py", "**/datasets.py",
            ],
            ProjectType.WEB_FRAMEWORK: [
                "**/app.py", "**/routes.py", "**/views.py",
                "**/api.py", "**/endpoints.py", "**/server.py",
            ],
            ProjectType.CLI_TOOL: [
                "**/cli.py", "**/commands.py", "**/__main__.py",
                "**/main.py", "**/console.py",
            ],
            ProjectType.DATA_PIPELINE: [
                "**/pipeline.py", "**/dag.py", "**/workflow.py",
                "**/etl.py", "**/tasks.py",
            ],
        }
        for proj_type, patterns in file_patterns.items():
            # any() stops at the first hit, so a recursive glob is not walked
            # to completion just to learn that at least one file exists.
            if any(True for pattern in patterns for _ in repo_path.glob(pattern)):
                scores[proj_type] += 2

    # 3. Analyze package/project name (low weight: 1 point)
    project_name = repo_path.name.lower()

    if "client" in project_name or "sdk" in project_name:
        scores[ProjectType.API_CLIENT] += 1
    # "client" itself contains "cli"; strip it before testing so that a name
    # such as "foo-client" is not also counted as a CLI tool.
    if "cli" in project_name.replace("client", "") or "cmd" in project_name:
        scores[ProjectType.CLI_TOOL] += 1
    if "api" in project_name or "server" in project_name:
        scores[ProjectType.WEB_FRAMEWORK] += 1

    # 4. Determine winner. LIBRARY, APPLICATION and UNKNOWN never receive
    # points above, so they can only "win" when every score is zero.
    detected_type, max_score = max(scores.items(), key=lambda item: item[1])
    if max_score == 0:
        return ProjectType.UNKNOWN

    return detected_type
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
class TemplateQuestionGenerator:
    """
    Generates targeted questions from synthesis template specifications.

    The key insight: Template instructions tell us WHAT content we need.
    We can reverse-engineer QUESTIONS that will gather that content.

    Questions adapt to the detected project type: type-specific pattern
    tables are layered on top of the base instruction patterns.
    """

    # Mapping of instruction keywords to question templates (BASE PATTERNS)
    INSTRUCTION_PATTERNS = {
        # Purpose and Problem
        "purpose": {
            "questions": [
                "What is the main purpose of this project according to the README or documentation?",
                "What specific problem does this codebase solve?",
            ],
            "priority": QuestionPriority.CRITICAL,
            "sources": ["README*", "docs/", "**/__init__.py"]
        },

        # Target Audience
        "target audience": {
            "questions": [
                "Who is the intended audience or user base for this project?",
                "What types of developers or organizations would use this?",
            ],
            "priority": QuestionPriority.CRITICAL,
            "sources": ["README*", "docs/", "CONTRIBUTING*"]
        },

        # Use Cases
        "use case": {
            "questions": [
                "What are the primary use cases for this project as described in documentation?",
                "What real-world problems can users solve with this?",
            ],
            "priority": QuestionPriority.CRITICAL,
            "sources": ["README*", "docs/examples/", "examples/"]
        },

        # Key Features
        "key feature": {
            "questions": [
                "What are the key features or capabilities highlighted in the README?",
                "What makes this project unique or different from alternatives?",
            ],
            "priority": QuestionPriority.HIGH,
            "sources": ["README*", "docs/", "CHANGELOG*"]
        },

        # Architecture
        "architecture": {
            "questions": [
                "What is the high-level architecture of this system?",
                "What are the main components and how do they relate?",
            ],
            "priority": QuestionPriority.HIGH,
            "sources": ["docs/architecture/", "README*", "**/__init__.py"]
        },

        # Components
        "component": {
            "questions": [
                "What are the core components or modules in this codebase?",
                "How are components organized and what are their responsibilities?",
            ],
            "priority": QuestionPriority.HIGH,
            "sources": ["src/", "lib/", "**/__init__.py"]
        },

        # API/Interface
        "api": {
            "questions": [
                "What are the main public APIs or interfaces provided?",
                "How do users interact with this library programmatically?",
            ],
            "priority": QuestionPriority.MEDIUM,
            "sources": ["README*", "docs/api/", "**/__init__.py"]
        },

        # Authentication
        "authentication": {
            "questions": [
                "What authentication mechanisms are supported?",
                "How do users configure authentication?",
            ],
            "priority": QuestionPriority.MEDIUM,
            "sources": ["README*", "docs/", "**/auth*"]
        },

        # Installation
        "installation": {
            "questions": [
                "How do users install this project?",
                "What are the dependencies and requirements?",
            ],
            "priority": QuestionPriority.MEDIUM,
            "sources": ["README*", "setup.py", "pyproject.toml", "requirements.txt"]
        },
    }

    # Type-specific question patterns (ADAPTIVE PATTERNS)
    TYPE_SPECIFIC_PATTERNS = {
        ProjectType.API_CLIENT: {
            "client": {
                "questions": [
                    "What API or service does this client library connect to?",
                    "How do users initialize and configure the client?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*", "**/client.py"]
            },
            "authentication": {
                "questions": [
                    "What authentication methods does this client support?",
                    "How do users provide API credentials?",
                ],
                "priority": QuestionPriority.HIGH,
                "sources": ["README*", "**/auth*"]
            },
            "resources": {
                "questions": [
                    "What API resources or endpoints are available through this client?",
                    "What operations can users perform with this client?",
                ],
                "priority": QuestionPriority.HIGH,
                "sources": ["README*", "**/*.py"]
            },
        },
        ProjectType.ML_FRAMEWORK: {
            "models": {
                "questions": [
                    "What types of machine learning models does this framework support?",
                    "What model architectures are provided or supported?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*", "**/model*.py"]
            },
            "training": {
                "questions": [
                    "What training capabilities does this framework provide?",
                    "How do users train models with this framework?",
                ],
                "priority": QuestionPriority.HIGH,
                "sources": ["README*", "**/train*.py", "**/trainer.py"]
            },
            "inference": {
                "questions": [
                    "How does model inference work in this framework?",
                    "What inference APIs or methods are provided?",
                ],
                "priority": QuestionPriority.HIGH,
                "sources": ["README*", "**/infer*.py", "**/predict*.py"]
            },
        },
        ProjectType.WEB_FRAMEWORK: {
            "endpoints": {
                "questions": [
                    "What types of web APIs or endpoints does this framework help create?",
                    "How do developers define routes or endpoints?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*", "**/routes.py", "**/api.py"]
            },
            "middleware": {
                "questions": [
                    "What middleware or request handling features are provided?",
                    "How does request/response processing work?",
                ],
                "priority": QuestionPriority.HIGH,
                "sources": ["README*", "**/middleware.py"]
            },
        },
        ProjectType.CLI_TOOL: {
            "commands": {
                "questions": [
                    "What commands does this CLI tool provide?",
                    "How do users invoke the main functionality?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*", "**/cli.py", "**/commands.py"]
            },
            "options": {
                "questions": [
                    "What command-line options and arguments are available?",
                    "What configuration can users control via CLI flags?",
                ],
                "priority": QuestionPriority.HIGH,
                "sources": ["README*", "**/cli.py"]
            },
        },
        ProjectType.DATA_PIPELINE: {
            "pipeline": {
                "questions": [
                    "What data processing pipelines or workflows does this support?",
                    "How do users define and run data pipelines?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*", "**/pipeline.py", "**/dag.py"]
            },
            "transforms": {
                "questions": [
                    "What data transformation capabilities are provided?",
                    "How does data flow through the pipeline?",
                ],
                "priority": QuestionPriority.HIGH,
                "sources": ["README*", "**/transform*.py"]
            },
        },
    }

    # User-focused question patterns (for README best practices)
    USER_FOCUSED_PATTERNS = {
        ProjectType.API_CLIENT: {
            "summary": {
                "questions": [
                    "What is the one-sentence summary of this project from the README?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*"]
            },
            "install": {
                "questions": [
                    "What is the exact installation command shown in the README?",
                    "What are the system requirements mentioned in the README (Python version, OS)?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*"]
            },
            "quickstart": {
                "questions": [
                    "What is the minimal working code example in the README?",
                    "What code examples are provided in the README for basic usage?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*"]
            },
            "auth_practical": {
                "questions": [
                    "How do users provide authentication credentials according to the README?",
                    "What is the recommended way to handle API keys mentioned in README?",
                ],
                "priority": QuestionPriority.HIGH,
                "sources": ["README*"]
            },
            "configuration": {
                "questions": [
                    "What configuration options or environment variables are documented?",
                ],
                "priority": QuestionPriority.HIGH,
                "sources": ["README*", ".env.example"]
            },
        },
        ProjectType.ML_FRAMEWORK: {
            "summary": {
                "questions": [
                    "What is the one-sentence description of this ML framework from the README?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*"]
            },
            "install": {
                "questions": [
                    "What is the installation command shown in the README?",
                    "What hardware requirements are mentioned (GPU, CPU, memory)?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*"]
            },
            "quickstart": {
                "questions": [
                    "What is the basic training or inference example shown in the README?",
                    "What minimal code example is provided for getting started?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*", "examples/"]
            },
            "models": {
                "questions": [
                    "What pretrained models or checkpoints are available according to README?",
                ],
                "priority": QuestionPriority.HIGH,
                "sources": ["README*"]
            },
        },
        ProjectType.CLI_TOOL: {
            "summary": {
                "questions": [
                    "What is the one-sentence description of this CLI tool from the README?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*"]
            },
            "install": {
                "questions": [
                    "What is the installation command for this CLI tool?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*"]
            },
            "quickstart": {
                "questions": [
                    "What is the most basic command users would run according to README?",
                    "What common commands are shown in the README examples?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*"]
            },
            "configuration": {
                "questions": [
                    "What CLI flags or configuration options are documented?",
                ],
                "priority": QuestionPriority.HIGH,
                "sources": ["README*"]
            },
        },
        ProjectType.WEB_FRAMEWORK: {
            "summary": {
                "questions": [
                    "What is the one-sentence description of this framework from the README?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*"]
            },
            "install": {
                "questions": [
                    "What is the installation command shown in the README?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*"]
            },
            "quickstart": {
                "questions": [
                    "What is the minimal 'Hello World' example shown in the README?",
                    "How do users create and run a basic server according to examples?",
                ],
                "priority": QuestionPriority.CRITICAL,
                "sources": ["README*", "examples/"]
            },
        },
    }

    # Sort rank of each priority; lower ranks are asked first.
    _PRIORITY_ORDER = {
        QuestionPriority.CRITICAL: 0,
        QuestionPriority.HIGH: 1,
        QuestionPriority.MEDIUM: 2,
        QuestionPriority.LOW: 3,
    }

    @classmethod
    def _dedupe_and_rank(
        cls,
        questions: List[GeneratedQuestion],
        max_questions: int
    ) -> List[GeneratedQuestion]:
        """Drop repeated question texts, order by priority, and truncate.

        The first occurrence of each text is kept. The sort is stable, so
        questions of equal priority stay in generation order.
        """
        first_by_text: Dict[str, GeneratedQuestion] = {}
        for question in questions:
            first_by_text.setdefault(question.text, question)

        ranked = sorted(
            first_by_text.values(),
            key=lambda q: cls._PRIORITY_ORDER[q.priority],
        )
        return ranked[:max_questions]

    def generate_user_focused_questions(
        self,
        repository_path: str,
        readme_content: Optional[str] = None,
        project_type: Optional[ProjectType] = None,
        max_questions: int = 10
    ) -> List[GeneratedQuestion]:
        """
        Generate user-focused questions following README best practices.

        Emits every question in the USER_FOCUSED_PATTERNS table for the
        project type. Project types without a table entry yield an empty list.

        Args:
            repository_path: Path to repository
            readme_content: Optional README content
            project_type: Optional pre-detected project type
            max_questions: Maximum questions to generate

        Returns:
            Unique questions sorted by priority (critical first), truncated
            to ``max_questions``
        """
        # Detect project type if not provided
        if project_type is None:
            project_type = detect_project_type(repository_path, readme_content)

        patterns = self.USER_FOCUSED_PATTERNS.get(project_type, {})
        questions = [
            GeneratedQuestion(
                text=question_text,
                priority=config["priority"],
                section_target=pattern_name.title(),
                # Copy so callers who edit a question's sources cannot
                # mutate the shared class-level pattern table.
                evidence_sources=list(config["sources"]),
                rationale=f"User-focused README: {pattern_name}"
            )
            for pattern_name, config in patterns.items()
            for question_text in config["questions"]
        ]

        return self._dedupe_and_rank(questions, max_questions)

    def generate_questions_from_template(
        self,
        template_spec: Dict,
        repository_path: Optional[str] = None,
        readme_content: Optional[str] = None,
        max_questions: int = 15,
        project_type: Optional[ProjectType] = None,
        focus: QuestionFocus = QuestionFocus.TECHNICAL
    ) -> List[GeneratedQuestion]:
        """
        Generate prioritized questions from a synthesis template.

        For every enabled section, each pattern keyword that occurs as a
        substring of the section's lower-cased instructions contributes that
        pattern's questions. The pattern set is the base INSTRUCTION_PATTERNS
        plus, when the project type has an entry, either the user-focused or
        the type-specific table (chosen by ``focus``). Entries from the extra
        table replace base entries that share a keyword.

        Args:
            template_spec: The synthesis template configuration
            repository_path: Path to repository for project type detection
            readme_content: Optional README content for better detection
            max_questions: Maximum number of questions to generate
            project_type: Optional pre-detected project type (skips detection)
            focus: USER_FOCUSED (quickstart first) or TECHNICAL (architecture first)

        Returns:
            List of questions, sorted by priority and adapted to project type
        """
        # Detect project type if not provided
        if project_type is None and repository_path:
            project_type = detect_project_type(repository_path, readme_content)

        # Choose the extra pattern table based on focus
        all_patterns = dict(self.INSTRUCTION_PATTERNS)
        if focus == QuestionFocus.USER_FOCUSED:
            extra_patterns = self.USER_FOCUSED_PATTERNS.get(project_type)
            mode_line = " Using USER-FOCUSED question mode (quickstart first)"
            pattern_kind = "user-focused"
        else:
            extra_patterns = self.TYPE_SPECIFIC_PATTERNS.get(project_type)
            mode_line = " Using TECHNICAL question mode (architecture first)"
            pattern_kind = "technical"

        if extra_patterns is not None:
            all_patterns.update(extra_patterns)

            print(f"🔍 Detected project type: {project_type.value}")
            print(mode_line)
            print(f" Added {len(extra_patterns)} {pattern_kind} question patterns")

        type_label = project_type.value if project_type else "generic"
        generated_questions: List[GeneratedQuestion] = []

        # "or" guards tolerate templates that carry an explicit null for
        # "sections" or "instructions" instead of omitting the key.
        for section in template_spec.get("sections") or []:
            if not section.get("enabled", True):
                continue

            section_name = section.get("name", "")
            instructions = (section.get("instructions") or "").lower()

            # Match instruction patterns to generate relevant questions
            for pattern, config in all_patterns.items():
                if pattern not in instructions:
                    continue
                for question_text in config["questions"]:
                    generated_questions.append(
                        GeneratedQuestion(
                            text=question_text,
                            priority=config["priority"],
                            section_target=section_name,
                            # Copy: see generate_user_focused_questions.
                            evidence_sources=list(config["sources"]),
                            rationale=f"Needed for {section_name} section: {pattern} (type: {type_label})"
                        )
                    )

        return self._dedupe_and_rank(generated_questions, max_questions)

    def create_question_flow(
        self,
        template_spec: Dict,
        repository_path: Optional[str] = None,
        readme_content: Optional[str] = None
    ) -> Dict[str, List[GeneratedQuestion]]:
        """
        Create a structured question flow with phases.

        Questions come from ``generate_questions_from_template`` with its
        default limits and are bucketed by priority.

        Returns:
            Dictionary with phases: "context" (CRITICAL), "architecture"
            (HIGH) and "implementation" (everything else)
        """
        all_questions = self.generate_questions_from_template(
            template_spec,
            repository_path=repository_path,
            readme_content=readme_content
        )

        flow: Dict[str, List[GeneratedQuestion]] = {
            "context": [],         # CRITICAL priority questions
            "architecture": [],    # HIGH priority questions
            "implementation": [],  # MEDIUM/LOW priority questions
        }

        for q in all_questions:
            if q.priority == QuestionPriority.CRITICAL:
                flow["context"].append(q)
            elif q.priority == QuestionPriority.HIGH:
                flow["architecture"].append(q)
            else:
                flow["implementation"].append(q)

        return flow
|
|
714
|
+
|
|
715
|
+
|
|
716
|
+
def suggest_next_question(
    qa_history: List[Tuple[str, str]],
    template_spec: Dict,
    current_phase: Optional[str] = None
) -> Optional[str]:
    """
    Suggest the next question to ask based on history and template needs.

    Starting at ``current_phase``, phases are scanned in the order
    "context" -> "architecture" -> "implementation" and the first question
    whose text has not been asked yet is returned. Earlier phases are not
    revisited.

    Args:
        qa_history: List of (question, answer) tuples asked so far
        template_spec: The synthesis template
        current_phase: Current exploration phase. When None or not one of the
            three known phase names, the phase is derived from the history
            length (fewer than 3 answers: context; fewer than 8: architecture;
            otherwise implementation).

    Returns:
        Suggested question text or None if all covered
    """
    generator = TemplateQuestionGenerator()
    flow = generator.create_question_flow(template_spec)

    asked_questions = {q for q, _ in qa_history}
    phase_order = ["context", "architecture", "implementation"]

    # An unrecognised phase name previously crashed in phase_order.index();
    # treat it like an unspecified phase instead.
    if current_phase not in phase_order:
        if len(qa_history) < 3:
            current_phase = "context"
        elif len(qa_history) < 8:
            current_phase = "architecture"
        else:
            current_phase = "implementation"

    # First unanswered question in the current phase, then in later phases
    start_idx = phase_order.index(current_phase)
    for phase in phase_order[start_idx:]:
        for question in flow.get(phase, []):
            if question.text not in asked_questions:
                return question.text

    return None  # All questions covered
|
|
761
|
+
|
|
762
|
+
|
|
763
|
+
def generate_context_discovery_questions(project_type: str = "general") -> List[str]:
    """
    Generate essential context discovery questions for any project.

    These questions should be asked FIRST before any code-specific questions.

    Args:
        project_type: Project type key such as "api_client" or "cli_tool".
            An object exposing the key as ``.value`` (for example a
            ``ProjectType`` member) is accepted as well. Unrecognised keys
            yield only the base questions.

    Returns:
        A new list: the four base questions followed by any questions
        specific to the project type
    """
    base_questions = [
        "What is the main purpose of this project according to the README or documentation?",
        "What problem does this codebase solve for its users?",
        "Who is the target audience for this project?",
        "What are the primary use cases mentioned in the documentation?",
    ]

    type_specific = {
        "api_client": [
            "What API or service does this client library connect to?",
            "What are the main API resources or endpoints available?",
        ],
        "ml_framework": [
            "What type of machine learning models or tasks does this support?",
            "What are the key training or inference capabilities?",
        ],
        "web_framework": [
            "What type of web applications is this framework designed for?",
            "What are the core web development features provided?",
        ],
        "cli_tool": [
            "What commands or workflows does this CLI tool support?",
            "What tasks can users automate with this tool?",
        ],
        "data_pipeline": [
            "What data processing pipelines or workflows does this support?",
            "What data transformation capabilities are provided?",
        ],
    }

    # Enum members carry the lookup key in .value; plain strings pass through.
    type_key = getattr(project_type, "value", project_type)

    return base_questions + type_specific.get(type_key, [])
|
|
800
|
+
|