@hustle-together/api-dev-tools 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -1
- package/commands/api-research.md +77 -0
- package/hooks/enforce-external-research.py +318 -0
- package/hooks/track-tool-use.py +70 -2
- package/package.json +1 -1
- package/templates/api-dev-state.json +3 -1
- package/templates/settings.json +12 -0
package/README.md
CHANGED
|
@@ -26,8 +26,9 @@ Five powerful slash commands for Claude Code:
|
|
|
26
26
|
- **`/api-status [endpoint]`** - Track implementation progress and phase completion
|
|
27
27
|
|
|
28
28
|
### Enforcement Hooks
|
|
29
|
-
|
|
29
|
+
Six Python hooks that provide **real programmatic guarantees**:
|
|
30
30
|
|
|
31
|
+
- **`enforce-external-research.py`** - (v1.7.0) Detects external API questions and requires research before answering
|
|
31
32
|
- **`enforce-research.py`** - Blocks API code writing until research is complete
|
|
32
33
|
- **`enforce-interview.py`** - Verifies user questions were actually asked (prevents self-answering)
|
|
33
34
|
- **`verify-implementation.py`** - Checks implementation matches interview requirements
|
|
@@ -445,6 +446,30 @@ The workflow now includes automatic detection of common implementation gaps:
|
|
|
445
446
|
|
|
446
447
|
**Fix:** `verify-implementation.py` warns when test files check env vars that don't match interview requirements.
|
|
447
448
|
|
|
449
|
+
### Gap 6: Training Data Reliance (v1.7.0+)
|
|
450
|
+
**Problem:** AI answers questions about external APIs from potentially outdated training data instead of researching first.
|
|
451
|
+
|
|
452
|
+
**Example:**
|
|
453
|
+
- User asks: "What providers does Vercel AI Gateway support?"
|
|
454
|
+
- AI answers from memory: "Groq not in gateway" (WRONG!)
|
|
455
|
+
- Reality: Groq has 4 models in the gateway (Llama variants)
|
|
456
|
+
|
|
457
|
+
**Fix:** New `UserPromptSubmit` hook (`enforce-external-research.py`) that:
|
|
458
|
+
1. Detects questions about external APIs/SDKs using pattern matching
|
|
459
|
+
2. Injects context requiring research before answering
|
|
460
|
+
3. Works for ANY API (Brandfetch, Stripe, Twilio, etc.) - not just specific ones
|
|
461
|
+
4. Auto-allows WebSearch and Context7 without permission prompts
|
|
462
|
+
|
|
463
|
+
```
|
|
464
|
+
USER: "What providers does Brandfetch API support?"
|
|
465
|
+
↓
|
|
466
|
+
HOOK: Detects "Brandfetch", "API", "providers"
|
|
467
|
+
↓
|
|
468
|
+
INJECTS: "RESEARCH REQUIRED: Use Context7/WebSearch before answering"
|
|
469
|
+
↓
|
|
470
|
+
CLAUDE: Researches first → Gives accurate answer
|
|
471
|
+
```
|
|
472
|
+
|
|
448
473
|
## 🔧 Requirements
|
|
449
474
|
|
|
450
475
|
- **Node.js** 14.0.0 or higher
|
package/commands/api-research.md
CHANGED
|
@@ -259,6 +259,83 @@ With thorough research:
|
|
|
259
259
|
- ✅ Robust implementation
|
|
260
260
|
- ✅ Better documentation
|
|
261
261
|
|
|
262
|
+
---
|
|
263
|
+
|
|
264
|
+
## Research-First Schema Design (MANDATORY)
|
|
265
|
+
|
|
266
|
+
### The Anti-Pattern: Schema-First Development
|
|
267
|
+
|
|
268
|
+
**NEVER DO THIS:**
|
|
269
|
+
- ❌ Define interfaces based on assumptions before researching
|
|
270
|
+
- ❌ Rely on training data for API capabilities
|
|
271
|
+
- ❌ Say "I think it supports..." without verification
|
|
272
|
+
- ❌ Build schemas from memory instead of documentation
|
|
273
|
+
|
|
274
|
+
**Real Example of Failure:**
|
|
275
|
+
- User asked: "What providers does Vercel AI Gateway support?"
|
|
276
|
+
- AI answered from memory: "Groq not in gateway"
|
|
277
|
+
- Reality: Groq has 4 models in the gateway (Llama variants)
|
|
278
|
+
- Root cause: No research was done before answering
|
|
279
|
+
|
|
280
|
+
### The Correct Pattern: Research-First
|
|
281
|
+
|
|
282
|
+
**ALWAYS DO THIS:**
|
|
283
|
+
|
|
284
|
+
**Step 1: Research the Source of Truth**
|
|
285
|
+
- Use Context7 (`mcp__context7__resolve-library-id` + `get-library-docs`) for SDK docs
|
|
286
|
+
- Use WebSearch for official provider documentation
|
|
287
|
+
- Query APIs directly when possible (don't assume)
|
|
288
|
+
- Check GitHub repositories for current implementation
|
|
289
|
+
|
|
290
|
+
**Step 2: Build Schema FROM Research**
|
|
291
|
+
- Interface fields emerge from discovered capabilities
|
|
292
|
+
- Every field has a source (docs, SDK types, API response)
|
|
293
|
+
- Don't guess - verify each capability
|
|
294
|
+
- Document where each field came from
|
|
295
|
+
|
|
296
|
+
**Step 3: Verify with Actual Calls**
|
|
297
|
+
- Test capabilities before marking them supported
|
|
298
|
+
- Investigate skipped tests - they're bugs, not features
|
|
299
|
+
- No "should work" - prove it works
|
|
300
|
+
- All tests must pass, not be skipped
|
|
301
|
+
|
|
302
|
+
### Mandatory Checklist Before Answering ANY External API Question
|
|
303
|
+
|
|
304
|
+
Before responding to questions about APIs, SDKs, or external services:
|
|
305
|
+
|
|
306
|
+
```
|
|
307
|
+
[ ] Did I use Context7 to get current documentation?
|
|
308
|
+
[ ] Did I use WebSearch for official docs?
|
|
309
|
+
[ ] Did I verify the information is current (not training data)?
|
|
310
|
+
[ ] Am I stating facts from research, not memory?
|
|
311
|
+
[ ] Have I cited my sources?
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
### Research Query Tracking
|
|
315
|
+
|
|
316
|
+
All research is now tracked in `.claude/api-dev-state.json`:
|
|
317
|
+
|
|
318
|
+
```json
|
|
319
|
+
{
|
|
320
|
+
"research_queries": [
|
|
321
|
+
{
|
|
322
|
+
"timestamp": "2025-12-07T...",
|
|
323
|
+
"tool": "WebSearch",
|
|
324
|
+
"query": "Vercel AI Gateway Groq providers",
|
|
325
|
+
"terms": ["vercel", "gateway", "groq", "providers"]
|
|
326
|
+
},
|
|
327
|
+
{
|
|
328
|
+
"timestamp": "2025-12-07T...",
|
|
329
|
+
"tool": "mcp__context7__get-library-docs",
|
|
330
|
+
"library": "@ai-sdk/gateway",
|
|
331
|
+
"terms": ["@ai-sdk/gateway"]
|
|
332
|
+
}
|
|
333
|
+
]
|
|
334
|
+
}
|
|
335
|
+
```
|
|
336
|
+
|
|
337
|
+
This allows verification that specific topics were actually researched before answering.
|
|
338
|
+
|
|
262
339
|
<claude-commands-template>
|
|
263
340
|
## Research Guidelines
|
|
264
341
|
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Hook: UserPromptSubmit
|
|
4
|
+
Purpose: Enforce research before answering external API/SDK questions
|
|
5
|
+
|
|
6
|
+
This hook runs BEFORE Claude processes the user's prompt. It detects
|
|
7
|
+
questions about external APIs, SDKs, or services and injects context
|
|
8
|
+
requiring Claude to research first before answering.
|
|
9
|
+
|
|
10
|
+
Philosophy: "When in doubt, research. Training data is ALWAYS potentially outdated."
|
|
11
|
+
|
|
12
|
+
Returns:
|
|
13
|
+
- Prints context to stdout (injected into conversation)
|
|
14
|
+
- Exit 0 to allow the prompt to proceed
|
|
15
|
+
"""
|
|
16
|
+
import json
|
|
17
|
+
import sys
|
|
18
|
+
import re
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from datetime import datetime
|
|
21
|
+
|
|
22
|
+
# State file is in .claude/ directory (sibling to hooks/)
|
|
23
|
+
STATE_FILE = Path(__file__).parent.parent / "api-dev-state.json"
|
|
24
|
+
|
|
25
|
+
# ============================================================================
|
|
26
|
+
# PATTERN-BASED DETECTION
|
|
27
|
+
# ============================================================================
|
|
28
|
+
|
|
29
|
+
# Patterns that indicate external service/API mentions.
# All patterns are applied to the lowercased prompt (see
# detect_external_api_question), so they are written in lowercase.
EXTERNAL_SERVICE_PATTERNS = [
    # Package names
    r"@[\w-]+/[\w-]+",  # @scope/package
    r"\b[\w-]+-(?:sdk|api|js|ts|py)\b",  # something-sdk, something-api, something-js

    # API/SDK keywords
    r"\b(?:api|sdk|library|package|module|framework)\b",

    # Technical implementation terms
    r"\b(?:endpoint|route|webhook|oauth|auth|token)\b",

    # Version references
    r"\bv?\d+\.\d+(?:\.\d+)?\b",  # version numbers like v1.2.3, 2.0

    # Import/require patterns
    r"(?:import|require|from)\s+['\"][\w@/-]+['\"]",
]

# Patterns that indicate asking about features/capabilities
CAPABILITY_QUESTION_PATTERNS = [
    # "What does X support/have/do"
    r"what\s+(?:does|can|are|is)\s+\w+",
    r"what\s+\w+\s+(?:support|have|provide|offer)",

    # "Does X support/have"
    r"(?:does|can|will)\s+\w+\s+(?:support|have|handle|do|work)",

    # "How to/do" questions
    r"how\s+(?:to|do|does|can|should)\s+",

    # Lists and availability
    r"(?:list|show)\s+(?:of|all|available)",
    r"which\s+\w+\s+(?:are|is)\s+(?:available|supported)",
    r"all\s+(?:available|supported)\s+\w+",

    # Examples and implementation
    r"example\s+(?:of|for|using|with)",
    r"how\s+to\s+(?:use|implement|integrate|connect|setup|configure)",
]

# Common external service/company names (partial list - patterns catch the rest).
# NOTE: these are matched as plain lowercase substrings of the prompt, so very
# short names (e.g. "gpt", "aws", "mux") can also hit inside longer words.
KNOWN_SERVICES = [
    # AI/ML
    "openai", "anthropic", "google", "gemini", "gpt", "claude", "llama",
    "groq", "perplexity", "mistral", "cohere", "huggingface", "replicate",

    # Cloud/Infrastructure
    "aws", "azure", "gcp", "vercel", "netlify", "cloudflare", "supabase",
    "firebase", "mongodb", "postgres", "redis", "elasticsearch",

    # APIs/Services
    "stripe", "twilio", "sendgrid", "mailchimp", "slack", "discord",
    "github", "gitlab", "bitbucket", "jira", "notion", "airtable",
    "shopify", "salesforce", "hubspot", "zendesk",

    # Data/Analytics
    "segment", "mixpanel", "amplitude", "datadog", "sentry", "grafana",

    # Media/Content
    "cloudinary", "imgix", "mux", "brandfetch", "unsplash", "pexels",

    # Auth
    "auth0", "okta", "clerk", "nextauth", "passport",

    # Payments
    "paypal", "square", "braintree", "adyen",
]
|
|
97
|
+
|
|
98
|
+
# ============================================================================
|
|
99
|
+
# DETECTION LOGIC
|
|
100
|
+
# ============================================================================
|
|
101
|
+
|
|
102
|
+
def detect_external_api_question(prompt: str) -> dict:
    """
    Detect if the prompt is asking about external APIs/SDKs.

    Matching is done on the lowercased prompt against three signal
    sources: the KNOWN_SERVICES name list, EXTERNAL_SERVICE_PATTERNS
    regexes, and CAPABILITY_QUESTION_PATTERNS regexes.

    Args:
        prompt: Raw user prompt text.

    Returns:
        {
            "detected": bool,
            "terms": list of detected terms (at most 10),
            "patterns_matched": list of pattern types matched,
            "confidence": "high" | "medium" | "low" | "none"
        }
    """
    prompt_lower = prompt.lower()
    detected_terms = []
    patterns_matched = []

    # Check for known services (plain substring match on the lowered prompt)
    for service in KNOWN_SERVICES:
        if service in prompt_lower:
            detected_terms.append(service)
            patterns_matched.append("known_service")

    # Check external service patterns (package names, versions, imports, ...).
    # The prompt is already lowercased and the patterns are lowercase, so the
    # original re.IGNORECASE flag was redundant and has been dropped.
    for pattern in EXTERNAL_SERVICE_PATTERNS:
        matches = re.findall(pattern, prompt_lower)
        if matches:
            detected_terms.extend(matches)
            patterns_matched.append("external_service_pattern")

    # Check capability question patterns; a single hit is enough.
    for pattern in CAPABILITY_QUESTION_PATTERNS:
        if re.search(pattern, prompt_lower):
            patterns_matched.append("capability_question")
            break

    # Deduplicate while preserving first-seen order. The original used
    # list(set(...)), which made the [:10] truncation below (and therefore
    # the injected/logged terms) non-deterministic under hash randomization.
    detected_terms = list(dict.fromkeys(detected_terms))
    patterns_matched = list(dict.fromkeys(patterns_matched))

    # Determine confidence: a known service name combined with a capability
    # question is the strongest signal; a service name alone (or several
    # generic term hits) is medium; any other pattern hit is low.
    if "known_service" in patterns_matched and "capability_question" in patterns_matched:
        confidence = "high"
    elif "known_service" in patterns_matched or len(detected_terms) >= 2:
        confidence = "medium"
    elif patterns_matched:
        confidence = "low"
    else:
        confidence = "none"

    return {
        "detected": confidence in ["high", "medium"],
        "terms": detected_terms[:10],  # Limit to 10 terms
        "patterns_matched": patterns_matched,
        "confidence": confidence,
    }
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def check_active_workflow() -> bool:
    """Check if there's an active API development workflow.

    Returns True when the state file exists and any phase entry has a
    status of "in_progress" or "pending". Returns False otherwise,
    including on any read/parse error — the hook must fail open and
    never block the user's prompt.
    """
    if not STATE_FILE.exists():
        return False

    try:
        state = json.loads(STATE_FILE.read_text())
        phases = state.get("phases", {})

        # Any phase still pending or in progress means the workflow is active.
        # Iterate values only (keys were unused in the original loop).
        for phase_data in phases.values():
            if isinstance(phase_data, dict):
                if phase_data.get("status", "") in ("in_progress", "pending"):
                    return True

        return False
    except Exception:
        # The original caught (json.JSONDecodeError, Exception); the tuple
        # was redundant since Exception already covers JSONDecodeError.
        return False
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def check_already_researched(terms: list) -> list:
    """Return the subset of *terms* that already appear in recorded research.

    Searches both the tracked ``research_queries`` entries and each
    phase's ``sources`` list in the state file, using a case-insensitive
    substring match against the combined research text. Returns [] when
    the state file is missing or unreadable (fail open).
    """
    if not STATE_FILE.exists():
        return []

    try:
        state = json.loads(STATE_FILE.read_text())
        research_queries = state.get("research_queries", [])

        # Also check sources recorded in phases
        phases = state.get("phases", {})
        all_sources = []
        for phase_data in phases.values():
            if isinstance(phase_data, dict):
                all_sources.extend(phase_data.get("sources", []))

        # Combine all research text into one haystack, joining every piece
        # with a space so terms cannot falsely match across fragment
        # boundaries (the original concatenated two joins with no separator).
        pieces = [str(s) for s in all_sources]
        for q in research_queries:
            if isinstance(q, dict):
                # track-tool-use.py records "query" (WebSearch), "url"
                # (WebFetch), "library" (Context7) and a "terms" list.
                # The original looked up a nonexistent singular "term" key,
                # so Context7/WebFetch research was never recognized here.
                pieces.append(str(q.get("query", "")))
                pieces.append(str(q.get("url", "")))
                pieces.append(str(q.get("library", "")))
                recorded_terms = q.get("terms", [])
                if isinstance(recorded_terms, list):
                    pieces.extend(str(t) for t in recorded_terms)
        all_research_text = " ".join(pieces).lower()

        # Find which of the requested terms were already researched.
        return [term for term in terms if term.lower() in all_research_text]
    except Exception:
        # Fail open: an unreadable state file means nothing counts as
        # researched, so the research requirement will be injected.
        return []
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def log_detection(prompt: str, detection: dict) -> None:
    """Append a detection event to the state file for debugging/auditing.

    Silently does nothing when the state file is absent or any step
    fails — logging must never break the hook.
    """
    if not STATE_FILE.exists():
        return

    try:
        state = json.loads(STATE_FILE.read_text())

        entries = state.setdefault("prompt_detections", [])

        # Store only a short preview of the prompt, not the full text.
        preview = prompt if len(prompt) <= 100 else prompt[:100] + "..."
        entries.append({
            "timestamp": datetime.now().isoformat(),
            "prompt_preview": preview,
            "detection": detection,
        })

        # Cap the audit trail at the 20 most recent entries.
        state["prompt_detections"] = entries[-20:]

        STATE_FILE.write_text(json.dumps(state, indent=2))
    except Exception:
        pass  # Never fail the hook because of a logging problem
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
# ============================================================================
|
|
243
|
+
# MAIN
|
|
244
|
+
# ============================================================================
|
|
245
|
+
|
|
246
|
+
def main():
    """Hook entry point.

    Reads the UserPromptSubmit payload from stdin, detects external
    API/SDK questions, and prints a research-requirement context block
    to stdout when needed. Always exits 0 so the prompt proceeds.
    """
    # Read hook input from stdin
    try:
        payload = json.load(sys.stdin)
    except json.JSONDecodeError:
        # Unparseable input: allow the prompt through without injection.
        sys.exit(0)

    prompt = payload.get("prompt", "")
    if not prompt:
        sys.exit(0)

    # An active API development workflow means stricter enforcement.
    active_workflow = check_active_workflow()

    # Detect external API questions in the prompt.
    detection = detect_external_api_question(prompt)

    # Keep an audit trail of interesting prompts.
    if detection["detected"] or active_workflow:
        log_detection(prompt, detection)

    # Decide whether to inject the research requirement.
    should_inject = False
    inject_reason = ""

    if active_workflow:
        # In active workflow, ANY technical signal triggers injection.
        if detection["confidence"] in ("high", "medium", "low"):
            should_inject = True
            inject_reason = "active_workflow"
    elif detection["detected"]:
        # Outside a workflow, only inject for terms not yet researched.
        researched = check_already_researched(detection["terms"])
        pending = [t for t in detection["terms"] if t not in researched]

        if pending:
            should_inject = True
            inject_reason = "unresearched_terms"
            detection["unresearched"] = pending

    if should_inject:
        # Show at most five terms, preferring the unresearched subset.
        terms_str = ", ".join(detection.get("unresearched", detection["terms"])[:5])

        injection = f"""
<user-prompt-submit-hook>
EXTERNAL API/SDK DETECTED: {terms_str}
Confidence: {detection["confidence"]}
{"Mode: Active API Development Workflow" if active_workflow else ""}

MANDATORY RESEARCH REQUIREMENT:
Before answering this question, you MUST:

1. Use Context7 (mcp__context7__resolve-library-id + get-library-docs) to look up current documentation
2. Use WebSearch to find official documentation and recent updates
3. NEVER answer from training data alone - it may be outdated

Training data can be months or years old. APIs change constantly.
Research first. Then answer with verified, current information.

After researching, cite your sources in your response.
</user-prompt-submit-hook>
"""
        print(injection)

    # Always allow the prompt to proceed
    sys.exit(0)
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
# Run as a Claude Code UserPromptSubmit hook: reads the JSON payload
# from stdin and exits 0 so the prompt always proceeds.
if __name__ == "__main__":
    main()
|
package/hooks/track-tool-use.py
CHANGED
|
@@ -147,6 +147,29 @@ def main():
|
|
|
147
147
|
# Add to sources list
|
|
148
148
|
sources.append(source_entry)
|
|
149
149
|
|
|
150
|
+
# Also add to research_queries for prompt verification
|
|
151
|
+
research_queries = state.setdefault("research_queries", [])
|
|
152
|
+
query_entry = {
|
|
153
|
+
"timestamp": timestamp,
|
|
154
|
+
"tool": tool_name,
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
# Extract query/term based on tool type
|
|
158
|
+
if tool_name == "WebSearch":
|
|
159
|
+
query_entry["query"] = tool_input.get("query", "")
|
|
160
|
+
query_entry["terms"] = extract_terms(tool_input.get("query", ""))
|
|
161
|
+
elif tool_name == "WebFetch":
|
|
162
|
+
query_entry["url"] = tool_input.get("url", "")
|
|
163
|
+
query_entry["terms"] = extract_terms_from_url(tool_input.get("url", ""))
|
|
164
|
+
elif "context7" in tool_name.lower():
|
|
165
|
+
query_entry["library"] = tool_input.get("libraryName", tool_input.get("libraryId", ""))
|
|
166
|
+
query_entry["terms"] = [tool_input.get("libraryName", "").lower()]
|
|
167
|
+
|
|
168
|
+
research_queries.append(query_entry)
|
|
169
|
+
|
|
170
|
+
# Keep only last 50 queries
|
|
171
|
+
state["research_queries"] = research_queries[-50:]
|
|
172
|
+
|
|
150
173
|
# Update last activity timestamp
|
|
151
174
|
research["last_activity"] = timestamp
|
|
152
175
|
research["source_count"] = len(sources)
|
|
@@ -190,7 +213,7 @@ def main():
|
|
|
190
213
|
def create_initial_state():
|
|
191
214
|
"""Create initial state structure"""
|
|
192
215
|
return {
|
|
193
|
-
"version": "1.
|
|
216
|
+
"version": "1.1.0",
|
|
194
217
|
"created_at": datetime.now().isoformat(),
|
|
195
218
|
"phases": {
|
|
196
219
|
"scope": {"status": "not_started"},
|
|
@@ -209,7 +232,9 @@ def create_initial_state():
|
|
|
209
232
|
"schema_matches_docs": False,
|
|
210
233
|
"tests_cover_params": False,
|
|
211
234
|
"all_tests_passing": False
|
|
212
|
-
}
|
|
235
|
+
},
|
|
236
|
+
"research_queries": [],
|
|
237
|
+
"prompt_detections": []
|
|
213
238
|
}
|
|
214
239
|
|
|
215
240
|
|
|
@@ -225,5 +250,48 @@ def sanitize_input(tool_input):
|
|
|
225
250
|
return sanitized
|
|
226
251
|
|
|
227
252
|
|
|
253
|
+
def extract_terms(query: str) -> list:
    """Extract up to 10 meaningful, lowercase search terms from a query.

    Tokenizes the lowered query (word chars plus @, /, -), then drops
    common stop words and any token of two characters or fewer.
    """
    import re

    # Common words that carry no search signal.
    stop_words = {"the", "a", "an", "is", "are", "was", "were", "be", "been",
                  "how", "to", "do", "does", "what", "which", "for", "and", "or",
                  "in", "on", "at", "with", "from", "this", "that", "it"}

    terms = []
    for token in re.findall(r'\b[\w@/-]+\b', query.lower()):
        if token not in stop_words and len(token) > 2:
            terms.append(token)

    return terms[:10]  # Limit to 10 terms
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def extract_terms_from_url(url: str) -> list:
    """Extract up to 10 meaningful, lowercase terms from a URL.

    Splits the host (minus a leading "www.") and each path segment on
    '-', '_' and '.', then filters out short tokens and URL boilerplate
    (TLDs, "docs", scheme names, ...). Returns [] on any parse error.
    """
    import re
    from urllib.parse import urlparse

    # Boilerplate tokens that appear in most URLs and carry no signal.
    stop_terms = {"com", "org", "io", "dev", "api", "docs", "www", "http", "https"}

    try:
        parsed = urlparse(url)
        host_parts = parsed.netloc.replace("www.", "").split(".")
        path_parts = [segment for segment in parsed.path.split("/") if segment]

        candidates = []
        for piece in host_parts + path_parts:
            candidates.extend(re.split(r'[-_.]', piece.lower()))

        filtered = [t for t in candidates if t not in stop_terms and len(t) > 2]
        return filtered[:10]
    except Exception:
        return []
|
|
294
|
+
|
|
295
|
+
|
|
228
296
|
# Run as a Claude Code PostToolUse hook: records research tool usage
# into the shared .claude/api-dev-state.json state file.
if __name__ == "__main__":
    main()
|
package/package.json
CHANGED
package/templates/settings.json
CHANGED
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
"WebSearch",
|
|
5
5
|
"WebFetch",
|
|
6
6
|
"mcp__context7",
|
|
7
|
+
"mcp__context7__resolve-library-id",
|
|
8
|
+
"mcp__context7__get-library-docs",
|
|
7
9
|
"mcp__github",
|
|
8
10
|
"Bash(claude mcp:*)",
|
|
9
11
|
"Bash(pnpm test:*)",
|
|
@@ -14,6 +16,16 @@
|
|
|
14
16
|
]
|
|
15
17
|
},
|
|
16
18
|
"hooks": {
|
|
19
|
+
"UserPromptSubmit": [
|
|
20
|
+
{
|
|
21
|
+
"hooks": [
|
|
22
|
+
{
|
|
23
|
+
"type": "command",
|
|
24
|
+
"command": "$CLAUDE_PROJECT_DIR/.claude/hooks/enforce-external-research.py"
|
|
25
|
+
}
|
|
26
|
+
]
|
|
27
|
+
}
|
|
28
|
+
],
|
|
17
29
|
"PreToolUse": [
|
|
18
30
|
{
|
|
19
31
|
"matcher": "Write|Edit",
|