contexer 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contexer/__init__.py +0 -0
- contexer/__main__.py +3 -0
- contexer/server.py +78 -0
- contexer/store.py +739 -0
- contexer-0.1.0.dist-info/METADATA +207 -0
- contexer-0.1.0.dist-info/RECORD +9 -0
- contexer-0.1.0.dist-info/WHEEL +4 -0
- contexer-0.1.0.dist-info/entry_points.txt +2 -0
- contexer-0.1.0.dist-info/licenses/LICENSE +21 -0
contexer/__init__.py
ADDED
|
File without changes
|
contexer/__main__.py
ADDED
contexer/server.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import uuid
|
|
3
|
+
from mcp.server.fastmcp import FastMCP
|
|
4
|
+
from contexer import store
|
|
5
|
+
|
|
6
|
+
# Random UUID4 string generated once when this module is imported; it is passed
# as the session_id argument to store.capture_task and store.update_decision.
SESSION_ID = str(uuid.uuid4())
# FastMCP instance named "contexer". The @mcp.tool() functions in this module
# attach to it, and main() calls its run() method.
mcp = FastMCP("contexer")
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@mcp.tool()
def capture_context(description: str, repo_path: str = "") -> str:
    """Called at the start of every task. Captures the developer's task description for the given repo."""
    # NOTE(review): docstring kept verbatim — FastMCP presumably publishes it as
    # the tool description; confirm before rewording.
    repo = store._resolve_repo(repo_path)
    if repo:
        # capture_task returns None when the description is rejected as a non-task.
        captured_id = store.capture_task(repo, description, SESSION_ID)
        if captured_id is not None:
            return f"Captured. id={captured_id}"
        return "Skipped — does not look like a task description."
    return "Skipped — repo path not detected."
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@mcp.tool()
def update_context(content: str, repo_path: str = "", subtype: str = "") -> str:
    """Called when Claude Code makes a significant decision mid-task. The server filters before storing.

    subtype: optional classification for filtered retrieval — architecture | constraint | pattern | convention
    """
    # NOTE(review): docstring kept verbatim — FastMCP presumably publishes it as
    # the tool description; confirm before rewording.
    repo = store._resolve_repo(repo_path)
    if not repo:
        return "Skipped — repo path not detected."
    # update_decision reports (stored?, id); the id is None when nothing was stored.
    was_stored, decision_id = store.update_decision(repo, content, SESSION_ID, subtype)
    if not was_stored:
        return "Filtered — did not meet storage criteria."
    return f"Stored. id={decision_id}"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@mcp.tool()
def get_context(repo_path: str = "", query: str = "", entry_type: str = "", limit: int = 0) -> str:
    """Returns stored context for the current repository. Call this when the task requires project context.

    query: optional keyword filter (case-insensitive substring match against decision content).
    entry_type: optional subtype filter — architecture | constraint | pattern | convention
    limit: max decisions to return (0 = auto: 25 for filtered queries, 10 for unfiltered overview).
    """
    # NOTE(review): docstring kept verbatim — FastMCP presumably publishes it as
    # the tool description; confirm before rewording.
    repo = store._resolve_repo(repo_path)
    if repo:
        # All filtering and rendering is delegated to the store module.
        return store.get_context(repo, query, entry_type, limit)
    return "No repo path detected."
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@mcp.tool()
def bootstrap_context(repo_path: str = "") -> str:
    """Scans a repo for inferable decisions and gap questions. Present inferred
    items to the user for confirmation, store confirmed ones via update_context,
    then ask the gap questions and store each answer."""
    # NOTE(review): docstring kept verbatim — FastMCP presumably publishes it as
    # the tool description; confirm before rewording.
    repo = store._resolve_repo(repo_path)
    if repo:
        scan_result = store.bootstrap_scan(repo)
        return json.dumps(scan_result, indent=2)
    # The failure is also reported as JSON so callers always get a JSON string back.
    return json.dumps({"error": "repo path not detected"})
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@mcp.tool()
def get_context_for_prompt(repo_path: str = "", prompt: str = "") -> str:
    """Auto-called by UserPromptSubmit hook on every prompt. Detects rationale/decision
    questions (why, reason, rationale, decided...) and injects matching stored decisions
    as additionalContext. Returns empty string for non-rationale prompts — silent no-op."""
    # NOTE(review): docstring kept verbatim — FastMCP presumably publishes it as
    # the tool description; confirm before rewording.
    repo = store._resolve_repo(repo_path)
    # Without a resolvable repo there is nothing to look up: return the empty string.
    return store.get_context_for_prompt(repo, prompt) if repo else ""
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def main():
    """Run the module-level FastMCP instance by calling ``mcp.run()``."""
    mcp.run()
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
# Start the server when this file is executed directly rather than imported.
if __name__ == "__main__":
    main()
|
contexer/store.py
ADDED
|
@@ -0,0 +1,739 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import re
|
|
3
|
+
import tomllib
|
|
4
|
+
import uuid
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# Directory under the user's home directory holding contexer's files:
# _store_path creates it and places one JSON file per repo inside it, and
# _current_repo_path reads the ".current_repo" file from it.
STORE_DIR = Path.home() / ".contexer"
# Upper bound on entries kept per repo store; capture_task and update_decision
# keep only the last MAX_ENTRIES entries after appending.
MAX_ENTRIES = 500
_UNFILTERED_DISPLAY = 10  # entries shown when no query/type filter applied
_FILTERED_DISPLAY = 25  # entries shown when a filter is active (caller asked for something specific)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _current_repo_path() -> str:
    """Return the stripped contents of ``STORE_DIR/.current_repo``, or "" if that file does not exist."""
    marker = STORE_DIR / ".current_repo"
    return marker.read_text().strip() if marker.exists() else ""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _resolve_repo(repo_path: str) -> str:
|
|
22
|
+
if repo_path:
|
|
23
|
+
return repo_path
|
|
24
|
+
return _current_repo_path()
|
|
25
|
+
|
|
26
|
+
def _slug(repo_path: str) -> str:
|
|
27
|
+
return re.sub(r"[^a-zA-Z0-9_-]", "_", repo_path.strip("/"))
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _store_path(repo_path: str) -> Path:
    """Return the JSON store file for ``repo_path``, creating ``STORE_DIR`` first if it is missing."""
    STORE_DIR.mkdir(exist_ok=True)
    file_name = f"{_slug(repo_path)}.json"
    return STORE_DIR / file_name
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _load(repo_path: str) -> dict:
    """Read the repo's store file and return its parsed JSON.

    When the file does not exist yet, a fresh store dict with the repo path
    and an empty ``entries`` list is returned instead (nothing is written).
    """
    store_file = _store_path(repo_path)
    if not store_file.exists():
        return {"repo_path": repo_path, "entries": []}
    return json.loads(store_file.read_text())
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _save(repo_path: str, data: dict) -> None:
    """Serialize ``data`` as indented JSON into the repo's store file.

    The payload is written to a uniquely named sibling file and then moved
    over the real store with ``Path.replace``. An interrupted write therefore
    cannot leave a truncated store behind, which would make every later
    ``_load`` fail while parsing it.

    Args:
        repo_path: repository whose store file is written.
        data: JSON-serializable store content.

    Raises:
        TypeError / ValueError: if ``data`` cannot be serialized (raised
            before any file is touched).
        OSError: if writing or replacing the file fails.
    """
    target = _store_path(repo_path)
    # Serialize first so a serialization error never touches the disk.
    payload = json.dumps(data, indent=2)
    # Unique name so two processes saving at once do not share a scratch file.
    scratch = target.with_name(f"{target.name}.{uuid.uuid4().hex}.tmp")
    try:
        scratch.write_text(payload)
        scratch.replace(target)
    finally:
        # No-op after a successful replace; removes the leftover on failure.
        scratch.unlink(missing_ok=True)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _is_novel(content: str, existing: list) -> bool:
|
|
47
|
+
if not existing:
|
|
48
|
+
return True
|
|
49
|
+
tokens = set(content.lower().split())
|
|
50
|
+
if not tokens:
|
|
51
|
+
return False
|
|
52
|
+
for entry in existing:
|
|
53
|
+
other = set(entry.get("content", "").lower().split())
|
|
54
|
+
if not other:
|
|
55
|
+
continue
|
|
56
|
+
overlap = len(tokens & other) / max(len(tokens), len(other))
|
|
57
|
+
if overlap > 0.7:
|
|
58
|
+
return False
|
|
59
|
+
return True
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _passes_filter(content: str, existing: list) -> bool:
    """Return True when ``content`` is novel compared with the stored decisions.

    Only entries whose ``"type"`` is ``"decision"`` take part in the
    comparison. Novelty is the sole criterion: duplicates are vetoed, and
    anything novel passes.
    """
    prior_decisions = []
    for entry in existing:
        if entry["type"] == "decision":
            prior_decisions.append(entry)
    return _is_novel(content, prior_decisions)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# Lower-cased first words that _is_task treats as the opening of a question.
_QUESTION_STARTS = {
    "what", "how", "why", "when", "where", "who", "which",
    "is", "are", "can", "does", "do", "will", "would", "could", "should",
}
|
|
73
|
+
|
|
74
|
+
def _is_task(content: str) -> bool:
|
|
75
|
+
stripped = content.strip()
|
|
76
|
+
words = stripped.lower().split()
|
|
77
|
+
if len(words) < 5:
|
|
78
|
+
return False
|
|
79
|
+
if stripped.endswith("?") and len(words) < 20:
|
|
80
|
+
return False
|
|
81
|
+
if words[0] in _QUESTION_STARTS and len(words) < 12:
|
|
82
|
+
return False
|
|
83
|
+
return True
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def capture_task(repo_path: str, description: str, session_id: str) -> str | None:
    """Store ``description`` as the repo's single "task" entry.

    Returns the new entry id, or None (without touching the store) when
    ``_is_task`` rejects the description. Any previously stored task entries
    are dropped so only one task slot exists; the list is then trimmed to the
    last ``MAX_ENTRIES`` entries and saved.
    """
    if not _is_task(description):
        return None

    state = _load(repo_path)
    task_id = str(uuid.uuid4())
    record = {
        "id": task_id,
        "type": "task",
        "content": description,
        "session_id": session_id,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }

    # Everything except old tasks survives; the new task goes last.
    kept = [e for e in state["entries"] if e["type"] != "task"]
    kept.append(record)
    state["entries"] = kept[-MAX_ENTRIES:]

    _save(repo_path, state)
    return task_id
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def update_decision(repo_path: str, content: str, session_id: str, subtype: str = "") -> tuple[bool, str | None]:
    """Append ``content`` as a "decision" entry if it passes the storage filter.

    Returns ``(True, entry_id)`` when stored, ``(False, None)`` when
    ``_passes_filter`` rejects it (in which case nothing is saved). After
    appending, the list is trimmed to the last ``MAX_ENTRIES`` entries.
    """
    state = _load(repo_path)
    if _passes_filter(content, state["entries"]):
        decision_id = str(uuid.uuid4())
        state["entries"].append(
            {
                "id": decision_id,
                "type": "decision",
                "subtype": subtype,
                "content": content,
                "session_id": session_id,
                "timestamp": datetime.now(timezone.utc).isoformat(),
            }
        )
        state["entries"] = state["entries"][-MAX_ENTRIES:]
        _save(repo_path, state)
        return True, decision_id
    return False, None
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _build_bootstrap_context(repo_path: str) -> list[str]:
|
|
124
|
+
return [
|
|
125
|
+
"STOP — do not answer the user's request yet.",
|
|
126
|
+
f"Repo: {repo_path}. No stored context — complete a quick bootstrap first.",
|
|
127
|
+
"Do this now, before anything else:",
|
|
128
|
+
" 1. Call the bootstrap_context MCP tool with repo_path='' to get items to confirm.",
|
|
129
|
+
" 2. Take the first item. State it to the user, ask 'Correct? yes / no / [correction]'. Wait for reply.",
|
|
130
|
+
" 3. Call update_context to store the confirmed fact. Then move to item 2.",
|
|
131
|
+
" 4. Repeat until all items are done. Then — and only then — address the user's original request.",
|
|
132
|
+
"Start by calling bootstrap_context now.",
|
|
133
|
+
]
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def get_session_start_context(repo_path: str) -> dict:
    """Build the SessionStart hook payload for ``repo_path``.

    The result always has a ``systemMessage`` string and a
    ``hookSpecificOutput`` dict with ``hookEventName`` "SessionStart" and an
    ``additionalContext`` string.

    * No stored decisions: ``additionalContext`` carries the bootstrap
      instructions from ``_build_bootstrap_context``.
    * Otherwise: decisions with subtype "convention" or "constraint" are
      listed in ``additionalContext`` as always-apply rules; the remaining
      decisions are only counted, with a reminder to call get_context.
      ``systemMessage`` carries a one-line status summary.
    """

    def _payload(status: str, context_lines: list) -> dict:
        # Shared envelope for both the bootstrap and the normal case.
        return {
            "systemMessage": status,
            "hookSpecificOutput": {
                "hookEventName": "SessionStart",
                "additionalContext": "\n".join(context_lines),
            },
        }

    def _counted(n: int, noun: str) -> str:
        # Naive pluralization: append "s" unless the count is exactly 1.
        return f"{n} {noun}" if n == 1 else f"{n} {noun}s"

    stored = _load(repo_path)
    decisions = [e for e in stored.get("entries", []) if e["type"] == "decision"]
    if not decisions:
        return _payload(
            "Contexer: no context stored — bootstrapping now",
            _build_bootstrap_context(repo_path),
        )

    total = len(decisions)
    # Conventions and constraints are embedded up front; everything else is deferred.
    always_on = [d for d in decisions if d.get("subtype") in ("convention", "constraint")]
    on_demand = total - len(always_on)

    # Lines that end up in hookSpecificOutput.additionalContext.
    context_lines = []
    if always_on:
        context_lines.append("## Project rules — apply to ALL tasks in this repo:")
        context_lines.extend(f"- [{d.get('subtype', '')}] {d['content']}" for d in always_on)
    if on_demand > 0:
        context_lines.append(
            f"{on_demand} decision(s) stored (architecture/patterns). "
            "Call get_context BEFORE reading files for any question about architecture, "
            "design decisions, rationale, or patterns."
        )

    # One-line status that ends up in systemMessage.
    if always_on and on_demand > 0:
        status = (
            f"Contexer: loaded {_counted(len(always_on), 'decision')} related to constraint and convention. "
            f"Remaining {_counted(on_demand, 'decision')} will be loaded as needed."
        )
    elif always_on:
        status = f"Contexer: loaded {_counted(len(always_on), 'decision')} related to constraint and convention."
    else:
        status = f"Contexer: {_counted(total, 'decision')} will be loaded as needed."

    return _payload(status, context_lines)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def get_bootstrap_context_prompt(repo_path: str) -> dict:
    """Return a UserPromptSubmit payload with bootstrap instructions, if needed.

    Acts as a fallback for sessions where the SessionStart bootstrap did not
    happen: when the repo has no stored decision, the payload carries the
    lines from ``_build_bootstrap_context`` as ``additionalContext``. When at
    least one decision exists, an empty dict is returned.
    """
    entries = _load(repo_path).get("entries", [])
    decisions = [e for e in entries if e["type"] == "decision"]
    if not decisions:
        instructions = _build_bootstrap_context(repo_path)
        return {
            "hookSpecificOutput": {
                "hookEventName": "UserPromptSubmit",
                "additionalContext": "\n".join(instructions),
            }
        }
    return {}
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
_RATIONALE_WORDS = frozenset({
|
|
206
|
+
"why", "reason", "rationale", "decision", "decided", "chose", "choice",
|
|
207
|
+
"motivation", "intent", "reasoning", "background", "justif",
|
|
208
|
+
})
|
|
209
|
+
|
|
210
|
+
_QUERY_STOP_WORDS = frozenset({
|
|
211
|
+
"why", "was", "the", "did", "we", "for", "what", "how", "is", "are",
|
|
212
|
+
"can", "does", "this", "that", "it", "to", "of", "in", "a", "an",
|
|
213
|
+
"and", "or", "but", "not", "with", "at", "by", "from", "reason",
|
|
214
|
+
"rationale", "decision", "decided", "chose", "choice", "about", "have",
|
|
215
|
+
"has", "been", "would", "could", "should", "will", "tell", "explain",
|
|
216
|
+
"know", "me", "you", "do", "our", "my", "your", "them", "they",
|
|
217
|
+
"implement", "implemented", "implementation", "use", "using", "used",
|
|
218
|
+
"build", "built", "create", "created", "add", "added", "make", "made",
|
|
219
|
+
"just", "here", "there", "when", "then", "than", "also", "get",
|
|
220
|
+
"into", "which", "who", "where", "what", "that", "its", "been",
|
|
221
|
+
})
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def get_context_for_prompt(repo_path: str, prompt: str) -> str:
    """Return stored context relevant to a rationale-style prompt, else "".

    The prompt is lower-cased, split on whitespace, and each word is stripped
    of surrounding punctuation. If none of the words is in
    ``_RATIONALE_WORDS``, or the repo has no entries, the result is "".
    Otherwise up to three keywords (alphabetic, longer than 3 characters, not
    stop words; longest first) are tried in turn as ``get_context`` queries,
    and the first rendering that reports a match is returned with a
    "[Contexer: auto-fetched ...]" header line.
    """
    stored = _load(repo_path)
    if not stored.get("entries"):
        return ""

    tokens = [tok.strip("?,./!;:\"'()[]") for tok in prompt.lower().split()]
    if _RATIONALE_WORDS.isdisjoint(tokens):
        return ""

    keywords = [
        tok for tok in tokens
        if len(tok) > 3 and tok not in _QUERY_STOP_WORDS and tok.isalpha()
    ]
    # Longest keywords are assumed to be the most specific, so they go first.
    ranked = sorted(set(keywords), key=len, reverse=True)

    for keyword in ranked[:3]:
        rendered = get_context(repo_path, query=keyword)
        # get_context signals "nothing found" only through its text output.
        if "No matching decisions" in rendered or "No context stored" in rendered:
            continue
        return f"[Contexer: auto-fetched for this question]\n{rendered}"

    return ""
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def get_context(repo_path: str, query: str = "", entry_type: str = "", limit: int = 0) -> str:
    """Render the stored context of a repo as markdown-style text.

    Args:
        repo_path: repository whose store is read.
        query: if non-empty, keep only decisions whose content contains it
            (case-insensitive substring match).
        entry_type: if non-empty, keep only decisions with this subtype; the
            "Last task" section is omitted in that case.
        limit: maximum decisions shown; values <= 0 select
            ``_FILTERED_DISPLAY`` when a filter is active and
            ``_UNFILTERED_DISPLAY`` otherwise.

    Returns:
        The rendered text, or a fixed "No context stored ..." sentence when
        the store has no entries. The most recently stored decisions are
        shown when there are more matches than the limit.
    """
    entries = _load(repo_path).get("entries", [])
    if not entries:
        return "No context stored for this repository."

    out = [f"# Context for {repo_path}\n"]

    if not entry_type:
        tasks = [e for e in entries if e["type"] == "task"]
        if tasks:
            latest = tasks[-1]
            out += [f"## Last task ({latest['timestamp'][:10]})", latest["content"], ""]

    # Apply the type filter, then the optional subtype and keyword filters.
    needle = query.lower() if query else ""
    matches = []
    for entry in entries:
        if entry["type"] != "decision":
            continue
        if entry_type and entry.get("subtype", "") != entry_type:
            continue
        if query and needle not in entry.get("content", "").lower():
            continue
        matches.append(entry)

    # Human-readable description of the active filters (empty when unfiltered).
    active_filters = []
    if query:
        active_filters.append(f"query='{query}'")
    if entry_type:
        active_filters.append(f"type='{entry_type}'")

    if limit > 0:
        cap = limit
    elif active_filters:
        cap = _FILTERED_DISPLAY
    else:
        cap = _UNFILTERED_DISPLAY

    if matches:
        heading_note = f" (filtered: {', '.join(active_filters)})" if active_filters else ""
        visible = matches[-cap:]
        if len(matches) > cap:
            heading_note += f" — showing {len(visible)} of {len(matches)}"
        out.append(f"## Decisions and context{heading_note}")
        for decision in visible:
            tag = f" [{decision['subtype']}]" if decision.get("subtype") else ""
            out.append(f"- [{decision['timestamp'][:10]}]{tag} {decision['content']}")
        out.append("")
    elif active_filters:
        out.append(f"No matching decisions found ({', '.join(active_filters)}).")

    return "\n".join(out)
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def _infer_purpose(name: str, readme_summary: str) -> str:
|
|
310
|
+
"""Derive a concrete purpose assumption from project name and README first line."""
|
|
311
|
+
if readme_summary:
|
|
312
|
+
return readme_summary
|
|
313
|
+
if not name:
|
|
314
|
+
return "Purpose not yet documented"
|
|
315
|
+
n = name.lower()
|
|
316
|
+
if any(w in n for w in ["api", "server", "service", "backend"]):
|
|
317
|
+
return f"Backend API or service (\"{name}\")"
|
|
318
|
+
if any(w in n for w in ["cli", "tool", "cmd"]):
|
|
319
|
+
return f"CLI tool (\"{name}\")"
|
|
320
|
+
if any(w in n for w in ["bot", "agent"]):
|
|
321
|
+
return f"Bot or agent (\"{name}\")"
|
|
322
|
+
if any(w in n for w in ["worker", "job", "queue", "task"]):
|
|
323
|
+
return f"Background worker or job processor (\"{name}\")"
|
|
324
|
+
if any(w in n for w in ["web", "app", "ui", "front", "dashboard"]):
|
|
325
|
+
return f"Web app or frontend (\"{name}\")"
|
|
326
|
+
if any(w in n for w in ["lib", "sdk", "package", "plugin"]):
|
|
327
|
+
return f"Library or SDK (\"{name}\")"
|
|
328
|
+
return f"\"{name}\" — type not obvious from name alone"
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def bootstrap_scan(repo_path: str) -> dict:
|
|
332
|
+
root = Path(repo_path)
|
|
333
|
+
data = _load(repo_path)
|
|
334
|
+
existing = [e for e in data.get("entries", []) if e["type"] == "decision"]
|
|
335
|
+
inferred: list[str] = []
|
|
336
|
+
found_files: list[str] = []
|
|
337
|
+
all_deps: set[str] = set()
|
|
338
|
+
|
|
339
|
+
# signals used only for question generation — not stored as inferred facts
|
|
340
|
+
sig: dict = {
|
|
341
|
+
"project_name": "",
|
|
342
|
+
"readme_summary": "",
|
|
343
|
+
"has_tests": False,
|
|
344
|
+
"has_ci": False,
|
|
345
|
+
"has_container": False,
|
|
346
|
+
"has_infra": False,
|
|
347
|
+
"has_security_sensitive": False, # auth or payment deps detected
|
|
348
|
+
"cloud_detected": "", # "AWS" | "GCP" | "Azure" | ""
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
def _add(fact: str) -> None:
|
|
352
|
+
proxy = [{"content": f} for f in inferred]
|
|
353
|
+
if _is_novel(fact, existing + proxy):
|
|
354
|
+
inferred.append(fact)
|
|
355
|
+
|
|
356
|
+
def _gap(assumption: str, question: str, hint: str) -> dict:
|
|
357
|
+
return {"assumption": assumption, "question": question, "hint": hint}
|
|
358
|
+
|
|
359
|
+
def _has_dep(*names: str) -> bool:
|
|
360
|
+
return any(n in dep for n in names for dep in all_deps)
|
|
361
|
+
|
|
362
|
+
# --- Python ---
|
|
363
|
+
pyproject_path = root / "pyproject.toml"
|
|
364
|
+
if pyproject_path.exists():
|
|
365
|
+
found_files.append("pyproject.toml")
|
|
366
|
+
try:
|
|
367
|
+
with open(pyproject_path, "rb") as f:
|
|
368
|
+
pyp = tomllib.load(f)
|
|
369
|
+
proj = pyp.get("project", {})
|
|
370
|
+
name, py_req = proj.get("name", ""), proj.get("requires-python", "")
|
|
371
|
+
if name:
|
|
372
|
+
sig["project_name"] = name
|
|
373
|
+
_add(f"Python project{f' \"{name}\"' if name else ''}{f', requires-python {py_req}' if py_req else ''}")
|
|
374
|
+
tool = pyp.get("tool", {})
|
|
375
|
+
if "pytest" in tool:
|
|
376
|
+
_add("Test framework: pytest")
|
|
377
|
+
sig["has_tests"] = True
|
|
378
|
+
if "ruff" in tool:
|
|
379
|
+
_add("Linting/formatting: ruff")
|
|
380
|
+
if "mypy" in tool:
|
|
381
|
+
_add("Type checking: mypy")
|
|
382
|
+
raw: list[str] = list(proj.get("dependencies", []))
|
|
383
|
+
for group in pyp.get("dependency-groups", {}).values():
|
|
384
|
+
raw.extend(d for d in group if isinstance(d, str))
|
|
385
|
+
for extra in proj.get("optional-dependencies", {}).values():
|
|
386
|
+
raw.extend(extra)
|
|
387
|
+
for dep in raw:
|
|
388
|
+
normalized = re.split(r"[>=<!~\[\s;]", dep.strip())[0].lower().replace("_", "-")
|
|
389
|
+
all_deps.add(normalized)
|
|
390
|
+
except Exception:
|
|
391
|
+
pass
|
|
392
|
+
|
|
393
|
+
if (root / "uv.lock").exists():
|
|
394
|
+
found_files.append("uv.lock")
|
|
395
|
+
_add("Package manager: uv")
|
|
396
|
+
|
|
397
|
+
# --- Node / JS ---
|
|
398
|
+
pkg_json_path = root / "package.json"
|
|
399
|
+
if pkg_json_path.exists():
|
|
400
|
+
found_files.append("package.json")
|
|
401
|
+
try:
|
|
402
|
+
pkg = json.loads(pkg_json_path.read_text())
|
|
403
|
+
name = pkg.get("name", "")
|
|
404
|
+
if name and not sig["project_name"]:
|
|
405
|
+
sig["project_name"] = name
|
|
406
|
+
node_ver = pkg.get("engines", {}).get("node", "")
|
|
407
|
+
parts = [f"Node.js project \"{name}\"" if name else "Node.js project"]
|
|
408
|
+
if node_ver:
|
|
409
|
+
parts.append(f"requires Node {node_ver}")
|
|
410
|
+
_add(", ".join(parts))
|
|
411
|
+
mgr = pkg.get("packageManager", "")
|
|
412
|
+
if mgr:
|
|
413
|
+
_add(f"Package manager: {mgr.split('@')[0]}")
|
|
414
|
+
node_deps = {**pkg.get("dependencies", {}), **pkg.get("devDependencies", {})}
|
|
415
|
+
all_deps.update(k.lower() for k in node_deps)
|
|
416
|
+
if pkg.get("workspaces"):
|
|
417
|
+
_add("Monorepo: npm/yarn workspaces")
|
|
418
|
+
if "typescript" in node_deps:
|
|
419
|
+
_add("Language: TypeScript")
|
|
420
|
+
for fw in ["next", "nuxt", "remix", "svelte", "react", "vue", "express", "fastify", "hono", "elysia"]:
|
|
421
|
+
if fw in node_deps:
|
|
422
|
+
_add(f"Framework: {fw}")
|
|
423
|
+
break
|
|
424
|
+
test_cmd = pkg.get("scripts", {}).get("test", "")
|
|
425
|
+
if "jest" in test_cmd or "jest" in node_deps:
|
|
426
|
+
_add("Test framework: Jest")
|
|
427
|
+
sig["has_tests"] = True
|
|
428
|
+
elif "vitest" in test_cmd or "vitest" in node_deps:
|
|
429
|
+
_add("Test framework: Vitest")
|
|
430
|
+
sig["has_tests"] = True
|
|
431
|
+
except Exception:
|
|
432
|
+
pass
|
|
433
|
+
|
|
434
|
+
# --- Go ---
|
|
435
|
+
if (root / "go.mod").exists():
|
|
436
|
+
found_files.append("go.mod")
|
|
437
|
+
try:
|
|
438
|
+
for line in (root / "go.mod").read_text().splitlines():
|
|
439
|
+
if line.startswith("module "):
|
|
440
|
+
_add(f"Go module: {line.split()[1]}")
|
|
441
|
+
elif line.startswith("go "):
|
|
442
|
+
_add(f"Go version: {line.split()[1]}")
|
|
443
|
+
break
|
|
444
|
+
except Exception:
|
|
445
|
+
pass
|
|
446
|
+
|
|
447
|
+
# --- Rust ---
|
|
448
|
+
if (root / "Cargo.toml").exists():
|
|
449
|
+
found_files.append("Cargo.toml")
|
|
450
|
+
try:
|
|
451
|
+
with open(root / "Cargo.toml", "rb") as f:
|
|
452
|
+
c = tomllib.load(f)
|
|
453
|
+
p = c.get("package", {})
|
|
454
|
+
if p.get("name") and not sig["project_name"]:
|
|
455
|
+
sig["project_name"] = p["name"]
|
|
456
|
+
rust_name = f' "{p["name"]}"' if p.get("name") else ""
|
|
457
|
+
rust_edition = f', edition {p["edition"]}' if p.get("edition") else ""
|
|
458
|
+
_add(f"Rust project{rust_name}{rust_edition}")
|
|
459
|
+
except Exception:
|
|
460
|
+
pass
|
|
461
|
+
|
|
462
|
+
# --- Monorepo ---
|
|
463
|
+
for mf in ["nx.json", "turbo.json", "lerna.json", "pnpm-workspace.yaml"]:
|
|
464
|
+
if (root / mf).exists():
|
|
465
|
+
found_files.append(mf)
|
|
466
|
+
_add(f"Monorepo: {mf.split('.')[0]} workspace")
|
|
467
|
+
break
|
|
468
|
+
if not any("Monorepo" in i for i in inferred):
|
|
469
|
+
if (root / "packages").is_dir() or (root / "apps").is_dir():
|
|
470
|
+
_add("Monorepo: packages/ or apps/ directory structure")
|
|
471
|
+
|
|
472
|
+
# --- Data layer ---
|
|
473
|
+
_DB_MAP = {
|
|
474
|
+
"PostgreSQL": {"psycopg", "psycopg2", "asyncpg", "pg", "postgres", "neon"},
|
|
475
|
+
"MySQL/MariaDB": {"pymysql", "aiomysql", "mysql2", "mysql"},
|
|
476
|
+
"MongoDB": {"pymongo", "motor", "mongodb", "mongoose"},
|
|
477
|
+
"Redis": {"redis", "aioredis", "ioredis"},
|
|
478
|
+
"SQLite": {"aiosqlite", "better-sqlite3"},
|
|
479
|
+
}
|
|
480
|
+
_ORM_DEPS = {"sqlalchemy", "tortoise-orm", "databases", "prisma", "drizzle-orm",
|
|
481
|
+
"typeorm", "sequelize", "knex", "mikro-orm"}
|
|
482
|
+
detected_db = [label for label, names in _DB_MAP.items() if _has_dep(*names)]
|
|
483
|
+
if detected_db:
|
|
484
|
+
_add(f"Data store(s): {', '.join(detected_db)}")
|
|
485
|
+
detected_orm = next((d for d in _ORM_DEPS if _has_dep(d)), None)
|
|
486
|
+
if detected_orm:
|
|
487
|
+
_add(f"ORM / query builder: {detected_orm}")
|
|
488
|
+
|
|
489
|
+
# --- Auth / payments (security-sensitive signals) ---
|
|
490
|
+
_AUTH_JWT = {"python-jose", "pyjwt", "jose"}
|
|
491
|
+
_AUTH_FRAMEWORK = {"passlib", "authlib", "passport", "next-auth", "@auth", "clerk",
|
|
492
|
+
"supabase", "firebase-admin", "google-auth", "python-keycloak"}
|
|
493
|
+
_PAYMENT_DEPS = {"stripe", "braintree"}
|
|
494
|
+
if _has_dep(*_AUTH_JWT):
|
|
495
|
+
_add("Auth: JWT-based (pyjwt / python-jose detected)")
|
|
496
|
+
sig["has_security_sensitive"] = True
|
|
497
|
+
elif _has_dep(*_AUTH_FRAMEWORK):
|
|
498
|
+
pkg_found = next((d for d in _AUTH_FRAMEWORK if _has_dep(d)), "unknown")
|
|
499
|
+
_add(f"Auth: {pkg_found} detected")
|
|
500
|
+
sig["has_security_sensitive"] = True
|
|
501
|
+
if _has_dep(*_PAYMENT_DEPS):
|
|
502
|
+
sig["has_security_sensitive"] = True
|
|
503
|
+
|
|
504
|
+
# --- Cloud SDKs ---
|
|
505
|
+
if _has_dep("boto3", "botocore", "aws-cdk", "@aws-sdk", "aws-lambda"):
|
|
506
|
+
_add("Cloud: AWS SDK present (boto3 / @aws-sdk)")
|
|
507
|
+
sig["cloud_detected"] = sig["cloud_detected"] or "AWS"
|
|
508
|
+
if _has_dep("google-cloud", "@google-cloud", "google-auth"):
|
|
509
|
+
_add("Cloud: GCP SDK present")
|
|
510
|
+
sig["cloud_detected"] = sig["cloud_detected"] or "GCP"
|
|
511
|
+
if _has_dep("azure-", "@azure"):
|
|
512
|
+
_add("Cloud: Azure SDK present")
|
|
513
|
+
sig["cloud_detected"] = sig["cloud_detected"] or "Azure"
|
|
514
|
+
|
|
515
|
+
# --- External integrations ---
|
|
516
|
+
_INTEGRATIONS = {
|
|
517
|
+
"stripe": "Payments: Stripe", "braintree": "Payments: Braintree",
|
|
518
|
+
"sendgrid": "Email: SendGrid", "resend": "Email: Resend",
|
|
519
|
+
"twilio": "Messaging: Twilio",
|
|
520
|
+
"openai": "AI: OpenAI SDK", "anthropic": "AI: Anthropic SDK", "langchain": "AI: LangChain",
|
|
521
|
+
"celery": "Task queue: Celery", "dramatiq": "Task queue: Dramatiq",
|
|
522
|
+
"kafka-python": "Messaging: Kafka", "confluent-kafka": "Messaging: Kafka (Confluent)",
|
|
523
|
+
"pika": "Messaging: RabbitMQ", "aio-pika": "Messaging: RabbitMQ (async)",
|
|
524
|
+
"elasticsearch-py": "Search: Elasticsearch", "typesense": "Search: Typesense",
|
|
525
|
+
}
|
|
526
|
+
for dep, label in _INTEGRATIONS.items():
|
|
527
|
+
if _has_dep(dep):
|
|
528
|
+
_add(label)
|
|
529
|
+
|
|
530
|
+
# --- CI/CD ---
|
|
531
|
+
gh_wf = root / ".github" / "workflows"
|
|
532
|
+
if gh_wf.is_dir():
|
|
533
|
+
wfs = list(gh_wf.glob("*.yml")) + list(gh_wf.glob("*.yaml"))
|
|
534
|
+
if wfs:
|
|
535
|
+
found_files.append(".github/workflows/")
|
|
536
|
+
_add(f"CI/CD: GitHub Actions ({len(wfs)} workflow file(s))")
|
|
537
|
+
sig["has_ci"] = True
|
|
538
|
+
if (root / ".gitlab-ci.yml").exists():
|
|
539
|
+
found_files.append(".gitlab-ci.yml")
|
|
540
|
+
_add("CI/CD: GitLab CI")
|
|
541
|
+
sig["has_ci"] = True
|
|
542
|
+
|
|
543
|
+
# --- Docker ---
|
|
544
|
+
if (root / "Dockerfile").exists():
|
|
545
|
+
found_files.append("Dockerfile")
|
|
546
|
+
try:
|
|
547
|
+
first_from = next(
|
|
548
|
+
(l.split()[1] for l in (root / "Dockerfile").read_text().splitlines() if l.startswith("FROM")), None
|
|
549
|
+
)
|
|
550
|
+
_add(f"Containerized — Dockerfile present{f' (base: {first_from})' if first_from else ''}")
|
|
551
|
+
except Exception:
|
|
552
|
+
_add("Containerized — Dockerfile present")
|
|
553
|
+
sig["has_container"] = True
|
|
554
|
+
for compose in ["docker-compose.yml", "docker-compose.yaml"]:
|
|
555
|
+
if (root / compose).exists():
|
|
556
|
+
found_files.append(compose)
|
|
557
|
+
_add("Local dev: docker-compose present")
|
|
558
|
+
break
|
|
559
|
+
|
|
560
|
+
# --- Linting / formatting ---
|
|
561
|
+
eslint_files = [".eslintrc", ".eslintrc.js", ".eslintrc.json", ".eslintrc.cjs",
|
|
562
|
+
"eslint.config.js", "eslint.config.mjs", "eslint.config.cjs"]
|
|
563
|
+
if any((root / f).exists() for f in eslint_files):
|
|
564
|
+
found_files.append(".eslintrc*")
|
|
565
|
+
_add("Linting: ESLint")
|
|
566
|
+
prettier_files = [".prettierrc", ".prettierrc.json", ".prettierrc.js",
|
|
567
|
+
".prettierrc.cjs", "prettier.config.js"]
|
|
568
|
+
if any((root / f).exists() for f in prettier_files):
|
|
569
|
+
found_files.append(".prettierrc*")
|
|
570
|
+
_add("Formatting: Prettier")
|
|
571
|
+
if (root / "ruff.toml").exists():
|
|
572
|
+
found_files.append("ruff.toml")
|
|
573
|
+
_add("Linting/formatting: ruff (ruff.toml)")
|
|
574
|
+
if (root / "pytest.ini").exists():
|
|
575
|
+
found_files.append("pytest.ini")
|
|
576
|
+
_add("Test framework: pytest (pytest.ini)")
|
|
577
|
+
sig["has_tests"] = True
|
|
578
|
+
|
|
579
|
+
# --- Infrastructure ---
|
|
580
|
+
if list(root.glob("*.tf")) or (root / "terraform").is_dir():
|
|
581
|
+
_add("Infrastructure as code: Terraform")
|
|
582
|
+
sig["has_infra"] = True
|
|
583
|
+
if any((root / d).is_dir() for d in ["k8s", "kubernetes", "helm"]):
|
|
584
|
+
_add("Deployment: Kubernetes (manifests or Helm charts present)")
|
|
585
|
+
sig["has_infra"] = True
|
|
586
|
+
|
|
587
|
+
# --- Architecture signals ---
|
|
588
|
+
src = root / "src"
|
|
589
|
+
if src.is_dir():
|
|
590
|
+
layers = [d for d in ["api", "services", "models", "controllers", "middleware", "handlers", "repositories"]
|
|
591
|
+
if (src / d).is_dir()]
|
|
592
|
+
if layers:
|
|
593
|
+
layer_str = ", ".join(layers[:3]) + ("..." if len(layers) > 3 else "")
|
|
594
|
+
_add(f"Architecture: layered structure detected (src/{layer_str})")
|
|
595
|
+
|
|
596
|
+
# --- README summary (for purpose inference) ---
|
|
597
|
+
readme = root / "README.md"
|
|
598
|
+
if readme.exists():
|
|
599
|
+
found_files.append("README.md")
|
|
600
|
+
try:
|
|
601
|
+
lines = [l.strip() for l in readme.read_text().splitlines()
|
|
602
|
+
if l.strip() and not l.startswith("#")]
|
|
603
|
+
if lines:
|
|
604
|
+
sig["readme_summary"] = lines[0][:120]
|
|
605
|
+
except Exception:
|
|
606
|
+
pass
|
|
607
|
+
for cf in ["CLAUDE.md", ".cursorrules"]:
|
|
608
|
+
if (root / cf).exists():
|
|
609
|
+
found_files.append(cf)
|
|
610
|
+
|
|
611
|
+
# --- Primary stack detection for stack-aware hints ---
|
|
612
|
+
primary_stack = (
|
|
613
|
+
"python" if any("Python" in i for i in inferred) else
|
|
614
|
+
"node" if any("Node.js" in i or "TypeScript" in i for i in inferred) else
|
|
615
|
+
"go" if any("Go module" in i or "Go version" in i for i in inferred) else
|
|
616
|
+
"rust" if any("Rust" in i for i in inferred) else
|
|
617
|
+
"generic"
|
|
618
|
+
)
|
|
619
|
+
|
|
620
|
+
def _test_hint() -> str:
    """Return an example testing policy matched to the detected stack.

    Looks up ``primary_stack`` (a free variable assigned just before this
    helper) in a per-stack table; any stack without its own entry gets a
    language-neutral example.
    """
    generic_hint = "e.g. unit tests, integration tests, coverage threshold"
    hints_by_stack = {
        "python": "e.g. pytest with fixtures and coverage threshold; no mocking external calls in unit tests",
        "node": "e.g. Jest or Vitest; 80% coverage threshold; no real HTTP calls in unit tests",
        "go": "e.g. go test, table-driven tests; benchmarks for hot paths",
        "rust": "e.g. cargo test; #[cfg(test)] modules; integration tests in tests/",
    }
    return hints_by_stack.get(primary_stack, generic_hint)
|
|
630
|
+
|
|
631
|
+
def _exclusions_hint() -> str:
    """Return example library exclusions / mandates for the detected stack.

    Looks up ``primary_stack`` (a free variable assigned just before this
    helper) in a per-stack table; any stack without its own entry gets a
    language-neutral example.
    """
    generic_hint = "e.g. specific libraries to avoid, patterns to always follow, things that must never happen"
    hints_by_stack = {
        "python": "e.g. 'no requests, use httpx'; 'no Flask, FastAPI only'; 'always type-annotate public APIs'",
        "node": "e.g. 'no CommonJS, ESM only'; 'no lodash, use native'; 'no class-based components'",
        "go": "e.g. 'no global state'; 'always wrap errors with fmt.Errorf'; 'no init() functions'",
        "rust": "e.g. 'no unwrap() in production code'; 'async with tokio only'; 'no unsafe blocks'",
    }
    return hints_by_stack.get(primary_stack, generic_hint)
|
|
641
|
+
|
|
642
|
+
def _constraints_hint() -> str:
    """Return example constraints tailored to the detected signals.

    Reads ``sig["has_security_sensitive"]`` and ``sig["cloud_detected"]``
    (``sig`` is a free variable populated earlier). The most specific
    combination wins: both signals, then security only, then cloud only,
    then a generic example.
    """
    sensitive = sig["has_security_sensitive"]
    cloud = sig["cloud_detected"]

    if sensitive:
        if cloud:
            return f"e.g. GDPR / PCI-DSS compliance; {cloud} cost ceiling; latency SLA"
        return "e.g. GDPR, PCI-DSS, SOC2, HIPAA; audit logging requirements; data residency"

    if cloud:
        return f"e.g. {cloud} cost ceiling; latency SLA; multi-region requirements"

    return "e.g. <100ms p99 latency; 1M+ concurrent users; GDPR; monthly cost ceiling"
|
|
650
|
+
|
|
651
|
+
# --- Intent gaps: conditional on signals, except purpose (always asked) ---
|
|
652
|
+
gaps: list[dict] = []
|
|
653
|
+
|
|
654
|
+
# Purpose — always: can never be inferred from code
|
|
655
|
+
name = sig["project_name"]
|
|
656
|
+
gaps.append(_gap(
|
|
657
|
+
assumption=_infer_purpose(name, sig["readme_summary"]),
|
|
658
|
+
question="What does this repo do and who uses it?",
|
|
659
|
+
hint=(
|
|
660
|
+
f"e.g. what {name} is for and who uses it"
|
|
661
|
+
if name else
|
|
662
|
+
"e.g. 'REST API for internal task management, used by 3 frontend apps'"
|
|
663
|
+
),
|
|
664
|
+
))
|
|
665
|
+
|
|
666
|
+
# Tests — only if no test framework detected
|
|
667
|
+
if not sig["has_tests"]:
|
|
668
|
+
gaps.append(_gap(
|
|
669
|
+
assumption="No automated test framework detected",
|
|
670
|
+
question="Is automated testing in scope?",
|
|
671
|
+
hint=_test_hint(),
|
|
672
|
+
))
|
|
673
|
+
|
|
674
|
+
# CI — only if no CI config found
|
|
675
|
+
if not sig["has_ci"]:
|
|
676
|
+
gaps.append(_gap(
|
|
677
|
+
assumption="No CI/CD config found in this repo",
|
|
678
|
+
question="Is there a build or deploy pipeline, or is one planned?",
|
|
679
|
+
hint="e.g. GitHub Actions, GitLab CI, CircleCI; or: manual deploys, not needed yet",
|
|
680
|
+
))
|
|
681
|
+
|
|
682
|
+
# Deployment — only if no container or infra config
|
|
683
|
+
if not sig["has_container"] and not sig["has_infra"]:
|
|
684
|
+
gaps.append(_gap(
|
|
685
|
+
assumption="No container or infra config found — deployment target unclear",
|
|
686
|
+
question="Where does this run, or is it local-only?",
|
|
687
|
+
hint="e.g. containerized VPS, serverless function, internal CLI, local-only tool, not deployed yet",
|
|
688
|
+
))
|
|
689
|
+
|
|
690
|
+
# Cloud SDK but no deploy config — probably in a separate repo
|
|
691
|
+
if sig["cloud_detected"] and not sig["has_container"] and not sig["has_infra"]:
|
|
692
|
+
gaps.append(_gap(
|
|
693
|
+
assumption=f"{sig['cloud_detected']} SDK detected but no deploy config found here",
|
|
694
|
+
question=f"Is the {sig['cloud_detected']} deploy config in a separate repo?",
|
|
695
|
+
hint="e.g. separate infra repo, serverless framework config, or not yet set up",
|
|
696
|
+
))
|
|
697
|
+
|
|
698
|
+
# Compliance — only if auth or payment deps detected
|
|
699
|
+
if sig["has_security_sensitive"]:
|
|
700
|
+
gaps.append(_gap(
|
|
701
|
+
assumption="Auth or payment handling detected — compliance requirements unknown",
|
|
702
|
+
question="Any compliance or security requirements given the auth/payment handling?",
|
|
703
|
+
hint="e.g. GDPR, PCI-DSS, SOC2, HIPAA; internal security policy; audit logging; data residency",
|
|
704
|
+
))
|
|
705
|
+
|
|
706
|
+
# Team conventions — only if architecture signals suggest a team wrote this
|
|
707
|
+
has_team_signals = (
|
|
708
|
+
any("Architecture" in i or "layered" in i for i in inferred) or
|
|
709
|
+
len(inferred) > 5
|
|
710
|
+
)
|
|
711
|
+
if has_team_signals:
|
|
712
|
+
gaps.append(_gap(
|
|
713
|
+
assumption="Team conventions not captured in config files",
|
|
714
|
+
question="Any branching model, PR process, or unwritten norms beyond what's in config files?",
|
|
715
|
+
hint="e.g. trunk-based vs feature branches; PR review requirements; who owns which area",
|
|
716
|
+
))
|
|
717
|
+
|
|
718
|
+
# Exclusions — only if dep tree suggests architectural choices were made
|
|
719
|
+
has_dep_choices = len(all_deps) > 5 or bool(detected_orm) or len(detected_db) > 0
|
|
720
|
+
if has_dep_choices:
|
|
721
|
+
gaps.append(_gap(
|
|
722
|
+
assumption="No known intentional library exclusions or architectural mandates",
|
|
723
|
+
question="Any libraries or patterns that are intentionally excluded or always required?",
|
|
724
|
+
hint=_exclusions_hint(),
|
|
725
|
+
))
|
|
726
|
+
|
|
727
|
+
# Constraints — only if production signals exist
|
|
728
|
+
has_production_signals = (
|
|
729
|
+
sig["has_security_sensitive"] or sig["cloud_detected"] or
|
|
730
|
+
sig["has_infra"] or sig["has_container"]
|
|
731
|
+
)
|
|
732
|
+
if has_production_signals:
|
|
733
|
+
gaps.append(_gap(
|
|
734
|
+
assumption="No known performance, scale, or compliance constraints",
|
|
735
|
+
question="Any constraints that shape technical decisions?",
|
|
736
|
+
hint=_constraints_hint(),
|
|
737
|
+
))
|
|
738
|
+
|
|
739
|
+
return {"inferred": inferred, "gaps": gaps, "existing_context_files": found_files}
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: contexer
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server that captures and retrieves architectural decisions and context for Claude Code sessions
|
|
5
|
+
Project-URL: Homepage, https://github.com/bhargavamin/contexer
|
|
6
|
+
Project-URL: Repository, https://github.com/bhargavamin/contexer
|
|
7
|
+
Project-URL: Issues, https://github.com/bhargavamin/contexer/issues
|
|
8
|
+
Author-email: Bhargav Amin <devops.techpro@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: ai,claude,context,decisions,llm,mcp
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Requires-Python: >=3.12
|
|
19
|
+
Requires-Dist: mcp>=1.9.4
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
|
|
22
|
+
# Contexer
|
|
23
|
+
|
|
24
|
+
Contexer is a lightweight MCP server for Claude Code that automatically captures decisions made during coding sessions and surfaces them at the start of every future session — so Claude never starts blind.
|
|
25
|
+
|
|
26
|
+
## The problem
|
|
27
|
+
|
|
28
|
+
Every Claude Code session starts with no memory of the previous one. CLAUDE.md files require manual maintenance and go stale. When Claude works autonomously, the reasoning behind decisions disappears when the session ends. Teams end up re-explaining the same constraints, conventions, and architecture choices every time.
|
|
29
|
+
|
|
30
|
+
Contexer solves this by capturing decisions as they happen — silently, automatically, in the background — and replaying them as project rules at session start.
|
|
31
|
+
|
|
32
|
+
---
|
|
33
|
+
|
|
34
|
+
## Quick start
|
|
35
|
+
|
|
36
|
+
Install takes under two minutes. See **[docs/install.md](docs/install.md)** for full steps, verification, and uninstall.
|
|
37
|
+
For hooks, tool internals, filter logic, and storage layout see **[docs/architecture.md](docs/architecture.md)**.
|
|
38
|
+
|
|
39
|
+
**Plugin (recommended):**
|
|
40
|
+
|
|
41
|
+
```
|
|
42
|
+
/plugin marketplace add bhargavamin/contexer
|
|
43
|
+
/plugin install contexer@contexer
|
|
44
|
+
/reload-plugins
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
**Manual:**
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
git clone git@github.com:bhargavamin/contexer.git ~/tools/contexer
|
|
51
|
+
bash ~/tools/contexer/scripts/install.sh
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
After install, **open a new Claude Code session in any repo.** If no context exists, Claude will automatically run a bootstrap to capture your first decisions. If context exists, all constraints and conventions are injected at session start.
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## How it works
|
|
59
|
+
|
|
60
|
+
You work normally. Contexer runs in the background via Claude Code hooks.
|
|
61
|
+
|
|
62
|
+
```
|
|
63
|
+
Session opens
|
|
64
|
+
└─▶ All conventions + constraints injected as project rules
|
|
65
|
+
Architecture/pattern decisions available on demand (JIT)
|
|
66
|
+
First session with no context → bootstrap runs automatically
|
|
67
|
+
|
|
68
|
+
You type a prompt
|
|
69
|
+
└─▶ Your first message is stored as the current task description
|
|
70
|
+
└─▶ "Why/reason/rationale/decided" questions auto-fetch matching decisions
|
|
71
|
+
|
|
72
|
+
Claude works
|
|
73
|
+
└─▶ Calls get_context when it needs architecture or pattern context
|
|
74
|
+
└─▶ Calls update_context when it makes a significant decision
|
|
75
|
+
|
|
76
|
+
Context window nears limit
|
|
77
|
+
└─▶ Claude is reminded to save any unsaved decisions before compaction
|
|
78
|
+
└─▶ After compaction, full context is reloaded automatically
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
**You never call any tool directly.** Claude handles all tool calls. If Claude misses something, say *"store that decision"* and it will call `update_context` immediately.
|
|
82
|
+
|
|
83
|
+
---
|
|
84
|
+
|
|
85
|
+
## Decision types
|
|
86
|
+
|
|
87
|
+
Every stored decision has a `subtype` that controls when and how it is surfaced.
|
|
88
|
+
|
|
89
|
+
| Subtype | What it captures | Injected at session start? |
|
|
90
|
+
|---|---|---|
|
|
91
|
+
| `constraint` | Rules that must always apply — "never commit untested code" | Yes — always |
|
|
92
|
+
| `convention` | Team or project standards — "use uv not pip", "conventional commits" | Yes — always |
|
|
93
|
+
| `architecture` | Structural decisions — "chose FastMCP over low-level mcp.Server" | No — fetched on demand |
|
|
94
|
+
| `pattern` | Recurring implementation approaches — "plain dicts as function boundaries" | No — fetched on demand |
|
|
95
|
+
|
|
96
|
+
Constraints and conventions are injected directly at every session start because they apply to every task. Architecture and pattern decisions are large and task-specific — Claude fetches them just-in-time when the task requires them.
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Managing decisions
|
|
101
|
+
|
|
102
|
+
All operations use natural language. Claude translates them into the right tool call.
|
|
103
|
+
|
|
104
|
+
### Store a decision
|
|
105
|
+
|
|
106
|
+
```
|
|
107
|
+
"store that as a constraint"
|
|
108
|
+
"save this as a convention: always use uv not pip"
|
|
109
|
+
"remember this architecture decision"
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
Claude calls `update_context` with the content and the appropriate subtype. The server applies a novelty filter before storing — content with more than 70% token overlap with an existing decision is silently discarded as a duplicate.
|
|
113
|
+
|
|
114
|
+
> **Note:** Your first prompt each session is automatically stored as the *task description* — not as a decision or constraint. If you open a session with an instruction like *"always update docs before committing"*, it is captured as the task, not stored as a constraint. To store it as a constraint, either complete the turn (Claude will call `update_context` at the end) or explicitly say *"store that as a constraint"*.
|
|
115
|
+
|
|
116
|
+
### Query decisions
|
|
117
|
+
|
|
118
|
+
```
|
|
119
|
+
"show me all constraints"
|
|
120
|
+
"what decisions did we make about postgres?"
|
|
121
|
+
"show everything stored for this repo"
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
| Example call | What it returns |
|
|
125
|
+
|---|---|
|
|
126
|
+
| `get_context()` | Latest 10 decisions — overview |
|
|
127
|
+
| `get_context(entry_type="constraint")` | Up to 25 constraints |
|
|
128
|
+
| `get_context(query="postgres")` | Up to 25 decisions matching "postgres" |
|
|
129
|
+
| `get_context(limit=50)` | Up to 50 decisions |
|
|
130
|
+
|
|
131
|
+
### Update a decision
|
|
132
|
+
|
|
133
|
+
```
|
|
134
|
+
"update the uv decision — we switched back to pip"
|
|
135
|
+
"correct the constraint about commit format"
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Claude calls `update_context` with the revised content. The old entry is not removed — a new entry is added alongside it. If the revised content is too similar to the original (>70% token overlap), it will be filtered as a duplicate; in that case, rephrase the revision so that it states what changed.
|
|
139
|
+
|
|
140
|
+
### Remove a decision
|
|
141
|
+
|
|
142
|
+
```
|
|
143
|
+
"delete the postgres decision"
|
|
144
|
+
"remove all outdated constraints"
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Claude reads `~/.contexer/<repo_slug>.json` and removes the matching entry directly. The file is plain JSON — you can also edit or prune it manually at any time.
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## Tools reference
|
|
152
|
+
|
|
153
|
+
| Tool | Triggered by | What it does |
|
|
154
|
+
|---|---|---|
|
|
155
|
+
| `capture_context` | `UserPromptSubmit` hook — once per session | Stores the first prompt as the task description |
|
|
156
|
+
| `update_context` | Claude, mid-task | Nominates a decision; server filters before storing |
|
|
157
|
+
| `get_context` | Claude, on demand | Returns stored decisions — filtered by keyword or subtype |
|
|
158
|
+
| `get_context_for_prompt` | `UserPromptSubmit` hook — every prompt | Detects rationale questions and auto-injects matching decisions |
|
|
159
|
+
| `bootstrap_context` | Claude, first session with no context | Scans repo stack for inferable decisions; surfaces gap questions |
|
|
160
|
+
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
## Storage
|
|
164
|
+
|
|
165
|
+
Decisions are stored locally at `~/.contexer/<repo_slug>.json` — one file per repo, capped at 500 entries. No cloud, no database, no accounts.
|
|
166
|
+
|
|
167
|
+
Each entry contains:
|
|
168
|
+
|
|
169
|
+
| Field | Values |
|
|
170
|
+
|---|---|
|
|
171
|
+
| `id` | UUID |
|
|
172
|
+
| `type` | `task` or `decision` |
|
|
173
|
+
| `subtype` | `architecture` \| `constraint` \| `pattern` \| `convention` |
|
|
174
|
+
| `content` | The full decision text |
|
|
175
|
+
| `session_id` | UUID for the session that created it |
|
|
176
|
+
| `timestamp` | ISO 8601 UTC |
|
|
177
|
+
|
|
178
|
+
Inspect the store at any time:
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
cat ~/.contexer/<repo_slug>.json
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
---
|
|
185
|
+
|
|
186
|
+
## Troubleshooting
|
|
187
|
+
|
|
188
|
+
**Claude isn't storing decisions automatically.**
|
|
189
|
+
Claude calls `update_context` after it makes a significant decision, not continuously. If something specific was missed, say *"store that decision"* and Claude will call `update_context` immediately.
|
|
190
|
+
|
|
191
|
+
**A decision was stored but later ignored.**
|
|
192
|
+
Constraints and conventions are injected at session start. If you added a new constraint mid-session, it will appear from the next session onward.
|
|
193
|
+
|
|
194
|
+
**A decision is outdated or wrong.**
|
|
195
|
+
Say *"delete the X decision"* or edit `~/.contexer/<repo_slug>.json` directly and remove the entry.
|
|
196
|
+
|
|
197
|
+
**The novelty filter rejected a new decision.**
|
|
198
|
+
If the new content is too similar to an existing decision (>70% token overlap), it is silently discarded. Rephrase it to be more specific, or remove the old entry first.
|
|
199
|
+
|
|
200
|
+
**No context was injected at session start.**
|
|
201
|
+
If no decisions are stored for the repo, the bootstrap flow runs instead. Complete bootstrap once and all future sessions will have context.
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
## License
|
|
206
|
+
|
|
207
|
+
MIT
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
contexer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
contexer/__main__.py,sha256=2ukUX0-kicHLiK9MT2Y02r5ZSaI6_pwQd-jpmu5rgXw,41
|
|
3
|
+
contexer/server.py,sha256=kfdETEvvTzeBr9SZzaOMzTDZUSbS17WG4ci4c6LAw2A,3034
|
|
4
|
+
contexer/store.py,sha256=UNUPkWkI3sflSvoJDIccNvf0o-i4aD53eGokYIUrlj4,30686
|
|
5
|
+
contexer-0.1.0.dist-info/METADATA,sha256=wCLYEwT0oDeHzZ-WhVBUNV65oX0PrA8woxJTqkH7Vss,8488
|
|
6
|
+
contexer-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
7
|
+
contexer-0.1.0.dist-info/entry_points.txt,sha256=MClLqvDhWe4p8kmVgwl9o7heDyx6wNep-TnPG-AxF50,50
|
|
8
|
+
contexer-0.1.0.dist-info/licenses/LICENSE,sha256=WDlQ0ISzY2yN46w9zx-kcr6V8jTDB0RqqW-iRY85VDk,1069
|
|
9
|
+
contexer-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Bhargav Amin
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|