contextcraft-py 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextcraft/__init__.py +1 -0
- contextcraft/api/__init__.py +1 -0
- contextcraft/api/main.py +311 -0
- contextcraft/api/routes/__init__.py +1 -0
- contextcraft/cli/__init__.py +1 -0
- contextcraft/cli/main.py +625 -0
- contextcraft/config.py +117 -0
- contextcraft/db/__init__.py +1 -0
- contextcraft/db/chunks_repo.py +264 -0
- contextcraft/db/connection.py +114 -0
- contextcraft/db/graph_repo.py +133 -0
- contextcraft/db/migrations/001_init.sql +55 -0
- contextcraft/db/migrations/__init__.py +1 -0
- contextcraft/embeddings/__init__.py +1 -0
- contextcraft/embeddings/base.py +35 -0
- contextcraft/embeddings/gemini.py +65 -0
- contextcraft/embeddings/ollama.py +55 -0
- contextcraft/embeddings/openai.py +76 -0
- contextcraft/git/__init__.py +1 -0
- contextcraft/git/async_git.py +35 -0
- contextcraft/git/blame.py +151 -0
- contextcraft/git/history.py +77 -0
- contextcraft/graph/__init__.py +1 -0
- contextcraft/graph/expander.py +79 -0
- contextcraft/graph/models.py +27 -0
- contextcraft/graph/resolver.py +252 -0
- contextcraft/http_timeouts.py +11 -0
- contextcraft/llm/__init__.py +1 -0
- contextcraft/llm/anthropic.py +49 -0
- contextcraft/llm/base.py +28 -0
- contextcraft/llm/gemini.py +60 -0
- contextcraft/llm/ollama.py +146 -0
- contextcraft/llm/openai.py +70 -0
- contextcraft/models.py +149 -0
- contextcraft/parser/__init__.py +1 -0
- contextcraft/parser/ast_parser.py +333 -0
- contextcraft/py.typed +0 -0
- contextcraft/reranker/__init__.py +6 -0
- contextcraft/reranker/base.py +39 -0
- contextcraft/reranker/cohere.py +75 -0
- contextcraft/search/__init__.py +1 -0
- contextcraft/search/bm25_search.py +69 -0
- contextcraft/search/context_builder.py +172 -0
- contextcraft/search/hybrid.py +147 -0
- contextcraft/search/vector_search.py +67 -0
- contextcraft/security.py +103 -0
- contextcraft/startup.py +88 -0
- contextcraft_py-0.3.0.dist-info/METADATA +384 -0
- contextcraft_py-0.3.0.dist-info/RECORD +51 -0
- contextcraft_py-0.3.0.dist-info/WHEEL +4 -0
- contextcraft_py-0.3.0.dist-info/entry_points.txt +2 -0
contextcraft/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# ContextCraft
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# contextcraft.api
|
contextcraft/api/main.py
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
"""FastAPI application for ContextCraft.
|
|
2
|
+
|
|
3
|
+
Provides REST endpoints:
|
|
4
|
+
GET /health — Health check
|
|
5
|
+
GET /repos — List indexed repositories
|
|
6
|
+
POST /index — Trigger indexing of a repository
|
|
7
|
+
POST /ask — Ask a question (SSE streaming)
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import asyncio
|
|
13
|
+
import logging
|
|
14
|
+
import os
|
|
15
|
+
from collections.abc import AsyncIterator
|
|
16
|
+
from contextlib import asynccontextmanager
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
from fastapi import FastAPI, HTTPException, Request
|
|
21
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
22
|
+
from pydantic import BaseModel, Field
|
|
23
|
+
from slowapi import Limiter, _rate_limit_exceeded_handler
|
|
24
|
+
from slowapi.errors import RateLimitExceeded
|
|
25
|
+
from slowapi.util import get_remote_address
|
|
26
|
+
from sse_starlette.sse import EventSourceResponse
|
|
27
|
+
|
|
28
|
+
from contextcraft.config import settings
|
|
29
|
+
from contextcraft.db import chunks_repo
|
|
30
|
+
from contextcraft.db.connection import close_pool, run_migrations
|
|
31
|
+
from contextcraft.embeddings.base import BaseEmbedder
|
|
32
|
+
from contextcraft.embeddings.gemini import GeminiEmbedder
|
|
33
|
+
from contextcraft.embeddings.openai import OpenAIEmbedder
|
|
34
|
+
from contextcraft.llm.base import BaseLLM
|
|
35
|
+
from contextcraft.models import SearchResult
|
|
36
|
+
from contextcraft.reranker.cohere import CohereReranker, RerankerUnavailableError
|
|
37
|
+
from contextcraft.search.context_builder import build_context, format_sources
|
|
38
|
+
from contextcraft.search.hybrid import hybrid_search
|
|
39
|
+
from contextcraft.security import sanitize_query, validate_repo_path
|
|
40
|
+
from contextcraft.startup import verify_startup
|
|
41
|
+
|
|
42
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Registry of in-flight background tasks. index_repo() adds each indexing task
# here and removes it again when the task finishes, so a strong reference to
# the task exists for as long as it is running.
_background_tasks: set[asyncio.Task[Any]] = set()

# Rate limiter that buckets requests by the caller's remote address.
limiter = Limiter(key_func=get_remote_address)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Run startup checks and migrations, then close the DB pool on shutdown.

    Startup awaits ``verify_startup()`` followed by ``run_migrations()``.
    Shutdown awaits ``close_pool()``.

    Args:
        app: The FastAPI application. Unused here; it is part of the
            lifespan callable signature.

    Yields:
        None, while the application is serving requests.
    """
    await verify_startup()
    await run_migrations()
    try:
        yield
    finally:
        # Release the pool even when the server leaves the lifespan context by
        # throwing an exception (or a cancellation) in at the yield point;
        # without the finally the pool would stay open in that case.
        await close_pool()
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# The ASGI application object; startup/shutdown work is delegated to lifespan().
app = FastAPI(
    title="ContextCraft API",
    description="Index codebases and ask questions with full context.",
    version=settings.app_version,
    lifespan=lifespan,
)

# CORS: only the configured origins, and only the HTTP methods this API serves.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.allowed_origins,
    allow_credentials=True,
    allow_methods=["GET", "POST", "OPTIONS"],
    allow_headers=["*"],
)

# Rate limiting: expose the limiter on the application state and register the
# handler that turns RateLimitExceeded into an HTTP response.
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)  # type: ignore[arg-type]
app.state.limiter = limiter
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# ---------------------------------------------------------------------------
|
|
77
|
+
# Request / Response models
|
|
78
|
+
# ---------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
# Request body for POST /index.
# (Documented with comments rather than a class docstring so the generated
# JSON schema for this model is unchanged.)
class IndexRequest(BaseModel):
    # Path of the repository to index; index_repo() passes it through
    # validate_repo_path() before use.
    repo_path: str

    # The flags below are forwarded as keyword arguments to the indexing
    # coroutine started by index_repo().
    incremental: bool = False
    skip_embeddings: bool = False
    skip_git: bool = False
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# Request body for POST /ask.
# (Documented with comments rather than a class docstring so the generated
# JSON schema for this model is unchanged.)
class AskRequest(BaseModel):
    # The question text; required, at most 500 characters.
    question: str = Field(..., max_length=500)

    # Repositories to search. ask_question() matches each entry against a
    # repository's id (as a string) or its name.
    repo_ids: list[str] | None = None

    # When true, every indexed repository is searched and repo_ids is ignored.
    all_repos: bool = False

    # Number of search results to keep for building the answer context.
    top_k: int = 10

    # When true, ask_question() also pulls in dependency chunks of the results.
    expand_deps: bool = False
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
# One entry of the GET /repos response, built by list_repos().
# (Documented with comments rather than a class docstring so the generated
# JSON schema for this model is unchanged.)
class RepoResponse(BaseModel):
    # Repository identifier, stringified by list_repos().
    id: str
    name: str
    local_path: str
    # Language values taken from the repository record.
    languages: list[str]
    chunk_count: int
    # ISO-8601 timestamp string, or None when the record has no timestamp.
    last_indexed_at: str | None
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# Response body of GET /health.
# (Documented with comments rather than a class docstring so the generated
# JSON schema for this model is unchanged.)
class HealthResponse(BaseModel):
    # health() always reports "ok".
    status: str
    # Application version, taken from settings.app_version.
    version: str
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
# ---------------------------------------------------------------------------
|
|
111
|
+
# Endpoints
|
|
112
|
+
# ---------------------------------------------------------------------------
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
@app.get("/health", response_model=HealthResponse)
async def health() -> HealthResponse:
    """Health check endpoint."""
    # Fixed payload: the status is always "ok" and the version comes straight
    # from the settings object; nothing else is consulted.
    payload = HealthResponse(status="ok", version=settings.app_version)
    return payload
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@app.get("/repos", response_model=list[RepoResponse])
async def list_repos() -> list[RepoResponse]:
    """List all indexed repositories."""
    listing: list[RepoResponse] = []
    # Convert each stored repository record into its API representation.
    for repo in await chunks_repo.list_repositories():
        entry = RepoResponse(
            id=str(repo.id),
            name=repo.name,
            local_path=repo.local_path,
            languages=[language.value for language in repo.languages],
            chunk_count=repo.chunk_count,
            # A record without a timestamp is reported as None.
            last_indexed_at=(repo.last_indexed_at.isoformat() if repo.last_indexed_at else None),
        )
        listing.append(entry)
    return listing
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _finish_index_task(task: asyncio.Task[Any]) -> None:
    """Unregister a finished indexing task and log why it ended, if abnormally."""
    _background_tasks.discard(task)
    if task.cancelled():
        logger.warning("Background indexing task was cancelled")
        return
    # Retrieving the exception here is what makes a failed run visible; the
    # HTTP caller has already received its response and will never see it.
    error = task.exception()
    if error is not None:
        logger.error("Background indexing task failed: %s", error, exc_info=error)


@app.post("/index")
async def index_repo(request: IndexRequest) -> dict[str, str]:
    """Trigger indexing of a repository (runs in background)."""
    # Args: request - the parsed POST body (path plus indexing flags).
    # Returns: a small status dict; the indexing itself continues after the
    #     response has been sent.
    # Raises: HTTPException(400) when validate_repo_path() rejects the path.
    try:
        repo_path = validate_repo_path(Path(request.repo_path))
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    # Imported inside the function, as in the original code, so the CLI module
    # is only loaded when indexing is actually requested.
    from contextcraft.cli.main import _index_async

    task = asyncio.create_task(
        _index_async(
            repo_path,
            incremental=request.incremental,
            skip_embeddings=request.skip_embeddings,
            skip_git=request.skip_git,
        )
    )
    # Keep a strong reference while the task runs; the callback removes it
    # again and logs any failure.
    _background_tasks.add(task)
    task.add_done_callback(_finish_index_task)

    return {"status": "indexing_started", "repo_path": str(repo_path)}
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@app.post("/ask")
@limiter.limit("10/minute")
async def ask_question(request: Request, body: AskRequest) -> EventSourceResponse:
    """Ask a question about an indexed codebase (SSE streaming)."""
    # Args:
    #     request: The raw HTTP request. It is not read in this function, but
    #         slowapi's limit decorator looks for the parameter literally
    #         named "request" and requires it to be a starlette Request, so
    #         the JSON payload must not use that name.
    #     body: The parsed JSON payload.
    # Returns: an SSE response emitting an optional "warning" event, then
    #     "token" events, then "sources" and "done".
    # Raises: HTTPException 400 (empty question / bad top_k), 404 (no repos,
    #     no matching repos, no results), 503 (missing key / embedding error).
    question = sanitize_query(body.question)
    if not question:
        raise HTTPException(status_code=400, detail="Question must not be empty")

    # A non-positive top_k can never produce a usable result list.
    if body.top_k < 1:
        raise HTTPException(status_code=400, detail="top_k must be a positive integer")

    if settings.embedding_provider == "openai" and not settings.openai_api_key:
        raise HTTPException(status_code=503, detail="OPENAI_API_KEY not configured")
    if settings.embedding_provider == "gemini" and not settings.gemini_api_key:
        raise HTTPException(status_code=503, detail="GEMINI_API_KEY not configured")

    repos = await chunks_repo.list_repositories()
    if not repos:
        raise HTTPException(status_code=404, detail="No repositories indexed")

    # Resolve which repositories to search: all of them, the requested subset
    # (matched by id or by name), or the first listed repository by default.
    target_repos = []
    if body.all_repos:
        target_repos = repos
    elif body.repo_ids:
        for rid in body.repo_ids:
            found = next((r for r in repos if str(r.id) == rid or r.name == rid), None)
            if found:
                target_repos.append(found)
        if not target_repos:
            raise HTTPException(
                status_code=404, detail="None of the requested repositories were found"
            )
    else:
        target_repos = [repos[0]]

    target_repo_ids = [r.id for r in target_repos]
    primary_repo_path = target_repos[0].local_path

    # Any provider value other than "gemini" uses the OpenAI embedder.
    embedder: BaseEmbedder = (
        GeminiEmbedder() if settings.embedding_provider == "gemini" else OpenAIEmbedder()
    )

    try:
        query_embedding = await embedder.embed_single(question)
    except Exception as exc:
        logger.error("Embedding service error: %s", exc)
        raise HTTPException(
            status_code=503,
            detail="Embedding service unavailable. Check API keys and try again.",
        ) from exc

    use_reranker = settings.rerank_enabled and bool(settings.cohere_api_key)
    # When reranking, over-fetch candidates so the reranker has something to
    # choose from, but never fetch fewer than the caller asked for (a fixed 20
    # would silently cap top_k values above 20).
    rerank_candidates = 20
    fetch_k = max(rerank_candidates, body.top_k) if use_reranker else body.top_k

    results = await hybrid_search(
        query_embedding=query_embedding,
        query_text=question,
        repo_ids=target_repo_ids,
        top_k=fetch_k,
    )

    if not results:
        raise HTTPException(status_code=404, detail="No relevant code found")

    rerank_warning: str | None = None
    if use_reranker:
        try:
            reranker = CohereReranker()
            results = await reranker.rerank(question, results, body.top_k)
        except RerankerUnavailableError as exc:
            # Degrade gracefully: keep the un-reranked order, trimmed to top_k,
            # and tell the client through a "warning" event.
            logger.warning("%s", exc)
            rerank_warning = str(exc)
            results = results[: body.top_k]

    dep_results: list[SearchResult] | None = None
    if body.expand_deps:
        try:
            from contextcraft.graph.expander import expand_with_deps

            chunk_ids = [sr.chunk.id for sr in results]
            dep_chunks = await expand_with_deps(chunk_ids)
            if dep_chunks:
                # Dependency chunks carry no relevance score; they are ranked
                # after the real search results.
                dep_results = [
                    SearchResult(chunk=dc, score=0.0, rank=len(results) + i)
                    for i, dc in enumerate(dep_chunks)
                ]
        except Exception:
            # Expansion is best-effort and must not fail the request, but the
            # failure is logged instead of being silently discarded.
            logger.warning(
                "Dependency expansion failed; continuing without it", exc_info=True
            )

    context = build_context(
        results,
        repo_path=primary_repo_path,
        expand_deps=body.expand_deps,
        dep_chunks=dep_results,
    )
    sources = format_sources(results)

    system_prompt = """You are ContextCraft, an expert code analysis assistant.
You answer questions about codebases using the provided code context.
Base your answers ONLY on the provided code context.
Reference specific file paths and line numbers when explaining code.
Be concise but thorough. Use markdown formatting."""

    user_message = f"## Code Context\n\n{context}\n\n## Question\n\n{question}"

    async def event_generator() -> AsyncIterator[dict[str, Any]]:
        # Provider modules are imported lazily so only the selected one is
        # loaded; any unrecognised provider value falls back to OpenAI.
        llm: BaseLLM
        if settings.llm_provider == "anthropic":
            from contextcraft.llm.anthropic import AnthropicLLM

            llm = AnthropicLLM()
        elif settings.llm_provider == "ollama":
            from contextcraft.llm.ollama import OllamaLLM

            llm = OllamaLLM()
        elif settings.llm_provider == "gemini":
            from contextcraft.llm.gemini import GeminiLLM

            llm = GeminiLLM()
        else:
            from contextcraft.llm.openai import OpenAILLM

            llm = OpenAILLM()

        if rerank_warning:
            yield {"event": "warning", "data": rerank_warning}

        try:
            async for token in llm.stream(system_prompt, user_message):
                yield {"event": "token", "data": token}
        except asyncio.CancelledError:
            logger.info("SSE client disconnected — stopping LLM stream")
            # Not every LLM implementation exposes close(); call it only when
            # present, then let the cancellation propagate.
            close = getattr(llm, "close", None)
            if close is not None:
                await close()
            raise

        yield {"event": "sources", "data": sources}
        yield {"event": "done", "data": ""}

    return EventSourceResponse(event_generator())
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
if __name__ == "__main__":
    import uvicorn

    # A PORT environment variable, when set, overrides the configured API port.
    port = int(os.environ.get("PORT", settings.api_port))

    # The app is passed as an import string rather than as an object.
    uvicorn.run("contextcraft.api.main:app", host=settings.api_host, port=port, reload=True)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# contextcraft.api.routes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# contextcraft.cli
|