contextcraft-py 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. contextcraft/__init__.py +1 -0
  2. contextcraft/api/__init__.py +1 -0
  3. contextcraft/api/main.py +311 -0
  4. contextcraft/api/routes/__init__.py +1 -0
  5. contextcraft/cli/__init__.py +1 -0
  6. contextcraft/cli/main.py +625 -0
  7. contextcraft/config.py +117 -0
  8. contextcraft/db/__init__.py +1 -0
  9. contextcraft/db/chunks_repo.py +264 -0
  10. contextcraft/db/connection.py +114 -0
  11. contextcraft/db/graph_repo.py +133 -0
  12. contextcraft/db/migrations/001_init.sql +55 -0
  13. contextcraft/db/migrations/__init__.py +1 -0
  14. contextcraft/embeddings/__init__.py +1 -0
  15. contextcraft/embeddings/base.py +35 -0
  16. contextcraft/embeddings/gemini.py +65 -0
  17. contextcraft/embeddings/ollama.py +55 -0
  18. contextcraft/embeddings/openai.py +76 -0
  19. contextcraft/git/__init__.py +1 -0
  20. contextcraft/git/async_git.py +35 -0
  21. contextcraft/git/blame.py +151 -0
  22. contextcraft/git/history.py +77 -0
  23. contextcraft/graph/__init__.py +1 -0
  24. contextcraft/graph/expander.py +79 -0
  25. contextcraft/graph/models.py +27 -0
  26. contextcraft/graph/resolver.py +252 -0
  27. contextcraft/http_timeouts.py +11 -0
  28. contextcraft/llm/__init__.py +1 -0
  29. contextcraft/llm/anthropic.py +49 -0
  30. contextcraft/llm/base.py +28 -0
  31. contextcraft/llm/gemini.py +60 -0
  32. contextcraft/llm/ollama.py +146 -0
  33. contextcraft/llm/openai.py +70 -0
  34. contextcraft/models.py +149 -0
  35. contextcraft/parser/__init__.py +1 -0
  36. contextcraft/parser/ast_parser.py +333 -0
  37. contextcraft/py.typed +0 -0
  38. contextcraft/reranker/__init__.py +6 -0
  39. contextcraft/reranker/base.py +39 -0
  40. contextcraft/reranker/cohere.py +75 -0
  41. contextcraft/search/__init__.py +1 -0
  42. contextcraft/search/bm25_search.py +69 -0
  43. contextcraft/search/context_builder.py +172 -0
  44. contextcraft/search/hybrid.py +147 -0
  45. contextcraft/search/vector_search.py +67 -0
  46. contextcraft/security.py +103 -0
  47. contextcraft/startup.py +88 -0
  48. contextcraft_py-0.3.0.dist-info/METADATA +384 -0
  49. contextcraft_py-0.3.0.dist-info/RECORD +51 -0
  50. contextcraft_py-0.3.0.dist-info/WHEEL +4 -0
  51. contextcraft_py-0.3.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1 @@
1
+ # ContextCraft
@@ -0,0 +1 @@
1
+ # contextcraft.api
@@ -0,0 +1,311 @@
1
+ """FastAPI application for ContextCraft.
2
+
3
+ Provides REST endpoints:
4
+ GET /health — Health check
5
+ GET /repos — List indexed repositories
6
+ POST /index — Trigger indexing of a repository
7
+ POST /ask — Ask a question (SSE streaming)
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import asyncio
13
+ import logging
14
+ import os
15
+ from collections.abc import AsyncIterator
16
+ from contextlib import asynccontextmanager
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+ from fastapi import FastAPI, HTTPException, Request
21
+ from fastapi.middleware.cors import CORSMiddleware
22
+ from pydantic import BaseModel, Field
23
+ from slowapi import Limiter, _rate_limit_exceeded_handler
24
+ from slowapi.errors import RateLimitExceeded
25
+ from slowapi.util import get_remote_address
26
+ from sse_starlette.sse import EventSourceResponse
27
+
28
+ from contextcraft.config import settings
29
+ from contextcraft.db import chunks_repo
30
+ from contextcraft.db.connection import close_pool, run_migrations
31
+ from contextcraft.embeddings.base import BaseEmbedder
32
+ from contextcraft.embeddings.gemini import GeminiEmbedder
33
+ from contextcraft.embeddings.openai import OpenAIEmbedder
34
+ from contextcraft.llm.base import BaseLLM
35
+ from contextcraft.models import SearchResult
36
+ from contextcraft.reranker.cohere import CohereReranker, RerankerUnavailableError
37
+ from contextcraft.search.context_builder import build_context, format_sources
38
+ from contextcraft.search.hybrid import hybrid_search
39
+ from contextcraft.security import sanitize_query, validate_repo_path
40
+ from contextcraft.startup import verify_startup
41
+
42
logger = logging.getLogger(__name__)

# Strong references to in-flight background tasks. index_repo adds each task
# it creates here and registers a done-callback that discards it on completion.
_background_tasks: set[asyncio.Task[Any]] = set()
# Rate limiter keyed by slowapi's get_remote_address helper.
limiter = Limiter(key_func=get_remote_address)
46
+
47
+
48
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Run startup checks and migrations, then close the pool on shutdown.

    Startup awaits ``verify_startup()`` followed by ``run_migrations()``.
    Shutdown awaits ``close_pool()``.

    The ``yield`` is wrapped in ``try``/``finally`` so the pool is also closed
    when an exception or cancellation is thrown into this context manager
    while the application is running, not only on a clean shutdown.
    """
    await verify_startup()
    await run_migrations()
    try:
        yield
    finally:
        await close_pool()
55
+
56
+
57
# ASGI application object; startup and shutdown work is delegated to `lifespan`.
app = FastAPI(
    title="ContextCraft API",
    description="Index codebases and ask questions with full context.",
    version=settings.app_version,
    lifespan=lifespan,
)

# Expose the limiter on app.state and register slowapi's handler for
# RateLimitExceeded.
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)  # type: ignore[arg-type]

# CORS: allowed origins come from settings; only GET/POST/OPTIONS are
# permitted, with any request header.
# NOTE(review): credentials are enabled here, so settings.allowed_origins
# should be an explicit origin list — confirm it can never be a wildcard.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.allowed_origins,
    allow_credentials=True,
    allow_methods=["GET", "POST", "OPTIONS"],
    allow_headers=["*"],
)
74
+
75
+
76
+ # ---------------------------------------------------------------------------
77
+ # Request / Response models
78
+ # ---------------------------------------------------------------------------
79
+
80
+
81
class IndexRequest(BaseModel):
    """Request body for ``POST /index``."""

    # Path of the repository to index; index_repo passes it through
    # validate_repo_path before use.
    repo_path: str
    # The three flags below are forwarded unchanged to the indexing coroutine
    # as keyword arguments of the same name.
    incremental: bool = False
    skip_embeddings: bool = False
    skip_git: bool = False
86
+
87
+
88
class AskRequest(BaseModel):
    """Request body for ``POST /ask``."""

    # Question text; validation rejects anything longer than 500 characters.
    question: str = Field(..., max_length=500)
    # Repositories to search. ask_question matches each entry against both the
    # repository id and the repository name.
    repo_ids: list[str] | None = None
    # When true, every indexed repository is searched and repo_ids is ignored.
    all_repos: bool = False
    # Number of results to keep. Must be at least 1: zero or a negative value
    # would be passed to the search call and used in a `results[: top_k]`
    # slice, which then returns nothing or silently drops trailing results.
    top_k: int = Field(default=10, ge=1)
    # When true, ask_question also tries to add dependency chunks to the context.
    expand_deps: bool = False
94
+
95
+
96
class RepoResponse(BaseModel):
    """One indexed repository as returned by ``GET /repos``."""

    # Repository id, stringified by list_repos.
    id: str
    name: str
    local_path: str
    # The `.value` of each language entry on the repository record.
    languages: list[str]
    chunk_count: int
    # ISO-8601 string from `.isoformat()`, or None when the repository record
    # has no last_indexed_at value.
    last_indexed_at: str | None
103
+
104
+
105
class HealthResponse(BaseModel):
    """Response body for ``GET /health``."""

    # The health endpoint always reports "ok".
    status: str
    # Filled from settings.app_version.
    version: str
108
+
109
+
110
+ # ---------------------------------------------------------------------------
111
+ # Endpoints
112
+ # ---------------------------------------------------------------------------
113
+
114
+
115
@app.get("/health", response_model=HealthResponse)
async def health() -> HealthResponse:
    """Report that the service is up, together with the configured app version."""
    payload = HealthResponse(status="ok", version=settings.app_version)
    return payload
119
+
120
+
121
@app.get("/repos", response_model=list[RepoResponse])
async def list_repos() -> list[RepoResponse]:
    """Return one ``RepoResponse`` per repository reported by ``chunks_repo``."""
    responses: list[RepoResponse] = []
    for repo in await chunks_repo.list_repositories():
        indexed_at = repo.last_indexed_at
        responses.append(
            RepoResponse(
                id=str(repo.id),
                name=repo.name,
                local_path=repo.local_path,
                languages=[language.value for language in repo.languages],
                chunk_count=repo.chunk_count,
                # Timestamps are serialized as ISO strings; a missing one stays None.
                last_indexed_at=indexed_at.isoformat() if indexed_at else None,
            )
        )
    return responses
136
+
137
+
138
@app.post("/index")
async def index_repo(request: IndexRequest) -> dict[str, str]:
    """Trigger indexing of a repository (runs in background).

    The path is validated first; indexing is then scheduled as an asyncio task
    and the endpoint returns immediately without waiting for it to finish.

    Args:
        request: Repository path plus the indexing flags to forward.

    Returns:
        A dict with ``status`` set to ``"indexing_started"`` and ``repo_path``
        set to the validated path.

    Raises:
        HTTPException: 400 when ``validate_repo_path`` rejects the path.
    """
    try:
        repo_path = validate_repo_path(Path(request.repo_path))
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc

    # NOTE(review): imported at call time rather than module level — presumably
    # to avoid an import cycle or CLI import cost; confirm before hoisting.
    from contextcraft.cli.main import _index_async

    def _on_done(finished: asyncio.Task[Any]) -> None:
        """Drop the task reference and log a failure, if any."""
        _background_tasks.discard(finished)
        if finished.cancelled():
            return
        # Retrieving the exception is what surfaces a failed run; nothing else
        # ever awaits this task, so without this the error would go unreported.
        error = finished.exception()
        if error is not None:
            logger.error("Background indexing of %s failed", repo_path, exc_info=error)

    task = asyncio.create_task(
        _index_async(
            repo_path,
            incremental=request.incremental,
            skip_embeddings=request.skip_embeddings,
            skip_git=request.skip_git,
        )
    )
    # Keep a strong reference until the task completes.
    _background_tasks.add(task)
    task.add_done_callback(_on_done)

    return {"status": "indexing_started", "repo_path": str(repo_path)}
160
+
161
+
162
@app.post("/ask")
@limiter.limit("10/minute")
async def ask_question(http_request: Request, request: AskRequest) -> EventSourceResponse:
    """Ask a question about an indexed codebase (SSE streaming).

    Steps: sanitize the question, choose the target repositories, embed the
    question, run hybrid search, optionally rerank and expand dependencies,
    build the prompt context, then stream the LLM answer.

    Events on the stream, in order: an optional ``warning`` (reranker was
    unavailable), zero or more ``token`` events, one ``sources`` event and a
    final ``done`` event.

    Args:
        http_request: The incoming HTTP request.
        request: The parsed request body.

    Raises:
        HTTPException: 400 when the sanitized question is empty; 503 when the
            API key for the configured embedding provider is missing or the
            embedding call fails; 404 when no repositories are indexed, none
            of the requested repositories exist, or the search finds nothing.
    """
    # NOTE(review): slowapi's limit decorator looks up the endpoint parameter
    # named `request`; here that name is bound to the body model and the
    # Starlette request is `http_request`. Confirm rate limiting works with
    # this signature before relying on it.
    question = sanitize_query(request.question)
    if not question:
        raise HTTPException(status_code=400, detail="Question must not be empty")

    if settings.embedding_provider == "openai" and not settings.openai_api_key:
        raise HTTPException(status_code=503, detail="OPENAI_API_KEY not configured")
    if settings.embedding_provider == "gemini" and not settings.gemini_api_key:
        raise HTTPException(status_code=503, detail="GEMINI_API_KEY not configured")

    repos = await chunks_repo.list_repositories()
    if not repos:
        raise HTTPException(status_code=404, detail="No repositories indexed")

    target_repos = []
    if request.all_repos:
        target_repos = repos
    elif request.repo_ids:
        # Each requested identifier may be either a repository id or a name;
        # identifiers that match nothing are skipped.
        for rid in request.repo_ids:
            found = next((r for r in repos if str(r.id) == rid or r.name == rid), None)
            if found:
                target_repos.append(found)
        if not target_repos:
            raise HTTPException(
                status_code=404, detail="None of the requested repositories were found"
            )
    else:
        # No selection given: fall back to the first listed repository.
        target_repos = [repos[0]]

    target_repo_ids = [r.id for r in target_repos]
    primary_repo_path = target_repos[0].local_path

    # Any provider other than "gemini" falls through to the OpenAI embedder.
    embedder: BaseEmbedder = (
        GeminiEmbedder() if settings.embedding_provider == "gemini" else OpenAIEmbedder()
    )

    try:
        query_embedding = await embedder.embed_single(question)
    except Exception as exc:
        logger.error("Embedding service error: %s", exc)
        raise HTTPException(
            status_code=503,
            detail="Embedding service unavailable. Check API keys and try again.",
        ) from exc

    use_reranker = settings.rerank_enabled and bool(settings.cohere_api_key)
    # Over-fetch candidates for the reranker, but never fewer than the caller
    # asked for: a fixed 20 would silently cap top_k whenever reranking is on.
    fetch_k = max(20, request.top_k) if use_reranker else request.top_k

    results = await hybrid_search(
        query_embedding=query_embedding,
        query_text=question,
        repo_ids=target_repo_ids,
        top_k=fetch_k,
    )

    if not results:
        raise HTTPException(status_code=404, detail="No relevant code found")

    rerank_warning: str | None = None
    if use_reranker:
        try:
            reranker = CohereReranker()
            results = await reranker.rerank(question, results, request.top_k)
        except RerankerUnavailableError as exc:
            # Degrade to the un-reranked order and tell the client via a
            # "warning" event once streaming starts.
            logger.warning("%s", exc)
            rerank_warning = str(exc)
            results = results[: request.top_k]

    dep_results: list[SearchResult] | None = None
    if request.expand_deps:
        try:
            from contextcraft.graph.expander import expand_with_deps

            chunk_ids = [sr.chunk.id for sr in results]
            dep_chunks = await expand_with_deps(chunk_ids)
            if dep_chunks:
                # Dependency chunks carry a zero score and are ranked after
                # the primary results.
                dep_results = [
                    SearchResult(chunk=dc, score=0.0, rank=len(results) + i)
                    for i, dc in enumerate(dep_chunks)
                ]
        except Exception:
            # Best-effort: the answer is still produced without dependency
            # chunks, but record why expansion failed instead of discarding it.
            logger.warning(
                "Dependency expansion failed; continuing without it", exc_info=True
            )

    context = build_context(
        results,
        repo_path=primary_repo_path,
        expand_deps=request.expand_deps,
        dep_chunks=dep_results,
    )
    sources = format_sources(results)

    system_prompt = """You are ContextCraft, an expert code analysis assistant.
You answer questions about codebases using the provided code context.
Base your answers ONLY on the provided code context.
Reference specific file paths and line numbers when explaining code.
Be concise but thorough. Use markdown formatting."""

    user_message = f"## Code Context\n\n{context}\n\n## Question\n\n{question}"

    async def event_generator() -> AsyncIterator[dict[str, Any]]:
        """Yield SSE events for the warning, answer tokens, sources and done."""
        # The LLM client is chosen from settings; unknown providers fall back
        # to OpenAI. Provider modules are imported only when selected.
        llm: BaseLLM
        if settings.llm_provider == "anthropic":
            from contextcraft.llm.anthropic import AnthropicLLM

            llm = AnthropicLLM()
        elif settings.llm_provider == "ollama":
            from contextcraft.llm.ollama import OllamaLLM

            llm = OllamaLLM()
        elif settings.llm_provider == "gemini":
            from contextcraft.llm.gemini import GeminiLLM

            llm = GeminiLLM()
        else:
            from contextcraft.llm.openai import OpenAILLM

            llm = OpenAILLM()

        if rerank_warning:
            yield {"event": "warning", "data": rerank_warning}

        try:
            async for token in llm.stream(system_prompt, user_message):
                yield {"event": "token", "data": token}
        except asyncio.CancelledError:
            # Cancellation is treated as a client disconnect: close the LLM
            # client if it exposes close(), then let the cancellation propagate.
            logger.info("SSE client disconnected — stopping LLM stream")
            close = getattr(llm, "close", None)
            if close is not None:
                await close()
            raise

        yield {"event": "sources", "data": sources}
        yield {"event": "done", "data": ""}

    return EventSourceResponse(event_generator())
300
+
301
+
302
if __name__ == "__main__":
    import uvicorn

    # A PORT environment variable, when set, takes precedence over the
    # configured API port.
    port = int(os.environ.get("PORT", settings.api_port))
    run_options = {"host": settings.api_host, "port": port, "reload": True}
    uvicorn.run("contextcraft.api.main:app", **run_options)
@@ -0,0 +1 @@
1
+ # contextcraft.api.routes
@@ -0,0 +1 @@
1
+ # contextcraft.cli