contextcraft-py 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextcraft_py-0.3.0/.env.example +46 -0
- contextcraft_py-0.3.0/.github/workflows/ci.yml +77 -0
- contextcraft_py-0.3.0/.gitignore +90 -0
- contextcraft_py-0.3.0/BENCHMARK.md +136 -0
- contextcraft_py-0.3.0/CHANGELOG.md +87 -0
- contextcraft_py-0.3.0/PKG-INFO +384 -0
- contextcraft_py-0.3.0/README.md +342 -0
- contextcraft_py-0.3.0/docker/Dockerfile +35 -0
- contextcraft_py-0.3.0/docker/docker-compose.yml +39 -0
- contextcraft_py-0.3.0/eval/README.md +35 -0
- contextcraft_py-0.3.0/eval/run_eval.py +210 -0
- contextcraft_py-0.3.0/eval/test_cases.json +52 -0
- contextcraft_py-0.3.0/migrations/003_chunk_edges.sql +16 -0
- contextcraft_py-0.3.0/pyproject.toml +94 -0
- contextcraft_py-0.3.0/railway.toml +12 -0
- contextcraft_py-0.3.0/src/contextcraft/__init__.py +1 -0
- contextcraft_py-0.3.0/src/contextcraft/api/__init__.py +1 -0
- contextcraft_py-0.3.0/src/contextcraft/api/main.py +311 -0
- contextcraft_py-0.3.0/src/contextcraft/api/routes/__init__.py +1 -0
- contextcraft_py-0.3.0/src/contextcraft/cli/__init__.py +1 -0
- contextcraft_py-0.3.0/src/contextcraft/cli/main.py +625 -0
- contextcraft_py-0.3.0/src/contextcraft/config.py +117 -0
- contextcraft_py-0.3.0/src/contextcraft/db/__init__.py +1 -0
- contextcraft_py-0.3.0/src/contextcraft/db/chunks_repo.py +264 -0
- contextcraft_py-0.3.0/src/contextcraft/db/connection.py +114 -0
- contextcraft_py-0.3.0/src/contextcraft/db/graph_repo.py +133 -0
- contextcraft_py-0.3.0/src/contextcraft/db/migrations/001_init.sql +55 -0
- contextcraft_py-0.3.0/src/contextcraft/db/migrations/__init__.py +1 -0
- contextcraft_py-0.3.0/src/contextcraft/embeddings/__init__.py +1 -0
- contextcraft_py-0.3.0/src/contextcraft/embeddings/base.py +35 -0
- contextcraft_py-0.3.0/src/contextcraft/embeddings/gemini.py +65 -0
- contextcraft_py-0.3.0/src/contextcraft/embeddings/ollama.py +55 -0
- contextcraft_py-0.3.0/src/contextcraft/embeddings/openai.py +76 -0
- contextcraft_py-0.3.0/src/contextcraft/git/__init__.py +1 -0
- contextcraft_py-0.3.0/src/contextcraft/git/async_git.py +35 -0
- contextcraft_py-0.3.0/src/contextcraft/git/blame.py +151 -0
- contextcraft_py-0.3.0/src/contextcraft/git/history.py +77 -0
- contextcraft_py-0.3.0/src/contextcraft/graph/__init__.py +1 -0
- contextcraft_py-0.3.0/src/contextcraft/graph/expander.py +79 -0
- contextcraft_py-0.3.0/src/contextcraft/graph/models.py +27 -0
- contextcraft_py-0.3.0/src/contextcraft/graph/resolver.py +252 -0
- contextcraft_py-0.3.0/src/contextcraft/http_timeouts.py +11 -0
- contextcraft_py-0.3.0/src/contextcraft/llm/__init__.py +1 -0
- contextcraft_py-0.3.0/src/contextcraft/llm/anthropic.py +49 -0
- contextcraft_py-0.3.0/src/contextcraft/llm/base.py +28 -0
- contextcraft_py-0.3.0/src/contextcraft/llm/gemini.py +60 -0
- contextcraft_py-0.3.0/src/contextcraft/llm/ollama.py +146 -0
- contextcraft_py-0.3.0/src/contextcraft/llm/openai.py +70 -0
- contextcraft_py-0.3.0/src/contextcraft/models.py +149 -0
- contextcraft_py-0.3.0/src/contextcraft/parser/__init__.py +1 -0
- contextcraft_py-0.3.0/src/contextcraft/parser/ast_parser.py +333 -0
- contextcraft_py-0.3.0/src/contextcraft/py.typed +0 -0
- contextcraft_py-0.3.0/src/contextcraft/reranker/__init__.py +6 -0
- contextcraft_py-0.3.0/src/contextcraft/reranker/base.py +39 -0
- contextcraft_py-0.3.0/src/contextcraft/reranker/cohere.py +75 -0
- contextcraft_py-0.3.0/src/contextcraft/search/__init__.py +1 -0
- contextcraft_py-0.3.0/src/contextcraft/search/bm25_search.py +69 -0
- contextcraft_py-0.3.0/src/contextcraft/search/context_builder.py +172 -0
- contextcraft_py-0.3.0/src/contextcraft/search/hybrid.py +147 -0
- contextcraft_py-0.3.0/src/contextcraft/search/vector_search.py +67 -0
- contextcraft_py-0.3.0/src/contextcraft/security.py +103 -0
- contextcraft_py-0.3.0/src/contextcraft/startup.py +88 -0
- contextcraft_py-0.3.0/tests/__init__.py +1 -0
- contextcraft_py-0.3.0/tests/fixtures/import_ground_truth.txt +124 -0
- contextcraft_py-0.3.0/tests/fixtures/sample_go.go +48 -0
- contextcraft_py-0.3.0/tests/fixtures/sample_javascript.js +30 -0
- contextcraft_py-0.3.0/tests/fixtures/sample_python.py +61 -0
- contextcraft_py-0.3.0/tests/fixtures/sample_typescript.ts +48 -0
- contextcraft_py-0.3.0/tests/test_graph.py +275 -0
- contextcraft_py-0.3.0/tests/test_parser.py +238 -0
- contextcraft_py-0.3.0/web/.gitignore +41 -0
- contextcraft_py-0.3.0/web/AGENTS.md +5 -0
- contextcraft_py-0.3.0/web/Dockerfile +47 -0
- contextcraft_py-0.3.0/web/README.md +36 -0
- contextcraft_py-0.3.0/web/eslint.config.mjs +18 -0
- contextcraft_py-0.3.0/web/next-env.d.ts +6 -0
- contextcraft_py-0.3.0/web/next.config.ts +7 -0
- contextcraft_py-0.3.0/web/package-lock.json +6600 -0
- contextcraft_py-0.3.0/web/package.json +26 -0
- contextcraft_py-0.3.0/web/public/file.svg +1 -0
- contextcraft_py-0.3.0/web/public/globe.svg +1 -0
- contextcraft_py-0.3.0/web/public/next.svg +1 -0
- contextcraft_py-0.3.0/web/public/vercel.svg +1 -0
- contextcraft_py-0.3.0/web/public/window.svg +1 -0
- contextcraft_py-0.3.0/web/src/app/api/ask/route.ts +29 -0
- contextcraft_py-0.3.0/web/src/app/api/repos/route.ts +13 -0
- contextcraft_py-0.3.0/web/src/app/favicon.ico +0 -0
- contextcraft_py-0.3.0/web/src/app/globals.css +82 -0
- contextcraft_py-0.3.0/web/src/app/layout.tsx +22 -0
- contextcraft_py-0.3.0/web/src/app/page.module.css +241 -0
- contextcraft_py-0.3.0/web/src/app/page.tsx +267 -0
- contextcraft_py-0.3.0/web/src/components/SourceCitations.module.css +68 -0
- contextcraft_py-0.3.0/web/src/components/SourceCitations.tsx +84 -0
- contextcraft_py-0.3.0/web/src/lib/types.ts +26 -0
- contextcraft_py-0.3.0/web/tsconfig.json +34 -0
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# ContextCraft — Environment Variables
|
|
2
|
+
# Copy this file to .env and fill in the values.
|
|
3
|
+
|
|
4
|
+
# --- Database ---
|
|
5
|
+
# Railway/Heroku: DATABASE_URL is also accepted (no CONTEXTCRAFT_ prefix).
|
|
6
|
+
CONTEXTCRAFT_DATABASE_URL=postgresql://contextcraft:contextcraft@localhost:5432/contextcraft
|
|
7
|
+
|
|
8
|
+
# --- Embeddings & LLM (Gemini — default) ---
|
|
9
|
+
CONTEXTCRAFT_GEMINI_API_KEY=your_gemini_api_key_here
|
|
10
|
+
CONTEXTCRAFT_EMBEDDING_PROVIDER=gemini
|
|
11
|
+
CONTEXTCRAFT_EMBEDDING_MODEL=text-embedding-004
|
|
12
|
+
CONTEXTCRAFT_LLM_PROVIDER=gemini
|
|
13
|
+
CONTEXTCRAFT_GEMINI_CHAT_MODEL=gemini-1.5-flash
|
|
14
|
+
|
|
15
|
+
# --- OpenAI (optional alternative) ---
|
|
16
|
+
# CONTEXTCRAFT_EMBEDDING_PROVIDER=openai
|
|
17
|
+
# CONTEXTCRAFT_LLM_PROVIDER=openai
|
|
18
|
+
# CONTEXTCRAFT_OPENAI_API_KEY=sk-your-key-here
|
|
19
|
+
# CONTEXTCRAFT_OPENAI_CHAT_MODEL=gpt-4o
|
|
20
|
+
|
|
21
|
+
# --- Anthropic (optional alternative LLM) ---
|
|
22
|
+
# CONTEXTCRAFT_LLM_PROVIDER=anthropic
|
|
23
|
+
# CONTEXTCRAFT_ANTHROPIC_API_KEY=sk-ant-your-key-here
|
|
24
|
+
# CONTEXTCRAFT_ANTHROPIC_MODEL=claude-sonnet-4-20250514
|
|
25
|
+
|
|
26
|
+
# --- Ollama (optional local LLM) ---
|
|
27
|
+
# CONTEXTCRAFT_LLM_PROVIDER=ollama
|
|
28
|
+
# CONTEXTCRAFT_OLLAMA_BASE_URL=http://localhost:11434
|
|
29
|
+
# CONTEXTCRAFT_OLLAMA_MODEL=qwen2.5-coder:7b
|
|
30
|
+
# CONTEXTCRAFT_OLLAMA_ALLOW_REMOTE=false
|
|
31
|
+
|
|
32
|
+
# --- Cohere reranking (optional) ---
|
|
33
|
+
# CONTEXTCRAFT_COHERE_API_KEY=your_cohere_key_here
|
|
34
|
+
# CONTEXTCRAFT_RERANK_ENABLED=true
|
|
35
|
+
|
|
36
|
+
# --- Search ---
|
|
37
|
+
CONTEXTCRAFT_SEARCH_TOP_K=10
|
|
38
|
+
CONTEXTCRAFT_MAX_CONTEXT_TOKENS=20000
|
|
39
|
+
|
|
40
|
+
# --- API / CORS ---
|
|
41
|
+
CONTEXTCRAFT_API_HOST=0.0.0.0
|
|
42
|
+
CONTEXTCRAFT_API_PORT=8000
|
|
43
|
+
CONTEXTCRAFT_ALLOWED_ORIGINS=http://localhost:3000,http://127.0.0.1:3000
|
|
44
|
+
|
|
45
|
+
# --- Logging ---
|
|
46
|
+
CONTEXTCRAFT_LOG_LEVEL=INFO
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
|
|
15
|
+
- name: Set up Python
|
|
16
|
+
uses: actions/setup-python@v5
|
|
17
|
+
with:
|
|
18
|
+
python-version: "3.12"
|
|
19
|
+
|
|
20
|
+
- name: Install dependencies
|
|
21
|
+
run: pip install -e ".[dev]"
|
|
22
|
+
|
|
23
|
+
- name: Ruff lint
|
|
24
|
+
run: ruff check src/ tests/
|
|
25
|
+
|
|
26
|
+
- name: Ruff format check
|
|
27
|
+
run: ruff format --check src/ tests/
|
|
28
|
+
|
|
29
|
+
typecheck:
|
|
30
|
+
runs-on: ubuntu-latest
|
|
31
|
+
steps:
|
|
32
|
+
- uses: actions/checkout@v4
|
|
33
|
+
|
|
34
|
+
- name: Set up Python
|
|
35
|
+
uses: actions/setup-python@v5
|
|
36
|
+
with:
|
|
37
|
+
python-version: "3.12"
|
|
38
|
+
|
|
39
|
+
- name: Install dependencies
|
|
40
|
+
run: pip install -e ".[dev]"
|
|
41
|
+
|
|
42
|
+
- name: mypy
|
|
43
|
+
run: mypy src/contextcraft/ --ignore-missing-imports
|
|
44
|
+
|
|
45
|
+
test:
|
|
46
|
+
runs-on: ubuntu-latest
|
|
47
|
+
services:
|
|
48
|
+
postgres:
|
|
49
|
+
image: pgvector/pgvector:pg16
|
|
50
|
+
env:
|
|
51
|
+
POSTGRES_USER: contextcraft
|
|
52
|
+
POSTGRES_PASSWORD: contextcraft
|
|
53
|
+
POSTGRES_DB: contextcraft
|
|
54
|
+
ports:
|
|
55
|
+
- 5432:5432
|
|
56
|
+
options: >-
|
|
57
|
+
--health-cmd "pg_isready -U contextcraft -d contextcraft"
|
|
58
|
+
--health-interval 5s
|
|
59
|
+
--health-timeout 5s
|
|
60
|
+
--health-retries 5
|
|
61
|
+
|
|
62
|
+
steps:
|
|
63
|
+
- uses: actions/checkout@v4
|
|
64
|
+
|
|
65
|
+
- name: Set up Python
|
|
66
|
+
uses: actions/setup-python@v5
|
|
67
|
+
with:
|
|
68
|
+
python-version: "3.12"
|
|
69
|
+
|
|
70
|
+
- name: Install dependencies
|
|
71
|
+
run: pip install -e ".[dev]"
|
|
72
|
+
|
|
73
|
+
- name: Run tests
|
|
74
|
+
env:
|
|
75
|
+
CONTEXTCRAFT_DATABASE_URL: postgresql://contextcraft:contextcraft@localhost:5432/contextcraft
|
|
76
|
+
CONTEXTCRAFT_OPENAI_API_KEY: ""
|
|
77
|
+
run: pytest -v --tb=short
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# ContextCraft — Python + FastAPI + Next.js + Docker + PyPI + Railway
|
|
2
|
+
|
|
3
|
+
# --- PYTHON ---
|
|
4
|
+
__pycache__/
|
|
5
|
+
*.py[cod]
|
|
6
|
+
*.pyo
|
|
7
|
+
*.pyd
|
|
8
|
+
.Python
|
|
9
|
+
*.so
|
|
10
|
+
|
|
11
|
+
# --- VIRTUAL ENVIRONMENTS ---
|
|
12
|
+
.venv/
|
|
13
|
+
venv/
|
|
14
|
+
env/
|
|
15
|
+
.uv/
|
|
16
|
+
|
|
17
|
+
# --- BUILD & DISTRIBUTION ---
|
|
18
|
+
dist/
|
|
19
|
+
build/
|
|
20
|
+
*.egg-info/
|
|
21
|
+
.eggs/
|
|
22
|
+
*.whl
|
|
23
|
+
*.tar.gz
|
|
24
|
+
MANIFEST
|
|
25
|
+
|
|
26
|
+
# --- ENVIRONMENT & SECRETS ---
|
|
27
|
+
.env
|
|
28
|
+
.env.*
|
|
29
|
+
!.env.example
|
|
30
|
+
|
|
31
|
+
# --- TESTING & COVERAGE ---
|
|
32
|
+
.pytest_cache/
|
|
33
|
+
.coverage
|
|
34
|
+
.coverage.*
|
|
35
|
+
htmlcov/
|
|
36
|
+
coverage.xml
|
|
37
|
+
*.coveragerc
|
|
38
|
+
|
|
39
|
+
# --- TYPE CHECKING & LINTING ---
|
|
40
|
+
.mypy_cache/
|
|
41
|
+
.ruff_cache/
|
|
42
|
+
.dmypy.json
|
|
43
|
+
dmypy.json
|
|
44
|
+
|
|
45
|
+
# --- NEXT.JS / NODE ---
|
|
46
|
+
web/.next/
|
|
47
|
+
web/node_modules/
|
|
48
|
+
web/out/
|
|
49
|
+
web/.turbo/
|
|
50
|
+
web/coverage/
|
|
51
|
+
*.tsbuildinfo
|
|
52
|
+
|
|
53
|
+
# --- DOCKER ---
|
|
54
|
+
.docker/
|
|
55
|
+
docker-volumes/
|
|
56
|
+
|
|
57
|
+
# --- DATABASES & LOCAL STATE ---
|
|
58
|
+
*.db
|
|
59
|
+
*.sqlite
|
|
60
|
+
*.sqlite3
|
|
61
|
+
pgdata/
|
|
62
|
+
|
|
63
|
+
# --- LOGS ---
|
|
64
|
+
*.log
|
|
65
|
+
logs/
|
|
66
|
+
|
|
67
|
+
# --- EVAL HARNESS OUTPUTS ---
|
|
68
|
+
eval/results/
|
|
69
|
+
eval/outputs/
|
|
70
|
+
eval/*.json
|
|
71
|
+
!eval/test_cases.json
|
|
72
|
+
|
|
73
|
+
# --- BENCHMARK ARTIFACTS ---
|
|
74
|
+
benchmark_raw/
|
|
75
|
+
*.benchmark.json
|
|
76
|
+
|
|
77
|
+
# --- IDE & OS ---
|
|
78
|
+
.vscode/
|
|
79
|
+
.idea/
|
|
80
|
+
*.swp
|
|
81
|
+
*.swo
|
|
82
|
+
.DS_Store
|
|
83
|
+
Thumbs.db
|
|
84
|
+
|
|
85
|
+
# --- CLAUDE CODE ---
|
|
86
|
+
CLAUDE.md
|
|
87
|
+
.claude/
|
|
88
|
+
|
|
89
|
+
# --- RAILWAY ---
|
|
90
|
+
.railway/
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
# ContextCraft — Benchmark Results
|
|
2
|
+
|
|
3
|
+
Evaluated against ContextCraft's own codebase (v0.3.0).
|
|
4
|
+
Eval set: 10 hand-curated questions with verified ground-truth source files.
|
|
5
|
+
Date: May 19, 2026
|
|
6
|
+
Embedder: Gemini Embedding 2 | Reranker: Cohere rerank-english-v3.0 | LLM Judge: Gemini 3.1 Flash Lite
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## Methodology
|
|
11
|
+
|
|
12
|
+
Each configuration was evaluated over **3 complete iterations** (30 total queries per
|
|
13
|
+
config) to produce stable latency metrics. Before each run, the system undergoes a
|
|
14
|
+
warm-up phase — one throwaway embedding and one pgvector hybrid search — to eliminate
|
|
15
|
+
cold-start penalties from connection pool initialization and network overhead.
|
|
16
|
+
|
|
17
|
+
**Ground truth verification:** Expected source files were determined by manual
|
|
18
|
+
inspection of the codebase before any queries were run against the system. No
|
|
19
|
+
system output was used to construct the test set. One test case was corrected
|
|
20
|
+
mid-evaluation after discovering the original expected source (`001_init.sql`)
|
|
21
|
+
pointed to a SQL migration file outside the AST parser's indexing scope — an
|
|
22
|
+
honest limitation documented in the observations below.
|
|
23
|
+
|
|
24
|
+
**Faithfulness scoring:** Each generated answer is evaluated by a second LLM call
|
|
25
|
+
acting as a judge, asked to return `PASS` or `FAIL` based on whether the answer
|
|
26
|
+
correctly covers the ground truth. This is an approximation — LLM-as-judge
|
|
27
|
+
introduces its own variance, particularly with smaller models.
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Retrieval Quality
|
|
32
|
+
|
|
33
|
+
| Configuration | Source Hit Rate | Faithful Answers |
|
|
34
|
+
|---|---|---|
|
|
35
|
+
| RRF only | **80.0%** | 73.3% |
|
|
36
|
+
| RRF + Reranker | **75.0%** | 60.0% |
|
|
37
|
+
| RRF + Reranker + Deps | **75.0%** | 53.3% |
|
|
38
|
+
|
|
39
|
+
*Source Hit Rate: percentage of queries where at least one expected source file
|
|
40
|
+
appeared in the retrieved context, position-agnostic, averaged across 3 runs.*
|
|
41
|
+
|
|
42
|
+
*Faithful Answers: percentage of generated answers judged to correctly cover the
|
|
43
|
+
ground truth by a second LLM call, averaged across 3 runs.*
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Latency (retrieval only, averaged over 3 runs)
|
|
48
|
+
|
|
49
|
+
| Configuration | P50 | P95 |
|
|
50
|
+
|---|---|---|
|
|
51
|
+
| RRF only | 3,876ms | 4,340ms |
|
|
52
|
+
| RRF + Reranker | 5,121ms | 6,969ms |
|
|
53
|
+
| RRF + Reranker + Deps | 4,993ms | 5,621ms |
|
|
54
|
+
|
|
55
|
+
*Latency measured from query embedding start to final ranked chunk list returned.
|
|
56
|
+
Excludes LLM answer generation time, which is model-dependent and typically adds
|
|
57
|
+
2–15 seconds.*
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## Observations
|
|
62
|
+
|
|
63
|
+
**RRF only is the strongest configuration on this codebase.** It achieves the
|
|
64
|
+
highest source hit rate (80%) and the best faithfulness score (73.3%) at the
|
|
65
|
+
lowest latency (3.88s P50). For a ~270 chunk corpus queried with factual
|
|
66
|
+
architectural questions, the hybrid BM25 + vector RRF baseline is already
|
|
67
|
+
well-calibrated.
|
|
68
|
+
|
|
69
|
+
**The Cohere reranker adds ~1.25s P50 latency** (Cohere API round-trip) and
|
|
70
|
+
did not improve source hit rate on this eval set. Hit rate dropped marginally
|
|
71
|
+
from 80% to 75%. This is consistent with how cross-encoders behave on small,
|
|
72
|
+
well-chunked corpora — the initial retrieval candidates are already good, and
|
|
73
|
+
reranking changes their order without surfacing new files. The reranker is
|
|
74
|
+
expected to show larger gains on codebases with 10K+ chunks where the initial
|
|
75
|
+
candidate pool is noisier.
|
|
76
|
+
|
|
77
|
+
**Faithfulness decreases with each added stage.** Adding the reranker dropped
|
|
78
|
+
faithfulness from 73.3% to 60.0%, and dependency expansion brought it further
|
|
79
|
+
to 53.3%. This is partly an LLM judge sensitivity issue — `gemini-3.1-flash-lite`
|
|
80
|
+
is a small model that is sensitive to context ordering changes introduced by
|
|
81
|
+
reranking. It is not a reliable signal that answer quality degraded for end users.
|
|
82
|
+
|
|
83
|
+
**Dependency expansion is sparse on this codebase.** The graph expander fired
|
|
84
|
+
on only 1 question across all 30 queries per config (question 8: SSE disconnect
|
|
85
|
+
handling), expanding 1 dependency chunk from 10 source chunks each time. The
|
|
86
|
+
dependency graph becomes more valuable on larger, more interconnected codebases
|
|
87
|
+
where a retrieved function imports heavily from other modules.
|
|
88
|
+
|
|
89
|
+
**Persistent retrieval miss — question 10.** "How are file imports attached to
|
|
90
|
+
code chunks?" consistently returns 0% hit rate across all configurations. The
|
|
91
|
+
expected source (`src/contextcraft/parser/ast_parser.py`) is indexed, but the
|
|
92
|
+
query embedding does not rank it within the top 10. This is a genuine retrieval
|
|
93
|
+
gap for questions about implicit behaviour rather than named functions or classes,
|
|
94
|
+
and represents a known limitation of bi-encoder retrieval.
|
|
95
|
+
|
|
96
|
+
**Question 2 is a structural miss.** "What is the token limit for a single code
|
|
97
|
+
chunk?" misses across all configurations because `config.py` (the expected source)
|
|
98
|
+
does not surface in the top results despite containing the answer. This points to
|
|
99
|
+
a vocabulary mismatch between the query and the indexed content — a candidate for
|
|
100
|
+
query expansion or metadata filtering in a future iteration.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## Eval Set
|
|
105
|
+
|
|
106
|
+
10 questions covering: hybrid search implementation, token limits, git blame
|
|
107
|
+
caching, incremental indexing, reranker interface, language support, connection
|
|
108
|
+
pooling, SSE disconnect handling, embedding storage, and import attachment logic.
|
|
109
|
+
|
|
110
|
+
Full questions, expected sources, and ground truth answers: `eval/test_cases.json`
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## Reproducing
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
# 1. Configure environment
|
|
118
|
+
cp .env.example .env
|
|
119
|
+
# Set CONTEXTCRAFT_GEMINI_API_KEY and CONTEXTCRAFT_COHERE_API_KEY
|
|
120
|
+
|
|
121
|
+
# 2. Start database
|
|
122
|
+
docker compose -f docker/docker-compose.yml up -d postgres
|
|
123
|
+
|
|
124
|
+
# 3. Index the codebase
|
|
125
|
+
contextcraft index .
|
|
126
|
+
|
|
127
|
+
# 4. Run evaluation (all three configurations)
|
|
128
|
+
python eval/run_eval.py --runs 3
|
|
129
|
+
python eval/run_eval.py --rerank --runs 3
|
|
130
|
+
python eval/run_eval.py --rerank --deps --runs 3
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
> **Rate limit note:** The eval harness makes 2 LLM calls per question.
|
|
134
|
+
> On Gemini free tier, use `gemini-3.1-flash-lite` (500 RPD / 15 RPM) with
|
|
135
|
+
> `asyncio.sleep(10)` between questions in `eval/run_eval.py` to stay within limits.
|
|
136
|
+
> Each 3-run configuration takes approximately 10–12 minutes end to end.
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
- **Gemini provider** — Default embeddings (`text-embedding-004`) and chat (`gemini-1.5-flash`) via `google-genai`; zero-cost local dev with a free AI Studio key.
|
|
12
|
+
- **Production startup checks** (`startup.py`) — API lifespan verifies PostgreSQL, pgvector extension, provider API keys, and Ollama reachability when configured.
|
|
13
|
+
- **Security module** (`security.py`) — Repo path validation (blocks sensitive dirs and symlink escape), query sanitization (500-char cap, control-char strip), Ollama URL SSRF guard (localhost-only unless `CONTEXTCRAFT_OLLAMA_ALLOW_REMOTE=true`).
|
|
14
|
+
- **API rate limiting** — `slowapi` on `POST /ask` (10 requests/minute per IP).
|
|
15
|
+
- **Configurable CORS** — `CONTEXTCRAFT_ALLOWED_ORIGINS` (comma-separated); defaults to local Next.js dev origins (not `*`).
|
|
16
|
+
- **Async git subprocesses** — `git/async_git.py`; blame and log no longer block the event loop during indexing.
|
|
17
|
+
- **HTTP timeouts** — Shared `http_timeouts.py` for Ollama and outbound clients (connect/read caps).
|
|
18
|
+
- **PyPI packaging** — `py.typed` marker, `slowapi` runtime dependency, hatchling wheel build.
|
|
19
|
+
- **Railway deploy config** — `railway.toml` with `/health` check, `restartPolicyType = on_failure`, `$PORT` binding.
|
|
20
|
+
- **Docker hardening** — Multi-stage image runs as non-root `appuser`; `PORT` env respected in CMD.
|
|
21
|
+
|
|
22
|
+
### Changed
|
|
23
|
+
- **Default providers** — `embedding_provider` and `llm_provider` default to `gemini` (OpenAI and Anthropic remain swappable).
|
|
24
|
+
- **`DATABASE_URL` alias** — Railway/Heroku-style `DATABASE_URL` accepted alongside `CONTEXTCRAFT_DATABASE_URL`.
|
|
25
|
+
- **Cohere reranker errors** — Raises `RerankerUnavailableError` with RRF fallback instead of opaque failures; API emits a warning SSE event when reranking is skipped.
|
|
26
|
+
- **Tree-sitter parsing** — CPU-bound `parse_file` runs in a thread pool via `parse_file_async` during async indexing.
|
|
27
|
+
- **`.gitignore`** — Comprehensive stack coverage (Python, Next.js, Docker, eval outputs, IDE, Railway); `CLAUDE.md` kept local-only (not in public repo).
|
|
28
|
+
|
|
29
|
+
### Fixed
|
|
30
|
+
- Datetime defaults use timezone-aware `datetime.now(UTC)` (no deprecated `utcnow()`).
|
|
31
|
+
- OpenAI SSE streams close cleanly on client disconnect (`CancelledError`).
|
|
32
|
+
- Ollama embedder/LLM use validated base URLs and explicit timeouts.
|
|
33
|
+
|
|
34
|
+
## [0.3.0] — 2026-05-17
|
|
35
|
+
|
|
36
|
+
### Added
|
|
37
|
+
- **Dependency graph** (`chunk_edges` table) — Static analyzer that resolves direct Python imports and class inheritance, mapping them to source and target chunk IDs with confidence scores.
|
|
38
|
+
- **Context expansion** — Expands LLM context with 1-hop dependencies; cycle guard via `visited` set in `graph.expander`. Adds `--with-deps` CLI flag and `expand_deps` API parameter.
|
|
39
|
+
- **Ollama LLM provider** (`qwen2.5-coder:7b`) — `OllamaLLM` via `/api/chat` with `/api/tags` connection verification and streaming support.
|
|
40
|
+
- **Multi-repo search** — `hybrid_search` performs RRF normalization per repo before merging, so large repos do not drown out small ones.
|
|
41
|
+
- **Multi-repo Web UI** — Repo multi-select and “Expand Graph Context” toggle in `web/src/app/page.tsx`.
|
|
42
|
+
- **Multi-repo CLI/API** — `--repos` / `--all-repos` CLI flags; `repo_ids` / `all_repos` on `/ask`.
|
|
43
|
+
- **Benchmark harness** — `BENCHMARK.md` and eval tooling for source hit rate and latency.
|
|
44
|
+
- **Testing** — `tests/fixtures/import_ground_truth.txt` and `test_graph.py` for dependency edge generation.
|
|
45
|
+
|
|
46
|
+
## [0.2.0] — 2026-05-17
|
|
47
|
+
|
|
48
|
+
### Added
|
|
49
|
+
- **Cohere reranker** (`rerank-english-v3.0`) — cross-encoder reranking via `reranker/` module with abstract `BaseReranker` interface. Increases retrieval pool to 60 candidates, reranks down to requested `top_k`.
|
|
50
|
+
- **`--no-rerank` CLI flag** on `contextcraft ask` to bypass reranking when speed is preferred over precision.
|
|
51
|
+
- **Reranker in FastAPI** — `/ask` endpoint automatically reranks when `CONTEXTCRAFT_COHERE_API_KEY` is set.
|
|
52
|
+
- **RAG evaluation harness** (`eval/`) — `run_eval.py` measures source hit rate, LLM-as-a-judge faithfulness, and p50 latency across 10 benchmark queries. Includes `test_cases.json` and `README.md`.
|
|
53
|
+
- **Next.js Web UI** (`web/`) — App Router, TypeScript, vanilla CSS dark-mode design:
|
|
54
|
+
- Chat interface with SSE streaming (token-by-token rendering)
|
|
55
|
+
- Repository selector dropdown
|
|
56
|
+
- Source citations with Shiki syntax highlighting, line ranges, relevance scores, and git blame metadata
|
|
57
|
+
- API proxy routes (`/api/ask`, `/api/repos`) to avoid CORS
|
|
58
|
+
- Multi-stage Dockerfile for production builds
|
|
59
|
+
- `web` service in `docker-compose.yml`
|
|
60
|
+
- **Background task tracking** in FastAPI — strong references to `asyncio.Task` objects prevent GC of indexing tasks.
|
|
61
|
+
|
|
62
|
+
### Changed
|
|
63
|
+
- Hybrid search now fetches 60 candidates (up from 20) when reranker is enabled.
|
|
64
|
+
- `pyproject.toml` — added `cohere>=5.0.0` dependency, mypy overrides, ruff ignore rules for `E501`, `W293`, `UP042`, `B905`.
|
|
65
|
+
|
|
66
|
+
### Fixed
|
|
67
|
+
- All `ruff check` lint errors across 36 source files.
|
|
68
|
+
- All `ruff format` formatting inconsistencies across 15 files.
|
|
69
|
+
- All `mypy --strict` type-check errors (21 → 0).
|
|
70
|
+
- Fixed `test_imports_extracted` test after ruff removed unused import from fixture.
|
|
71
|
+
- Replaced ambiguous EN DASH with hyphen in `context_builder.py`.
|
|
72
|
+
|
|
73
|
+
## [0.1.0] — 2026-05-17
|
|
74
|
+
|
|
75
|
+
### Added
|
|
76
|
+
- **Tree-sitter AST parser** for Python, JavaScript/TypeScript, and Go — extracts functions, classes, and modules as semantic chunks (not fixed-size splits).
|
|
77
|
+
- **PostgreSQL + pgvector** storage with async connection pool (asyncpg), retry logic, and full CRUD for repositories and code chunks.
|
|
78
|
+
- **Hybrid search** via Reciprocal Rank Fusion (RRF) combining pgvector cosine similarity and PostgreSQL tsvector full-text search in a single SQL query.
|
|
79
|
+
- **Git blame + commit history** per chunk — runs `git blame --porcelain` once per file (not per chunk) for a 50x speedup.
|
|
80
|
+
- **CLI** (`contextcraft index`, `contextcraft ask`, `contextcraft status`) built with Typer and Rich progress bars.
|
|
81
|
+
- **FastAPI API server** with `/health`, `/repos`, `/index`, and `/ask` (SSE streaming) endpoints.
|
|
82
|
+
- **Embeddings pipeline** with OpenAI `text-embedding-3-small` and Ollama support via abstract `BaseEmbedder` interface.
|
|
83
|
+
- **LLM providers**: OpenAI and Anthropic Claude, swappable via config, with streaming support.
|
|
84
|
+
- **Docker Compose** for local pgvector, multi-stage Dockerfile for production.
|
|
85
|
+
- **GitHub Actions CI**: ruff format → ruff check → mypy `--strict` → pytest.
|
|
86
|
+
- **Incremental indexing**: `--incremental` flag re-indexes only files changed since last commit.
|
|
87
|
+
- **`.gitignore` / `.contextignore` support**: skips binary files, `node_modules`, etc.
|