PyPI - docforge-cli - Versions diffs - 0.2.1__tar.gz → 0.3.0__tar.gz - Mend

docforge-cli 0.2.1__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

{docforge_cli-0.2.1/src/docforge_cli.egg-info → docforge_cli-0.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: docforge-cli
-Version: 0.2.1
+Version: 0.3.0
 Summary: Forge searchable context from Confluence and git repos for AI coding assistants
 License: MIT
 Project-URL: Homepage, https://GranatenUdo.github.io/docforge/
@@ -11,29 +11,29 @@ Project-URL: Documentation, https://GranatenUdo.github.io/docforge/
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: typer>=0.12
-Requires-Dist: asyncpg>=0.30
-Requires-Dist: httpx>=0.27
-Requires-Dist: pydantic>=2.9
-Requires-Dist: pydantic-settings>=2.6
-Requires-Dist: beautifulsoup4>=4.12
-Requires-Dist: sentence-transformers>=5.0
-Requires-Dist: pgvector>=0.3
-Requires-Dist: pyyaml>=6.0
-Requires-Dist: fastmcp>=2.0
-Requires-Dist: fastapi>=0.115
-Requires-Dist: uvicorn>=0.34
-Requires-Dist: numpy>=1.26
+Requires-Dist: typer<1.0,>=0.12
+Requires-Dist: asyncpg<1.0,>=0.30
+Requires-Dist: httpx<1.0,>=0.27
+Requires-Dist: pydantic<3.0,>=2.9
+Requires-Dist: pydantic-settings<3.0,>=2.6
+Requires-Dist: beautifulsoup4<5.0,>=4.12
+Requires-Dist: sentence-transformers<6.0,>=5.0
+Requires-Dist: pgvector<1.0,>=0.3
+Requires-Dist: pyyaml<7.0,>=6.0
+Requires-Dist: fastmcp<4.0,>=3.0
+Requires-Dist: fastapi<1.0,>=0.115
+Requires-Dist: uvicorn<1.0,>=0.34
+Requires-Dist: numpy<3.0,>=1.26
 Provides-Extra: dev
-Requires-Dist: pytest>=8.0; extra == "dev"
-Requires-Dist: pytest-asyncio>=0.24; extra == "dev"
-Requires-Dist: pytest-cov>=7.0; extra == "dev"
-Requires-Dist: ruff>=0.8; extra == "dev"
-Requires-Dist: testcontainers[postgres]>=4.0; extra == "dev"
+Requires-Dist: pytest<10.0,>=9.0; extra == "dev"
+Requires-Dist: pytest-asyncio<2.0,>=1.0; extra == "dev"
+Requires-Dist: pytest-cov<8.0,>=7.0; extra == "dev"
+Requires-Dist: ruff<1.0,>=0.8; extra == "dev"
+Requires-Dist: testcontainers[postgres]<5.0,>=4.0; extra == "dev"
 Provides-Extra: entra
-Requires-Dist: fastapi-azure-auth>=5.0; extra == "entra"
-Requires-Dist: azure-identity>=1.19; extra == "entra"
-Requires-Dist: aiohttp>=3.10; extra == "entra"
+Requires-Dist: fastapi-azure-auth<6.0,>=5.0; extra == "entra"
+Requires-Dist: azure-identity<2.0,>=1.19; extra == "entra"
+Requires-Dist: aiohttp<4.0,>=3.10; extra == "entra"
 Dynamic: license-file
 # docforge
@@ -83,15 +83,22 @@ docforge is the narrow, focused option in this landscape: minimal footprint, MCP
 - You need near-real-time updates → ingest is batch; no webhook-driven continuous sync yet.
 - You need multilingual search evaluated → EmbeddingGemma is multilingual, but docforge has no eval coverage on non-English corpora yet.
+For the full trust model, accepted risks, and assumptions docforge makes about its operating environment, see [`docs/threat-model.md`](docs/threat-model.md).
 ## Quick Start
+**Prerequisites:**
+- Python 3.12+
+- Docker (for the local Postgres + pgvector container)
+- A [Hugging Face token](https://huggingface.co/settings/tokens) with access to the gated [EmbeddingGemma-300M](https://huggingface.co/google/embeddinggemma-300m) model. Accept the model license on the model page first.
 ```bash
 pip install docforge-cli
 docforge init my-project
 cd my-project
 # Edit docforge.yml with your Confluence URL
 # Edit sources.yml with your page IDs and local git repo paths
-# Edit .env with your credentials
+# Edit .env with your credentials (CONFLUENCE_API_TOKEN, HF_TOKEN, DATABASE_URL)
 docker compose up -d db
 docforge init-db
 docforge ingest
@@ -126,15 +133,69 @@ When an AI assistant needs cross-team context, it calls docforge's `search_docum
 ## Deploy to your infrastructure
-For team-wide use, deploy the search API to Azure (~$35/month at default SKUs):
+For team-wide use, deploy the search API to Azure (~$90/month at default SKUs with embedder always-on for production; ~$55/month with the default scale-to-zero embedder):
 - PostgreSQL Flexible Server (Burstable B1ms, 32 GB) with pgvector.
 - Container App running the FastAPI search API.
-- Container Registry, Key Vault, Log Analytics, managed environment.
+- Container App running the embedder service (EmbeddingGemma-300M, model baked into the image).
+- Container Registry (Standard), Key Vault, Log Analytics, managed environment.
 - Team members use a lightweight MCP client that calls the hosted API.
 See [`deploy/azure/`](deploy/azure/) for Bicep templates and a full cost breakdown.
+## Self-hosting / forking
+The embedder image bakes the EmbeddingGemma-300M model at build time,
+which requires a HuggingFace access token. Forks and adopters need to:
+1. Get an HF token at https://huggingface.co/settings/tokens.
+2. Accept the EmbeddingGemma license at
+   https://huggingface.co/google/embeddinggemma-300m.
+3. Add a repo secret `HF_TOKEN` under
+   `Settings → Secrets and variables → Actions`.
+The CI workflow forwards the secret to BuildKit via
+`--mount=type=secret,id=hf_token`; the token never enters any image
+layer. If you fork this repo and run the CI workflow, it will build the
+embedder image automatically on commits to `master` and PRs (without
+pushing unless on `master`). To enable pushes to a registry, also add
+secrets `ACR_LOGIN_SERVER`, `ACR_USERNAME`, and `ACR_PASSWORD`.
+## Upgrading the embedding model
+The dimension-mismatch guard in `RemoteEmbedder` makes an
+embedder/search API mismatch loud (`HTTP 503` with a clear log line)
+rather than silent. Upgrade procedure:
+1. **Pick the new model.** Note its output dimensionality `D` (e.g.
+   `768` for EmbeddingGemma, `1024` for many newer models).
+2. **Update config.** Set `embedding_model: <new>` and
+   `embedding_dimensions: D` in the search API's deployment config
+   (Bicep parameters + Key Vault, or `docforge.yml` for self-hosters).
+3. **Build the embedder image** with the new model:
+   ```bash
+   docker build \
+     --build-arg EMBEDDING_MODEL=<new> \
+     --secret id=hf_token,env=HF_TOKEN \
+     -f Dockerfile.embedder \
+     -t docforge-embedder:<tag> .
+   ```
+4. **Apply schema migration.** Add a new vector column:
+   ```sql
+   ALTER TABLE chunks ADD COLUMN embedding_new vector(D);
+   ```
+   Re-ingest to populate the new column. Until backfill completes, the
+   search API serves from the old column.
+5. **Cut over.** Deploy the new embedder image first, then the new
+   search API. The dim-mismatch guard ensures search refuses to serve
+   wrong-dim vectors.
+6. **Drop the old column** after a confidence interval.
 ## Configuration
 See `docs/` for the full configuration reference, including `docforge.yml` and `sources.yml` schemas.
@@ -170,6 +231,16 @@ Check that the database is running: `docker compose up -d db`. Verify `DATABASE_
 MIT. See [LICENSE](LICENSE).
+## License compatibility
+docforge is MIT-licensed; the default embedding model,
+[EmbeddingGemma-300M](https://huggingface.co/google/embeddinggemma-300m), is
+distributed under the [Gemma Terms of Use](https://ai.google.dev/gemma/terms),
+which restrict harmful use and building products that compete with Gemma. Swap
+to a permissively-licensed alternative via `embedding_model` in `docforge.yml`
+if those constraints don't fit your use case (see
+[microsite FAQ — Can I use a different embedding model?](https://GranatenUdo.github.io/docforge/faq/#can-i-use-a-different-embedding-model)).
 ## Credits
 docforge stands on open shoulders:

docforge_cli-0.2.1/PKG-INFO → docforge_cli-0.3.0/README.md RENAMED Viewed

@@ -1,41 +1,3 @@
-Metadata-Version: 2.4
-Name: docforge-cli
-Version: 0.2.1
-Summary: Forge searchable context from Confluence and git repos for AI coding assistants
-License: MIT
-Project-URL: Homepage, https://GranatenUdo.github.io/docforge/
-Project-URL: Source, https://github.com/GranatenUdo/docforge
-Project-URL: Issues, https://github.com/GranatenUdo/docforge/issues
-Project-URL: Changelog, https://github.com/GranatenUdo/docforge/blob/master/CHANGELOG.md
-Project-URL: Documentation, https://GranatenUdo.github.io/docforge/
-Requires-Python: >=3.12
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: typer>=0.12
-Requires-Dist: asyncpg>=0.30
-Requires-Dist: httpx>=0.27
-Requires-Dist: pydantic>=2.9
-Requires-Dist: pydantic-settings>=2.6
-Requires-Dist: beautifulsoup4>=4.12
-Requires-Dist: sentence-transformers>=5.0
-Requires-Dist: pgvector>=0.3
-Requires-Dist: pyyaml>=6.0
-Requires-Dist: fastmcp>=2.0
-Requires-Dist: fastapi>=0.115
-Requires-Dist: uvicorn>=0.34
-Requires-Dist: numpy>=1.26
-Provides-Extra: dev
-Requires-Dist: pytest>=8.0; extra == "dev"
-Requires-Dist: pytest-asyncio>=0.24; extra == "dev"
-Requires-Dist: pytest-cov>=7.0; extra == "dev"
-Requires-Dist: ruff>=0.8; extra == "dev"
-Requires-Dist: testcontainers[postgres]>=4.0; extra == "dev"
-Provides-Extra: entra
-Requires-Dist: fastapi-azure-auth>=5.0; extra == "entra"
-Requires-Dist: azure-identity>=1.19; extra == "entra"
-Requires-Dist: aiohttp>=3.10; extra == "entra"
-Dynamic: license-file
 # docforge
 **The self-hosted context engine for AI coding assistants.**
@@ -83,15 +45,22 @@ docforge is the narrow, focused option in this landscape: minimal footprint, MCP
 - You need near-real-time updates → ingest is batch; no webhook-driven continuous sync yet.
 - You need multilingual search evaluated → EmbeddingGemma is multilingual, but docforge has no eval coverage on non-English corpora yet.
+For the full trust model, accepted risks, and assumptions docforge makes about its operating environment, see [`docs/threat-model.md`](docs/threat-model.md).
 ## Quick Start
+**Prerequisites:**
+- Python 3.12+
+- Docker (for the local Postgres + pgvector container)
+- A [Hugging Face token](https://huggingface.co/settings/tokens) with access to the gated [EmbeddingGemma-300M](https://huggingface.co/google/embeddinggemma-300m) model. Accept the model license on the model page first.
 ```bash
 pip install docforge-cli
 docforge init my-project
 cd my-project
 # Edit docforge.yml with your Confluence URL
 # Edit sources.yml with your page IDs and local git repo paths
-# Edit .env with your credentials
+# Edit .env with your credentials (CONFLUENCE_API_TOKEN, HF_TOKEN, DATABASE_URL)
 docker compose up -d db
 docforge init-db
 docforge ingest
@@ -126,15 +95,69 @@ When an AI assistant needs cross-team context, it calls docforge's `search_docum
 ## Deploy to your infrastructure
-For team-wide use, deploy the search API to Azure (~$35/month at default SKUs):
+For team-wide use, deploy the search API to Azure (~$90/month at default SKUs with embedder always-on for production; ~$55/month with the default scale-to-zero embedder):
 - PostgreSQL Flexible Server (Burstable B1ms, 32 GB) with pgvector.
 - Container App running the FastAPI search API.
-- Container Registry, Key Vault, Log Analytics, managed environment.
+- Container App running the embedder service (EmbeddingGemma-300M, model baked into the image).
+- Container Registry (Standard), Key Vault, Log Analytics, managed environment.
 - Team members use a lightweight MCP client that calls the hosted API.
 See [`deploy/azure/`](deploy/azure/) for Bicep templates and a full cost breakdown.
+## Self-hosting / forking
+The embedder image bakes the EmbeddingGemma-300M model at build time,
+which requires a HuggingFace access token. Forks and adopters need to:
+1. Get an HF token at https://huggingface.co/settings/tokens.
+2. Accept the EmbeddingGemma license at
+   https://huggingface.co/google/embeddinggemma-300m.
+3. Add a repo secret `HF_TOKEN` under
+   `Settings → Secrets and variables → Actions`.
+The CI workflow forwards the secret to BuildKit via
+`--mount=type=secret,id=hf_token`; the token never enters any image
+layer. If you fork this repo and run the CI workflow, it will build the
+embedder image automatically on commits to `master` and PRs (without
+pushing unless on `master`). To enable pushes to a registry, also add
+secrets `ACR_LOGIN_SERVER`, `ACR_USERNAME`, and `ACR_PASSWORD`.
+## Upgrading the embedding model
+The dimension-mismatch guard in `RemoteEmbedder` makes an
+embedder/search API mismatch loud (`HTTP 503` with a clear log line)
+rather than silent. Upgrade procedure:
+1. **Pick the new model.** Note its output dimensionality `D` (e.g.
+   `768` for EmbeddingGemma, `1024` for many newer models).
+2. **Update config.** Set `embedding_model: <new>` and
+   `embedding_dimensions: D` in the search API's deployment config
+   (Bicep parameters + Key Vault, or `docforge.yml` for self-hosters).
+3. **Build the embedder image** with the new model:
+   ```bash
+   docker build \
+     --build-arg EMBEDDING_MODEL=<new> \
+     --secret id=hf_token,env=HF_TOKEN \
+     -f Dockerfile.embedder \
+     -t docforge-embedder:<tag> .
+   ```
+4. **Apply schema migration.** Add a new vector column:
+   ```sql
+   ALTER TABLE chunks ADD COLUMN embedding_new vector(D);
+   ```
+   Re-ingest to populate the new column. Until backfill completes, the
+   search API serves from the old column.
+5. **Cut over.** Deploy the new embedder image first, then the new
+   search API. The dim-mismatch guard ensures search refuses to serve
+   wrong-dim vectors.
+6. **Drop the old column** after a confidence interval.
 ## Configuration
 See `docs/` for the full configuration reference, including `docforge.yml` and `sources.yml` schemas.
@@ -170,6 +193,16 @@ Check that the database is running: `docker compose up -d db`. Verify `DATABASE_
 MIT. See [LICENSE](LICENSE).
+## License compatibility
+docforge is MIT-licensed; the default embedding model,
+[EmbeddingGemma-300M](https://huggingface.co/google/embeddinggemma-300m), is
+distributed under the [Gemma Terms of Use](https://ai.google.dev/gemma/terms),
+which restrict harmful use and building products that compete with Gemma. Swap
+to a permissively-licensed alternative via `embedding_model` in `docforge.yml`
+if those constraints don't fit your use case (see
+[microsite FAQ — Can I use a different embedding model?](https://GranatenUdo.github.io/docforge/faq/#can-i-use-a-different-embedding-model)).
 ## Credits
 docforge stands on open shoulders:

{docforge_cli-0.2.1 → docforge_cli-0.3.0}/pyproject.toml RENAMED Viewed

@@ -4,25 +4,25 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "docforge-cli"
-version = "0.2.1"
+version = "0.3.0"
 description = "Forge searchable context from Confluence and git repos for AI coding assistants"
 readme = "README.md"
 license = {text = "MIT"}
 requires-python = ">=3.12"
 dependencies = [
-    "typer>=0.12",
-    "asyncpg>=0.30",
-    "httpx>=0.27",
-    "pydantic>=2.9",
-    "pydantic-settings>=2.6",
-    "beautifulsoup4>=4.12",
-    "sentence-transformers>=5.0",
-    "pgvector>=0.3",
-    "pyyaml>=6.0",
-    "fastmcp>=2.0",
-    "fastapi>=0.115",
-    "uvicorn>=0.34",
-    "numpy>=1.26",
+    "typer>=0.12,<1.0",
+    "asyncpg>=0.30,<1.0",
+    "httpx>=0.27,<1.0",
+    "pydantic>=2.9,<3.0",
+    "pydantic-settings>=2.6,<3.0",
+    "beautifulsoup4>=4.12,<5.0",
+    "sentence-transformers>=5.0,<6.0",
+    "pgvector>=0.3,<1.0",
+    "pyyaml>=6.0,<7.0",
+    "fastmcp>=3.0,<4.0",
+    "fastapi>=0.115,<1.0",
+    "uvicorn>=0.34,<1.0",
+    "numpy>=1.26,<3.0",   # both 1.x and 2.x tested
 ]
 [project.urls]
@@ -37,17 +37,17 @@ docforge = "docforge.cli:app"
 [project.optional-dependencies]
 dev = [
-    "pytest>=8.0",
-    "pytest-asyncio>=0.24",
-    "pytest-cov>=7.0",
-    "ruff>=0.8",
-    "testcontainers[postgres]>=4.0",
+    "pytest>=9.0,<10.0",
+    "pytest-asyncio>=1.0,<2.0",
+    "pytest-cov>=7.0,<8.0",
+    "ruff>=0.8,<1.0",
+    "testcontainers[postgres]>=4.0,<5.0",
 ]
 entra = [
-    "fastapi-azure-auth>=5.0",
-    "azure-identity>=1.19",
+    "fastapi-azure-auth>=5.0,<6.0",
+    "azure-identity>=1.19,<2.0",
     # aiohttp is required by azure-identity.aio's async pipeline
-    "aiohttp>=3.10",
+    "aiohttp>=3.10,<4.0",
 ]
 [tool.setuptools.packages.find]
@@ -68,7 +68,7 @@ select = ["E", "F", "I", "W"]
 asyncio_mode = "auto"
 testpaths = ["tests"]
 markers = [
-    "integration: requires Docker (pgvector container)",
+    "integration: tests requiring real external resources (Docker for Postgres, network for embedding model)",
 ]
 addopts = "--cov=src/docforge"

{docforge_cli-0.2.1 → docforge_cli-0.3.0}/src/docforge/api.py RENAMED Viewed

@@ -14,51 +14,50 @@ import time
 from contextlib import asynccontextmanager
 from typing import Any
+import asyncpg
 import numpy as np
 from fastapi import Depends, FastAPI, HTTPException, Request
 from fastapi.security import SecurityScopes
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from docforge.config import Settings
-from docforge.db import close_pool, get_pool
-from docforge.processors.embedder import Embedder
+from docforge.db import _init_connection  # registers pgvector codec on each new pool conn
+from docforge.processors.embedder import Embedder, EmbedderProtocol
+from docforge.query_log import log_query
 logger = logging.getLogger(__name__)
-_embedder: Embedder | None = None
-_settings: Settings | None = None
-_azure_scheme = None  # Populated in lifespan when auth.mode == "entra"
-_cleanup_task: asyncio.Task | None = None
 _CLEANUP_INTERVAL_SECONDS = 3600  # one hour — overridable in tests
+CLEANUP_LOCK_ID = 0xD0CF0001  # decimal 3,503,226,881 — stable across replicas
-async def _query_log_cleanup_loop(database_url: str, retention_days: int) -> None:
-    """Deletes query_log rows older than retention_days every
-    _CLEANUP_INTERVAL_SECONDS. Idempotent, so multi-replica is safe."""
-    # int() coercion makes the f-string SQL below injection-safe. asyncpg's
+async def _query_log_cleanup_loop(pool: asyncpg.Pool, retention_days: int) -> None:
+    """Each iteration takes a transaction-scoped advisory lock. A replica
+    that can't acquire it skips this iteration. The lock auto-releases at
+    COMMIT/ROLLBACK and on connection drop — no manual unlock to forget."""
+    # int() coercion makes the f-string SQL below injection-safe; asyncpg's
     # $1::interval parameter binding doesn't accept str, hence the literal.
     days = int(retention_days)
     while True:
         try:
-            pool = await get_pool(database_url)
             async with pool.acquire() as conn:
-                result = await conn.execute(
-                    f"DELETE FROM query_log WHERE created_at < now() - interval '{days} days'"
-                )
-            logger.info("query_log cleanup: %s", result)
+                async with conn.transaction():
+                    got_lock = await conn.fetchval(
+                        "SELECT pg_try_advisory_xact_lock($1)", CLEANUP_LOCK_ID
+                    )
+                    if got_lock:
+                        result = await conn.execute(
+                            f"DELETE FROM query_log "
+                            f"WHERE created_at < now() - interval '{days} days'"
+                        )
+                        logger.info("query_log cleanup: %s", result)
+                    else:
+                        logger.debug("query_log cleanup: another replica holds the lock")
         except Exception as e:
             logger.exception("query_log cleanup failed: %s", e)
         await asyncio.sleep(_CLEANUP_INTERVAL_SECONDS)
-def _get_settings() -> Settings:
-    global _settings
-    if _settings is None:
-        _settings = Settings()
-    return _settings
 def _build_auth_scheme(settings: Settings):
     """Return a SingleTenantAzureAuthorizationCodeBearer if mode==entra, else None."""
     if settings.auth.mode != "entra":
@@ -75,55 +74,91 @@ def _build_auth_scheme(settings: Settings):
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    """Load the embedding model at startup; close the DB pool on shutdown."""
-    global _embedder, _azure_scheme, _cleanup_task
-    settings = _get_settings()
-    _azure_scheme = _build_auth_scheme(settings)
-    if _azure_scheme is not None:
-        await _azure_scheme.openid_config.load_config()
-        logger.info(
-            "Entra auth enabled (tenant=%s, audience=%s)",
-            settings.auth.tenant_id,
-            settings.auth.audience,
-        )
-    logger.info("Loading embedding model...")
-    _embedder = Embedder(settings.embedding_model, hf_token=settings.hf_token.get_secret_value())
-    logger.info("Model loaded: %s (%dd)", _embedder.model_name, _embedder.dimensions)
-    _cleanup_task = asyncio.create_task(
-        _query_log_cleanup_loop(settings.database_url, settings.query_log_retention_days)
+    """Build per-process resources at startup; tear them down on shutdown.
+    Yields a dict whose entries flow into request.state for handler access
+    via the Depends getters below."""
+    settings = Settings()
+    embedder: EmbedderProtocol | None = None  # set inside try; outer finally reads it
+    pool = await asyncpg.create_pool(
+        settings.database_url,
+        min_size=settings.pool_min_size,
+        max_size=settings.pool_max_size,
+        init=_init_connection,
     )
+    try:
+        # Embedder construction can raise (Phase 1 dimension guard); the
+        # outer finally still closes the pool in that case. Offloaded to a
+        # thread so the model-load file I/O doesn't stall the event loop.
+        embedder = await asyncio.to_thread(Embedder.from_settings, settings)
+        logger.info("Model loaded: %s (%dd)", embedder.model_name, embedder.dimensions)
+        azure_scheme = _build_auth_scheme(settings)
+        if azure_scheme is not None:
+            await azure_scheme.openid_config.load_config()
+            logger.info(
+                "Entra auth enabled (tenant=%s, audience=%s)",
+                settings.auth.tenant_id,
+                settings.auth.audience,
+            )
-    yield
-    if _cleanup_task is not None:
-        _cleanup_task.cancel()
+        cleanup_task = asyncio.create_task(
+            _query_log_cleanup_loop(pool, settings.query_log_retention_days)
+        )
         try:
-            await _cleanup_task
-        except asyncio.CancelledError:
-            pass
-    await close_pool()
+            yield {
+                "settings": settings,
+                "pool": pool,
+                "embedder": embedder,
+                "azure_scheme": azure_scheme,
+            }
+        finally:
+            cleanup_task.cancel()
+            try:
+                await cleanup_task
+            except asyncio.CancelledError:
+                pass
+    finally:
+        if embedder is not None and hasattr(embedder, "aclose"):
+            await embedder.aclose()
+        await pool.close()
 app = FastAPI(title="docforge", lifespan=lifespan)
-async def _auth_dependency(request: Request):
+def get_settings(request: Request) -> Settings:
+    return request.state.settings
+def get_pool_dep(request: Request) -> asyncpg.Pool:
+    return request.state.pool
+def get_embedder(request: Request) -> EmbedderProtocol:
+    return request.state.embedder
+def get_azure_scheme(request: Request):
+    return request.state.azure_scheme
+async def _auth_dependency(
+    request: Request,
+    azure_scheme=Depends(get_azure_scheme),
+):
     """Return the authenticated User under auth.mode=entra, None otherwise."""
-    if _azure_scheme is None:
+    if azure_scheme is None:
         return None
-    # Empty SecurityScopes: we don't enforce scope-level authorization beyond
-    # the token validation the scheme itself does. Without this arg the call
-    # signature mismatches what fastapi-azure-auth expects.
-    return await _azure_scheme(request, SecurityScopes())
+    return await azure_scheme(request, SecurityScopes())
 class SearchRequest(BaseModel):
-    query: str
+    query: str = Field(..., max_length=8000)
     user_name: str
     team_name: str
     area_name: str | None = None
-    limit: int = 5
+    limit: int = Field(5, ge=1, le=50)
 class SearchResult(BaseModel):
@@ -142,32 +177,35 @@ class SearchResponse(BaseModel):
 @app.get("/health")
-async def health() -> dict[str, Any]:
+async def health(request: Request) -> dict[str, Any]:
     """Health check endpoint."""
+    embedder = getattr(request.state, "embedder", None)
     return {
         "status": "ok",
-        "model": _embedder.model_name if _embedder else "not loaded",
+        "model": embedder.model_name if embedder else "not loaded",
     }
 @app.post("/search", response_model=SearchResponse)
-async def search(req: SearchRequest, user=Depends(_auth_dependency)) -> SearchResponse:
+async def search(
+    req: SearchRequest,
+    settings: Settings = Depends(get_settings),
+    pool: asyncpg.Pool = Depends(get_pool_dep),
+    embedder: EmbedderProtocol = Depends(get_embedder),
+    user=Depends(_auth_dependency),
+) -> SearchResponse:
     """Search indexed documentation by semantic similarity."""
     start = time.perf_counter()
-    if not _embedder:
-        raise HTTPException(status_code=503, detail="Embedding model not loaded yet")
     try:
-        query_vector = _embedder.embed_query(req.query)
+        query_vector = await embedder.aembed_query(req.query)
     except Exception as e:
         logger.error("Embedding failed: %s", e)
         raise HTTPException(status_code=500, detail="Failed to embed query")
-    settings = _get_settings()
     user_tags = [req.team_name] + ([req.area_name] if req.area_name else [])
     try:
-        pool = await get_pool(settings.database_url)
         async with pool.acquire() as conn:
             rows = await conn.fetch(
                 """
@@ -201,12 +239,8 @@ async def search(req: SearchRequest, user=Depends(_auth_dependency)) -> SearchRe
         logger.error("Database error during search: %s", e)
         raise HTTPException(status_code=503, detail="Database unavailable")
-    from docforge.query_log import log_query
     request_ms = int((time.perf_counter() - start) * 1000)
-    # team_name and area_name remain self-declared (routing hints, not identity).
-    # user_name and user_oid come from the token when present.
     await log_query(
         pool,
         user.preferred_username if user else req.user_name,
@@ -234,11 +268,12 @@ async def search(req: SearchRequest, user=Depends(_auth_dependency)) -> SearchRe
 @app.get("/sources")
-async def list_sources(user=Depends(_auth_dependency)) -> dict[str, Any]:
+async def list_sources(
+    pool: asyncpg.Pool = Depends(get_pool_dep),
+    user=Depends(_auth_dependency),
+) -> dict[str, Any]:
     """List all indexed documentation sources."""
-    settings = _get_settings()
     try:
-        pool = await get_pool(settings.database_url)
         async with pool.acquire() as conn:
             rows = await conn.fetch(
                 """

docforge-cli 0.2.1__tar.gz → 0.3.0__tar.gz

docforge-cli 0.2.1__tar.gz → 0.3.0__tar.gz