docforge-cli 0.3.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/PKG-INFO +37 -1
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/README.md +33 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/pyproject.toml +5 -1
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/api.py +5 -4
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/cli.py +29 -3
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/config.py +1 -1
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/mcp_server.py +41 -23
- docforge_cli-0.4.1/src/docforge/remote_client.py +213 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sources.py +1 -1
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge_cli.egg-info/PKG-INFO +37 -1
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge_cli.egg-info/SOURCES.txt +1 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge_cli.egg-info/requires.txt +4 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/LICENSE +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/setup.cfg +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/__init__.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/__main__.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/crawlers/__init__.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/crawlers/confluence.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/crawlers/git.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/db.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/embedder_api.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/ingest.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/lint.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/processors/__init__.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/processors/chunker.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/processors/embedder.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/processors/parser.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/query_log.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/ranking.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/scripts/__init__.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/scripts/eval_search.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/scripts/latency_report.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/migrations/001_add_source_identifier.sql +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/migrations/002_add_status_index.sql +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/migrations/003_add_source_tags.sql +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/migrations/004_add_query_log.sql +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/migrations/005_add_query_log_user_oid.sql +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/migrations/006_add_query_log_request_ms.sql +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/schema.sql +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/templates/docforge.yml +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/templates/docker-compose.yml +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/templates/mcp_client.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/templates/sources.yml +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge_cli.egg-info/dependency_links.txt +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge_cli.egg-info/entry_points.txt +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge_cli.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docforge-cli
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Forge searchable context from Confluence and git repos for AI coding assistants
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://GranatenUdo.github.io/docforge/
|
|
@@ -34,6 +34,9 @@ Provides-Extra: entra
|
|
|
34
34
|
Requires-Dist: fastapi-azure-auth<6.0,>=5.0; extra == "entra"
|
|
35
35
|
Requires-Dist: azure-identity<2.0,>=1.19; extra == "entra"
|
|
36
36
|
Requires-Dist: aiohttp<4.0,>=3.10; extra == "entra"
|
|
37
|
+
Provides-Extra: azure
|
|
38
|
+
Requires-Dist: azure-identity<2.0,>=1.19; extra == "azure"
|
|
39
|
+
Requires-Dist: aiohttp<4.0,>=3.10; extra == "azure"
|
|
37
40
|
Dynamic: license-file
|
|
38
41
|
|
|
39
42
|
# docforge
|
|
@@ -143,6 +146,39 @@ For team-wide use, deploy the search API to Azure (~$90/month at default SKUs wi
|
|
|
143
146
|
|
|
144
147
|
See [`deploy/azure/`](deploy/azure/) for Bicep templates and a full cost breakdown.
|
|
145
148
|
|
|
149
|
+
## Use a hosted instance (no local DB required)
|
|
150
|
+
|
|
151
|
+
If your team already operates a docforge deployment and you only want to *use* it from your editor (Claude Code, etc.), you don't need to clone, ingest, or run Postgres locally:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
# Generic (no auth)
|
|
155
|
+
pip install docforge-cli
|
|
156
|
+
claude mcp add -s user -e DOCFORGE_API_URL=https://docforge.example.com \
|
|
157
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL
|
|
158
|
+
|
|
159
|
+
# Static Bearer token
|
|
160
|
+
pip install docforge-cli
|
|
161
|
+
claude mcp add -s user \
|
|
162
|
+
-e DOCFORGE_API_URL=https://docforge.example.com \
|
|
163
|
+
-e DOCFORGE_API_TOKEN=eyJ... \
|
|
164
|
+
-e DOCFORGE_AUTH=bearer \
|
|
165
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth bearer
|
|
166
|
+
|
|
167
|
+
# Entra (Azure AD)
|
|
168
|
+
pip install docforge-cli[azure]
|
|
169
|
+
az login --tenant <your-tenant-id>
|
|
170
|
+
claude mcp add -s user \
|
|
171
|
+
-e DOCFORGE_API_URL=https://docforge.example.com \
|
|
172
|
+
-e DOCFORGE_AUDIENCE=api://<app-registration-uri> \
|
|
173
|
+
-e DOCFORGE_AUTH=azure \
|
|
174
|
+
-e DOCFORGE_TEAM=your-team \
|
|
175
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth azure
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
With `--auth azure`, `user_name` is bound to your Entra JWT subject — you can't (and don't need to) configure it.
|
|
179
|
+
|
|
180
|
+
`DOCFORGE_TEAM` is optional but recommended for team-tag relevance boosting in search results.
|
|
181
|
+
|
|
146
182
|
## Self-hosting / forking
|
|
147
183
|
|
|
148
184
|
The embedder image bakes the EmbeddingGemma-300M model at build time,
|
|
@@ -105,6 +105,39 @@ For team-wide use, deploy the search API to Azure (~$90/month at default SKUs wi
|
|
|
105
105
|
|
|
106
106
|
See [`deploy/azure/`](deploy/azure/) for Bicep templates and a full cost breakdown.
|
|
107
107
|
|
|
108
|
+
## Use a hosted instance (no local DB required)
|
|
109
|
+
|
|
110
|
+
If your team already operates a docforge deployment and you only want to *use* it from your editor (Claude Code, etc.), you don't need to clone, ingest, or run Postgres locally:
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
# Generic (no auth)
|
|
114
|
+
pip install docforge-cli
|
|
115
|
+
claude mcp add -s user -e DOCFORGE_API_URL=https://docforge.example.com \
|
|
116
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL
|
|
117
|
+
|
|
118
|
+
# Static Bearer token
|
|
119
|
+
pip install docforge-cli
|
|
120
|
+
claude mcp add -s user \
|
|
121
|
+
-e DOCFORGE_API_URL=https://docforge.example.com \
|
|
122
|
+
-e DOCFORGE_API_TOKEN=eyJ... \
|
|
123
|
+
-e DOCFORGE_AUTH=bearer \
|
|
124
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth bearer
|
|
125
|
+
|
|
126
|
+
# Entra (Azure AD)
|
|
127
|
+
pip install docforge-cli[azure]
|
|
128
|
+
az login --tenant <your-tenant-id>
|
|
129
|
+
claude mcp add -s user \
|
|
130
|
+
-e DOCFORGE_API_URL=https://docforge.example.com \
|
|
131
|
+
-e DOCFORGE_AUDIENCE=api://<app-registration-uri> \
|
|
132
|
+
-e DOCFORGE_AUTH=azure \
|
|
133
|
+
-e DOCFORGE_TEAM=your-team \
|
|
134
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth azure
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
With `--auth azure`, `user_name` is bound to your Entra JWT subject — you can't (and don't need to) configure it.
|
|
138
|
+
|
|
139
|
+
`DOCFORGE_TEAM` is optional but recommended for team-tag relevance boosting in search results.
|
|
140
|
+
|
|
108
141
|
## Self-hosting / forking
|
|
109
142
|
|
|
110
143
|
The embedder image bakes the EmbeddingGemma-300M model at build time,
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "docforge-cli"
|
|
7
|
-
version = "0.3.0"
|
|
7
|
+
version = "0.4.1"
|
|
8
8
|
description = "Forge searchable context from Confluence and git repos for AI coding assistants"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "MIT"}
|
|
@@ -49,6 +49,10 @@ entra = [
|
|
|
49
49
|
# aiohttp is required by azure-identity.aio's async pipeline
|
|
50
50
|
"aiohttp>=3.10,<4.0",
|
|
51
51
|
]
|
|
52
|
+
azure = [
|
|
53
|
+
"azure-identity>=1.19,<2.0",
|
|
54
|
+
"aiohttp>=3.10,<4.0", # required by azure-identity.aio
|
|
55
|
+
]
|
|
52
56
|
|
|
53
57
|
[tool.setuptools.packages.find]
|
|
54
58
|
where = ["src"]
|
|
@@ -155,8 +155,8 @@ async def _auth_dependency(
|
|
|
155
155
|
|
|
156
156
|
class SearchRequest(BaseModel):
|
|
157
157
|
query: str = Field(..., max_length=8000)
|
|
158
|
-
user_name: str
|
|
159
|
-
team_name: str
|
|
158
|
+
user_name: str | None = None
|
|
159
|
+
team_name: str | None = None
|
|
160
160
|
area_name: str | None = None
|
|
161
161
|
limit: int = Field(5, ge=1, le=50)
|
|
162
162
|
|
|
@@ -203,7 +203,7 @@ async def search(
|
|
|
203
203
|
logger.error("Embedding failed: %s", e)
|
|
204
204
|
raise HTTPException(status_code=500, detail="Failed to embed query")
|
|
205
205
|
|
|
206
|
-
user_tags = [
|
|
206
|
+
user_tags = [t for t in (req.team_name, req.area_name) if t]
|
|
207
207
|
|
|
208
208
|
try:
|
|
209
209
|
async with pool.acquire() as conn:
|
|
@@ -241,9 +241,10 @@ async def search(
|
|
|
241
241
|
|
|
242
242
|
request_ms = int((time.perf_counter() - start) * 1000)
|
|
243
243
|
|
|
244
|
+
effective_user_name = user.preferred_username if user else (req.user_name or "anonymous")
|
|
244
245
|
await log_query(
|
|
245
246
|
pool,
|
|
246
|
-
|
|
247
|
+
effective_user_name,
|
|
247
248
|
req.team_name,
|
|
248
249
|
req.area_name,
|
|
249
250
|
req.query,
|
|
@@ -8,6 +8,8 @@ from pathlib import Path
|
|
|
8
8
|
|
|
9
9
|
import typer
|
|
10
10
|
|
|
11
|
+
from docforge.remote_client import AuthName
|
|
12
|
+
|
|
11
13
|
app = typer.Typer(
|
|
12
14
|
help="Forge searchable context from Confluence and git repos for AI coding assistants.",
|
|
13
15
|
)
|
|
@@ -117,10 +119,34 @@ def search(
|
|
|
117
119
|
|
|
118
120
|
|
|
119
121
|
@app.command()
|
|
120
|
-
def serve(
|
|
121
|
-
|
|
122
|
+
def serve(
|
|
123
|
+
api: bool = typer.Option(False, help="Run FastAPI search API instead of MCP"),
|
|
124
|
+
remote_api: str | None = typer.Option(
|
|
125
|
+
None,
|
|
126
|
+
"--remote-api",
|
|
127
|
+
help="Run MCP backed by a remote search API at this URL",
|
|
128
|
+
envvar="DOCFORGE_API_URL",
|
|
129
|
+
),
|
|
130
|
+
auth: AuthName = typer.Option(
|
|
131
|
+
AuthName.none,
|
|
132
|
+
"--auth",
|
|
133
|
+
help="Auth provider for --remote-api",
|
|
134
|
+
envvar="DOCFORGE_AUTH",
|
|
135
|
+
),
|
|
136
|
+
) -> None:
|
|
137
|
+
"""Run the MCP server (or FastAPI API with --api, or remote-backed MCP with --remote-api)."""
|
|
122
138
|
_setup_logging()
|
|
123
|
-
if api:
|
|
139
|
+
if remote_api and api:
|
|
140
|
+
typer.echo("Error: --api and --remote-api are mutually exclusive.", err=True)
|
|
141
|
+
raise typer.Exit(1)
|
|
142
|
+
if auth is not AuthName.none and not remote_api:
|
|
143
|
+
typer.echo("Warning: --auth has no effect without --remote-api.", err=True)
|
|
144
|
+
|
|
145
|
+
if remote_api:
|
|
146
|
+
from docforge.remote_client import run_remote_mcp
|
|
147
|
+
|
|
148
|
+
run_remote_mcp(url=remote_api, auth_name=auth)
|
|
149
|
+
elif api:
|
|
124
150
|
import uvicorn
|
|
125
151
|
|
|
126
152
|
from docforge.api import app as fastapi_app
|
|
@@ -104,7 +104,7 @@ class Settings(BaseSettings):
|
|
|
104
104
|
yml_path = Path("docforge.yml")
|
|
105
105
|
yml_values = {}
|
|
106
106
|
if yml_path.exists():
|
|
107
|
-
with open(yml_path) as f:
|
|
107
|
+
with open(yml_path, encoding="utf-8") as f:
|
|
108
108
|
yml = yaml.safe_load(f) or {}
|
|
109
109
|
# Flatten nested embedding config
|
|
110
110
|
if "embedding" in yml:
|
|
@@ -49,6 +49,32 @@ def _get_embedder() -> EmbedderProtocol:
|
|
|
49
49
|
return _embedder
|
|
50
50
|
|
|
51
51
|
|
|
52
|
+
def format_search_results_markdown(
|
|
53
|
+
results: list[dict],
|
|
54
|
+
*,
|
|
55
|
+
empty_message: str = "No documentation found matching your query.",
|
|
56
|
+
) -> str:
|
|
57
|
+
"""Render a list of search-result dicts as the canonical Markdown shape.
|
|
58
|
+
|
|
59
|
+
Each result must have keys: similarity, source_title, source_url, text.
|
|
60
|
+
Optional: section_title, source_tags.
|
|
61
|
+
"""
|
|
62
|
+
if not results:
|
|
63
|
+
return empty_message
|
|
64
|
+
|
|
65
|
+
parts: list[str] = []
|
|
66
|
+
for i, r in enumerate(results, 1):
|
|
67
|
+
header = f"**Result {i}** (relevance: {r['similarity']:.2f}) -- {r['source_title']}"
|
|
68
|
+
if r.get("section_title"):
|
|
69
|
+
header += f" > {r['section_title']}"
|
|
70
|
+
header += f"\nSource: {r['source_url']}"
|
|
71
|
+
tags = r.get("source_tags") or []
|
|
72
|
+
if tags:
|
|
73
|
+
header += f"\nTags: {', '.join(tags)}"
|
|
74
|
+
parts.append(f"{header}\n\n{r['text']}")
|
|
75
|
+
return "\n\n---\n\n".join(parts)
|
|
76
|
+
|
|
77
|
+
|
|
52
78
|
@mcp.tool()
|
|
53
79
|
async def search_documentation(
|
|
54
80
|
query: Annotated[str, Field(max_length=8000)],
|
|
@@ -115,31 +141,23 @@ async def search_documentation(
|
|
|
115
141
|
|
|
116
142
|
await log_query(pool, user_name, team_name, area_name, query, len(rows))
|
|
117
143
|
|
|
118
|
-
|
|
119
|
-
|
|
144
|
+
return format_search_results_markdown(
|
|
145
|
+
[
|
|
146
|
+
{
|
|
147
|
+
"similarity": row["similarity"],
|
|
148
|
+
"source_title": row["source_title"],
|
|
149
|
+
"source_url": row["source_url"],
|
|
150
|
+
"section_title": row["section_title"],
|
|
151
|
+
"source_tags": list(row["source_tags"] or []),
|
|
152
|
+
"text": row["text"],
|
|
153
|
+
}
|
|
154
|
+
for row in rows
|
|
155
|
+
],
|
|
156
|
+
empty_message=(
|
|
120
157
|
"No documentation found matching your query. "
|
|
121
158
|
"The index may be empty -- run `python -m docforge ingest` to populate it."
|
|
122
|
-
)
|
|
123
|
-
|
|
124
|
-
parts: list[str] = []
|
|
125
|
-
for i, row in enumerate(rows, 1):
|
|
126
|
-
similarity = row["similarity"]
|
|
127
|
-
source = row["source_title"]
|
|
128
|
-
url = row["source_url"]
|
|
129
|
-
section = row["section_title"]
|
|
130
|
-
text = row["text"]
|
|
131
|
-
tags = list(row["source_tags"] or [])
|
|
132
|
-
|
|
133
|
-
header = f"**Result {i}** (relevance: {similarity:.2f}) — {source}"
|
|
134
|
-
if section:
|
|
135
|
-
header += f" > {section}"
|
|
136
|
-
header += f"\nSource: {url}"
|
|
137
|
-
if tags:
|
|
138
|
-
header += f"\nTags: {', '.join(tags)}"
|
|
139
|
-
|
|
140
|
-
parts.append(f"{header}\n\n{text}")
|
|
141
|
-
|
|
142
|
-
return "\n\n---\n\n".join(parts)
|
|
159
|
+
),
|
|
160
|
+
)
|
|
143
161
|
|
|
144
162
|
|
|
145
163
|
@mcp.tool()
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
"""MCP server that proxies tool calls to a remote docforge search-api.
|
|
2
|
+
|
|
3
|
+
Used by `docforge serve --remote-api $URL --auth ...`. See the
|
|
4
|
+
2026-05-08-docforge-remote-api-mode-design.md spec for the full design.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from enum import Enum
|
|
11
|
+
from typing import Protocol
|
|
12
|
+
|
|
13
|
+
import httpx
|
|
14
|
+
from fastmcp import FastMCP
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class AuthName(str, Enum):
|
|
18
|
+
"""Selectable auth providers for the --remote-api mode."""
|
|
19
|
+
|
|
20
|
+
none = "none"
|
|
21
|
+
bearer = "bearer"
|
|
22
|
+
azure = "azure"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class AuthProvider(Protocol):
|
|
26
|
+
"""Async source of HTTP headers attached to each remote request."""
|
|
27
|
+
|
|
28
|
+
async def headers(self) -> dict[str, str]: ...
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class NoneAuth:
|
|
32
|
+
"""No-op auth provider. Returns no headers."""
|
|
33
|
+
|
|
34
|
+
async def headers(self) -> dict[str, str]:
|
|
35
|
+
return {}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class BearerAuth:
|
|
39
|
+
"""Static Bearer token from DOCFORGE_API_TOKEN env var."""
|
|
40
|
+
|
|
41
|
+
def __init__(self) -> None:
|
|
42
|
+
token = os.environ.get("DOCFORGE_API_TOKEN", "").strip()
|
|
43
|
+
if not token:
|
|
44
|
+
raise RuntimeError("BearerAuth requires DOCFORGE_API_TOKEN env var to be set.")
|
|
45
|
+
self._token = token
|
|
46
|
+
|
|
47
|
+
async def headers(self) -> dict[str, str]:
|
|
48
|
+
return {"Authorization": f"Bearer {self._token}"}
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class AzureAuth:
|
|
52
|
+
"""Entra Bearer token via DefaultAzureCredential.
|
|
53
|
+
|
|
54
|
+
Requires `pip install docforge-cli[azure]`. Reads target audience
|
|
55
|
+
from DOCFORGE_AUDIENCE env var.
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
def __init__(self) -> None:
|
|
59
|
+
try:
|
|
60
|
+
from azure.identity.aio import DefaultAzureCredential
|
|
61
|
+
except ImportError as e:
|
|
62
|
+
raise ImportError("Azure auth requires `pip install docforge-cli[azure]`.") from e
|
|
63
|
+
|
|
64
|
+
audience = os.environ.get("DOCFORGE_AUDIENCE", "").strip()
|
|
65
|
+
if not audience:
|
|
66
|
+
raise RuntimeError("AzureAuth requires DOCFORGE_AUDIENCE env var to be set.")
|
|
67
|
+
self._audience = audience
|
|
68
|
+
self._credential = DefaultAzureCredential()
|
|
69
|
+
|
|
70
|
+
async def headers(self) -> dict[str, str]:
|
|
71
|
+
token = await self._credential.get_token(f"{self._audience}/.default")
|
|
72
|
+
return {"Authorization": f"Bearer {token.token}"}
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def make_auth_provider(name: AuthName | str) -> AuthProvider:
|
|
76
|
+
"""Return an AuthProvider instance for the given name."""
|
|
77
|
+
try:
|
|
78
|
+
name = AuthName(name) if isinstance(name, str) else name
|
|
79
|
+
except ValueError as e:
|
|
80
|
+
raise ValueError(f"Unknown auth provider: {name!r}. Valid: none, bearer, azure.") from e
|
|
81
|
+
if name is AuthName.none:
|
|
82
|
+
return NoneAuth()
|
|
83
|
+
if name is AuthName.bearer:
|
|
84
|
+
return BearerAuth()
|
|
85
|
+
if name is AuthName.azure:
|
|
86
|
+
return AzureAuth()
|
|
87
|
+
raise ValueError(f"Unknown auth provider: {name!r}.")
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class RemoteBackend:
|
|
91
|
+
"""Proxy to a remote docforge search-api over HTTP."""
|
|
92
|
+
|
|
93
|
+
def __init__(
|
|
94
|
+
self,
|
|
95
|
+
*,
|
|
96
|
+
url: str,
|
|
97
|
+
auth: AuthProvider,
|
|
98
|
+
transport: httpx.AsyncBaseTransport | None = None,
|
|
99
|
+
) -> None:
|
|
100
|
+
self._url = url.rstrip("/")
|
|
101
|
+
self._auth = auth
|
|
102
|
+
self._transport = transport
|
|
103
|
+
self._client: httpx.AsyncClient | None = None
|
|
104
|
+
|
|
105
|
+
async def _ensure_client(self) -> httpx.AsyncClient:
|
|
106
|
+
if self._client is None:
|
|
107
|
+
self._client = httpx.AsyncClient(transport=self._transport, timeout=30.0)
|
|
108
|
+
return self._client
|
|
109
|
+
|
|
110
|
+
async def aclose(self) -> None:
|
|
111
|
+
if self._client is not None:
|
|
112
|
+
await self._client.aclose()
|
|
113
|
+
self._client = None
|
|
114
|
+
|
|
115
|
+
def _identity_body(self) -> dict[str, str]:
|
|
116
|
+
out: dict[str, str] = {}
|
|
117
|
+
for env_var, body_key in (
|
|
118
|
+
("DOCFORGE_USER", "user_name"),
|
|
119
|
+
("DOCFORGE_TEAM", "team_name"),
|
|
120
|
+
("DOCFORGE_AREA", "area_name"),
|
|
121
|
+
):
|
|
122
|
+
val = os.environ.get(env_var, "").strip()
|
|
123
|
+
if val:
|
|
124
|
+
out[body_key] = val
|
|
125
|
+
return out
|
|
126
|
+
|
|
127
|
+
async def _request(
|
|
128
|
+
self,
|
|
129
|
+
method: str,
|
|
130
|
+
path: str,
|
|
131
|
+
*,
|
|
132
|
+
json: dict[str, object] | None = None,
|
|
133
|
+
) -> httpx.Response | str:
|
|
134
|
+
"""Perform an HTTP request with auth and uniform error handling.
|
|
135
|
+
|
|
136
|
+
Returns the Response on 2xx; an already-formatted error string otherwise.
|
|
137
|
+
"""
|
|
138
|
+
try:
|
|
139
|
+
headers = await self._auth.headers()
|
|
140
|
+
except Exception as e:
|
|
141
|
+
return f"Auth provider error: {e}"
|
|
142
|
+
|
|
143
|
+
client = await self._ensure_client()
|
|
144
|
+
try:
|
|
145
|
+
resp = await client.request(method, f"{self._url}{path}", json=json, headers=headers)
|
|
146
|
+
except httpx.ConnectError:
|
|
147
|
+
return f"Could not reach remote API at {self._url}."
|
|
148
|
+
except httpx.HTTPError as e:
|
|
149
|
+
return f"Remote API error: {e}"
|
|
150
|
+
|
|
151
|
+
if resp.status_code == 401:
|
|
152
|
+
return "Auth failed (401). Check DOCFORGE_API_URL and the --auth provider."
|
|
153
|
+
if 500 <= resp.status_code < 600:
|
|
154
|
+
return f"Remote API error ({resp.status_code}). Try again in a moment."
|
|
155
|
+
if resp.status_code != 200:
|
|
156
|
+
return f"Remote API returned {resp.status_code}: {resp.text[:200]}"
|
|
157
|
+
return resp
|
|
158
|
+
|
|
159
|
+
async def search(self, *, query: str, limit: int = 5) -> str:
|
|
160
|
+
"""Search the remote API and return Markdown-formatted results."""
|
|
161
|
+
body: dict[str, object] = {"query": query, "limit": limit}
|
|
162
|
+
body.update(self._identity_body())
|
|
163
|
+
result = await self._request("POST", "/search", json=body)
|
|
164
|
+
if isinstance(result, str):
|
|
165
|
+
return result
|
|
166
|
+
|
|
167
|
+
from docforge.mcp_server import format_search_results_markdown
|
|
168
|
+
|
|
169
|
+
data = result.json()
|
|
170
|
+
return format_search_results_markdown(data.get("results", []))
|
|
171
|
+
|
|
172
|
+
async def list_sources(self) -> str:
|
|
173
|
+
"""List indexed sources from the remote API."""
|
|
174
|
+
result = await self._request("GET", "/sources")
|
|
175
|
+
if isinstance(result, str):
|
|
176
|
+
return result
|
|
177
|
+
|
|
178
|
+
data = result.json()
|
|
179
|
+
sources = data.get("sources", [])
|
|
180
|
+
if not sources:
|
|
181
|
+
return "No sources indexed."
|
|
182
|
+
|
|
183
|
+
lines = [f"**{data.get('count', len(sources))} indexed sources:**\n"]
|
|
184
|
+
for s in sources:
|
|
185
|
+
lines.append(f"- **{s['title']}** ({s['chunk_count']} chunks, {s['status']})")
|
|
186
|
+
return "\n".join(lines)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
INSTRUCTIONS = (
|
|
190
|
+
"Search across your team's indexed documentation including team responsibilities, "
|
|
191
|
+
"coding guidelines, architecture standards, and cross-team interfaces. "
|
|
192
|
+
"Use the search_documentation tool when you need information about other teams, "
|
|
193
|
+
"shared coding practices, or organizational knowledge."
|
|
194
|
+
)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def run_remote_mcp(*, url: str, auth_name: AuthName | str = AuthName.none) -> None:
|
|
198
|
+
"""Run an MCP server proxying tool calls to a remote docforge search-api."""
|
|
199
|
+
auth = make_auth_provider(auth_name)
|
|
200
|
+
backend = RemoteBackend(url=url, auth=auth)
|
|
201
|
+
mcp = FastMCP("docforge", instructions=INSTRUCTIONS)
|
|
202
|
+
|
|
203
|
+
@mcp.tool()
|
|
204
|
+
async def search_documentation(query: str, limit: int = 5) -> str:
|
|
205
|
+
"""Search across indexed documentation from Confluence pages and git repos."""
|
|
206
|
+
return await backend.search(query=query, limit=limit)
|
|
207
|
+
|
|
208
|
+
@mcp.tool()
|
|
209
|
+
async def list_sources() -> str:
|
|
210
|
+
"""List all documentation sources currently indexed."""
|
|
211
|
+
return await backend.list_sources()
|
|
212
|
+
|
|
213
|
+
mcp.run()
|
|
@@ -41,6 +41,6 @@ class SourcesFile(BaseModel):
|
|
|
41
41
|
|
|
42
42
|
def load_sources(path: str | Path) -> list[SourceConfig]:
|
|
43
43
|
"""Load source configurations from a YAML file."""
|
|
44
|
-
with open(path) as f:
|
|
44
|
+
with open(path, encoding="utf-8") as f:
|
|
45
45
|
data = yaml.safe_load(f)
|
|
46
46
|
return SourcesFile.model_validate(data).sources
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docforge-cli
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Forge searchable context from Confluence and git repos for AI coding assistants
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://GranatenUdo.github.io/docforge/
|
|
@@ -34,6 +34,9 @@ Provides-Extra: entra
|
|
|
34
34
|
Requires-Dist: fastapi-azure-auth<6.0,>=5.0; extra == "entra"
|
|
35
35
|
Requires-Dist: azure-identity<2.0,>=1.19; extra == "entra"
|
|
36
36
|
Requires-Dist: aiohttp<4.0,>=3.10; extra == "entra"
|
|
37
|
+
Provides-Extra: azure
|
|
38
|
+
Requires-Dist: azure-identity<2.0,>=1.19; extra == "azure"
|
|
39
|
+
Requires-Dist: aiohttp<4.0,>=3.10; extra == "azure"
|
|
37
40
|
Dynamic: license-file
|
|
38
41
|
|
|
39
42
|
# docforge
|
|
@@ -143,6 +146,39 @@ For team-wide use, deploy the search API to Azure (~$90/month at default SKUs wi
|
|
|
143
146
|
|
|
144
147
|
See [`deploy/azure/`](deploy/azure/) for Bicep templates and a full cost breakdown.
|
|
145
148
|
|
|
149
|
+
## Use a hosted instance (no local DB required)
|
|
150
|
+
|
|
151
|
+
If your team already operates a docforge deployment and you only want to *use* it from your editor (Claude Code, etc.), you don't need to clone, ingest, or run Postgres locally:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
# Generic (no auth)
|
|
155
|
+
pip install docforge-cli
|
|
156
|
+
claude mcp add -s user -e DOCFORGE_API_URL=https://docforge.example.com \
|
|
157
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL
|
|
158
|
+
|
|
159
|
+
# Static Bearer token
|
|
160
|
+
pip install docforge-cli
|
|
161
|
+
claude mcp add -s user \
|
|
162
|
+
-e DOCFORGE_API_URL=https://docforge.example.com \
|
|
163
|
+
-e DOCFORGE_API_TOKEN=eyJ... \
|
|
164
|
+
-e DOCFORGE_AUTH=bearer \
|
|
165
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth bearer
|
|
166
|
+
|
|
167
|
+
# Entra (Azure AD)
|
|
168
|
+
pip install docforge-cli[azure]
|
|
169
|
+
az login --tenant <your-tenant-id>
|
|
170
|
+
claude mcp add -s user \
|
|
171
|
+
-e DOCFORGE_API_URL=https://docforge.example.com \
|
|
172
|
+
-e DOCFORGE_AUDIENCE=api://<app-registration-uri> \
|
|
173
|
+
-e DOCFORGE_AUTH=azure \
|
|
174
|
+
-e DOCFORGE_TEAM=your-team \
|
|
175
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth azure
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
With `--auth azure`, `user_name` is bound to your Entra JWT subject — you can't (and don't need to) configure it.
|
|
179
|
+
|
|
180
|
+
`DOCFORGE_TEAM` is optional but recommended for team-tag relevance boosting in search results.
|
|
181
|
+
|
|
146
182
|
## Self-hosting / forking
|
|
147
183
|
|
|
148
184
|
The embedder image bakes the EmbeddingGemma-300M model at build time,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/migrations/001_add_source_identifier.sql
RENAMED
|
File without changes
|
{docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/migrations/002_add_status_index.sql
RENAMED
|
File without changes
|
{docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/migrations/003_add_source_tags.sql
RENAMED
|
File without changes
|
|
File without changes
|
{docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/migrations/005_add_query_log_user_oid.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|