docforge-cli 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/PKG-INFO +37 -1
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/README.md +33 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/pyproject.toml +5 -1
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/api.py +5 -4
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/cli.py +24 -2
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/config.py +1 -1
- docforge_cli-0.4.0/src/docforge/remote_client.py +199 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/sources.py +1 -1
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge_cli.egg-info/PKG-INFO +37 -1
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge_cli.egg-info/SOURCES.txt +1 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge_cli.egg-info/requires.txt +4 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/LICENSE +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/setup.cfg +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/__init__.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/__main__.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/crawlers/__init__.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/crawlers/confluence.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/crawlers/git.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/db.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/embedder_api.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/ingest.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/lint.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/mcp_server.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/processors/__init__.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/processors/chunker.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/processors/embedder.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/processors/parser.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/query_log.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/ranking.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/scripts/__init__.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/scripts/eval_search.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/scripts/latency_report.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/sql/migrations/001_add_source_identifier.sql +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/sql/migrations/002_add_status_index.sql +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/sql/migrations/003_add_source_tags.sql +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/sql/migrations/004_add_query_log.sql +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/sql/migrations/005_add_query_log_user_oid.sql +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/sql/migrations/006_add_query_log_request_ms.sql +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/sql/schema.sql +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/templates/docforge.yml +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/templates/docker-compose.yml +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/templates/mcp_client.py +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/templates/sources.yml +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge_cli.egg-info/dependency_links.txt +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge_cli.egg-info/entry_points.txt +0 -0
- {docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge_cli.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docforge-cli
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Forge searchable context from Confluence and git repos for AI coding assistants
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://GranatenUdo.github.io/docforge/
|
|
@@ -34,6 +34,9 @@ Provides-Extra: entra
|
|
|
34
34
|
Requires-Dist: fastapi-azure-auth<6.0,>=5.0; extra == "entra"
|
|
35
35
|
Requires-Dist: azure-identity<2.0,>=1.19; extra == "entra"
|
|
36
36
|
Requires-Dist: aiohttp<4.0,>=3.10; extra == "entra"
|
|
37
|
+
Provides-Extra: azure
|
|
38
|
+
Requires-Dist: azure-identity<2.0,>=1.19; extra == "azure"
|
|
39
|
+
Requires-Dist: aiohttp<4.0,>=3.10; extra == "azure"
|
|
37
40
|
Dynamic: license-file
|
|
38
41
|
|
|
39
42
|
# docforge
|
|
@@ -143,6 +146,39 @@ For team-wide use, deploy the search API to Azure (~$90/month at default SKUs wi
|
|
|
143
146
|
|
|
144
147
|
See [`deploy/azure/`](deploy/azure/) for Bicep templates and a full cost breakdown.
|
|
145
148
|
|
|
149
|
+
## Use a hosted instance (no local DB required)
|
|
150
|
+
|
|
151
|
+
If your team already operates a docforge deployment and you only want to *use* it from your editor (Claude Code, etc.), you don't need to clone, ingest, or run Postgres locally:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
# Generic (no auth)
|
|
155
|
+
pip install docforge-cli
|
|
156
|
+
claude mcp add -s user -e DOCFORGE_API_URL=https://docforge.example.com \
|
|
157
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL
|
|
158
|
+
|
|
159
|
+
# Static Bearer token
|
|
160
|
+
pip install docforge-cli
|
|
161
|
+
claude mcp add -s user \
|
|
162
|
+
-e DOCFORGE_API_URL=https://docforge.example.com \
|
|
163
|
+
-e DOCFORGE_API_TOKEN=eyJ... \
|
|
164
|
+
-e DOCFORGE_AUTH=bearer \
|
|
165
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth bearer
|
|
166
|
+
|
|
167
|
+
# Entra (Azure AD)
|
|
168
|
+
pip install docforge-cli[azure]
|
|
169
|
+
az login --tenant <your-tenant-id>
|
|
170
|
+
claude mcp add -s user \
|
|
171
|
+
-e DOCFORGE_API_URL=https://docforge.example.com \
|
|
172
|
+
-e DOCFORGE_AUDIENCE=api://<app-registration-uri> \
|
|
173
|
+
-e DOCFORGE_AUTH=azure \
|
|
174
|
+
-e DOCFORGE_TEAM=your-team \
|
|
175
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth azure
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
With `--auth azure`, `user_name` is bound to your Entra JWT subject — you can't (and don't need to) configure it.
|
|
179
|
+
|
|
180
|
+
`DOCFORGE_TEAM` is optional but recommended for team-tag relevance boosting in search results.
|
|
181
|
+
|
|
146
182
|
## Self-hosting / forking
|
|
147
183
|
|
|
148
184
|
The embedder image bakes the EmbeddingGemma-300M model at build time,
|
|
@@ -105,6 +105,39 @@ For team-wide use, deploy the search API to Azure (~$90/month at default SKUs wi
|
|
|
105
105
|
|
|
106
106
|
See [`deploy/azure/`](deploy/azure/) for Bicep templates and a full cost breakdown.
|
|
107
107
|
|
|
108
|
+
## Use a hosted instance (no local DB required)
|
|
109
|
+
|
|
110
|
+
If your team already operates a docforge deployment and you only want to *use* it from your editor (Claude Code, etc.), you don't need to clone, ingest, or run Postgres locally:
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
# Generic (no auth)
|
|
114
|
+
pip install docforge-cli
|
|
115
|
+
claude mcp add -s user -e DOCFORGE_API_URL=https://docforge.example.com \
|
|
116
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL
|
|
117
|
+
|
|
118
|
+
# Static Bearer token
|
|
119
|
+
pip install docforge-cli
|
|
120
|
+
claude mcp add -s user \
|
|
121
|
+
-e DOCFORGE_API_URL=https://docforge.example.com \
|
|
122
|
+
-e DOCFORGE_API_TOKEN=eyJ... \
|
|
123
|
+
-e DOCFORGE_AUTH=bearer \
|
|
124
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth bearer
|
|
125
|
+
|
|
126
|
+
# Entra (Azure AD)
|
|
127
|
+
pip install docforge-cli[azure]
|
|
128
|
+
az login --tenant <your-tenant-id>
|
|
129
|
+
claude mcp add -s user \
|
|
130
|
+
-e DOCFORGE_API_URL=https://docforge.example.com \
|
|
131
|
+
-e DOCFORGE_AUDIENCE=api://<app-registration-uri> \
|
|
132
|
+
-e DOCFORGE_AUTH=azure \
|
|
133
|
+
-e DOCFORGE_TEAM=your-team \
|
|
134
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth azure
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
With `--auth azure`, `user_name` is bound to your Entra JWT subject — you can't (and don't need to) configure it.
|
|
138
|
+
|
|
139
|
+
`DOCFORGE_TEAM` is optional but recommended for team-tag relevance boosting in search results.
|
|
140
|
+
|
|
108
141
|
## Self-hosting / forking
|
|
109
142
|
|
|
110
143
|
The embedder image bakes the EmbeddingGemma-300M model at build time,
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "docforge-cli"
|
|
7
|
-
version = "0.3.0"
|
|
7
|
+
version = "0.4.0"
|
|
8
8
|
description = "Forge searchable context from Confluence and git repos for AI coding assistants"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "MIT"}
|
|
@@ -49,6 +49,10 @@ entra = [
|
|
|
49
49
|
# aiohttp is required by azure-identity.aio's async pipeline
|
|
50
50
|
"aiohttp>=3.10,<4.0",
|
|
51
51
|
]
|
|
52
|
+
azure = [
|
|
53
|
+
"azure-identity>=1.19,<2.0",
|
|
54
|
+
"aiohttp>=3.10,<4.0", # required by azure-identity.aio
|
|
55
|
+
]
|
|
52
56
|
|
|
53
57
|
[tool.setuptools.packages.find]
|
|
54
58
|
where = ["src"]
|
|
@@ -155,8 +155,8 @@ async def _auth_dependency(
|
|
|
155
155
|
|
|
156
156
|
class SearchRequest(BaseModel):
|
|
157
157
|
query: str = Field(..., max_length=8000)
|
|
158
|
-
user_name: str
|
|
159
|
-
team_name: str
|
|
158
|
+
user_name: str | None = None
|
|
159
|
+
team_name: str | None = None
|
|
160
160
|
area_name: str | None = None
|
|
161
161
|
limit: int = Field(5, ge=1, le=50)
|
|
162
162
|
|
|
@@ -203,7 +203,7 @@ async def search(
|
|
|
203
203
|
logger.error("Embedding failed: %s", e)
|
|
204
204
|
raise HTTPException(status_code=500, detail="Failed to embed query")
|
|
205
205
|
|
|
206
|
-
user_tags = [
|
|
206
|
+
user_tags = [t for t in (req.team_name, req.area_name) if t]
|
|
207
207
|
|
|
208
208
|
try:
|
|
209
209
|
async with pool.acquire() as conn:
|
|
@@ -241,9 +241,10 @@ async def search(
|
|
|
241
241
|
|
|
242
242
|
request_ms = int((time.perf_counter() - start) * 1000)
|
|
243
243
|
|
|
244
|
+
effective_user_name = user.preferred_username if user else (req.user_name or "anonymous")
|
|
244
245
|
await log_query(
|
|
245
246
|
pool,
|
|
246
|
-
|
|
247
|
+
effective_user_name,
|
|
247
248
|
req.team_name,
|
|
248
249
|
req.area_name,
|
|
249
250
|
req.query,
|
|
@@ -117,9 +117,31 @@ def search(
|
|
|
117
117
|
|
|
118
118
|
|
|
119
119
|
@app.command()
|
|
120
|
-
def serve(
|
|
121
|
-
|
|
120
|
+
def serve(
|
|
121
|
+
api: bool = typer.Option(False, help="Run FastAPI search API instead of MCP"),
|
|
122
|
+
remote_api: str | None = typer.Option(
|
|
123
|
+
None,
|
|
124
|
+
"--remote-api",
|
|
125
|
+
help="Run MCP backed by a remote search API at this URL",
|
|
126
|
+
envvar="DOCFORGE_API_URL",
|
|
127
|
+
),
|
|
128
|
+
auth: str = typer.Option(
|
|
129
|
+
"none",
|
|
130
|
+
"--auth",
|
|
131
|
+
help="Auth provider for --remote-api: none | bearer | azure",
|
|
132
|
+
envvar="DOCFORGE_AUTH",
|
|
133
|
+
),
|
|
134
|
+
) -> None:
|
|
135
|
+
"""Run the MCP server (or FastAPI API with --api, or remote-backed MCP with --remote-api)."""
|
|
122
136
|
_setup_logging()
|
|
137
|
+
if remote_api:
|
|
138
|
+
if api:
|
|
139
|
+
typer.echo("Error: --api and --remote-api are mutually exclusive.", err=True)
|
|
140
|
+
raise typer.Exit(1)
|
|
141
|
+
from docforge.remote_client import run_remote_mcp
|
|
142
|
+
|
|
143
|
+
run_remote_mcp(url=remote_api, auth_name=auth)
|
|
144
|
+
return
|
|
123
145
|
if api:
|
|
124
146
|
import uvicorn
|
|
125
147
|
|
|
@@ -104,7 +104,7 @@ class Settings(BaseSettings):
|
|
|
104
104
|
yml_path = Path("docforge.yml")
|
|
105
105
|
yml_values = {}
|
|
106
106
|
if yml_path.exists():
|
|
107
|
-
with open(yml_path) as f:
|
|
107
|
+
with open(yml_path, encoding="utf-8") as f:
|
|
108
108
|
yml = yaml.safe_load(f) or {}
|
|
109
109
|
# Flatten nested embedding config
|
|
110
110
|
if "embedding" in yml:
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
"""MCP server that proxies tool calls to a remote docforge search-api.
|
|
2
|
+
|
|
3
|
+
Used by `docforge serve --remote-api $URL --auth ...`. See the
|
|
4
|
+
2026-05-08-docforge-remote-api-mode-design.md spec for the full design.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
from typing import Protocol
|
|
11
|
+
|
|
12
|
+
import httpx
|
|
13
|
+
from fastmcp import FastMCP
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class AuthProvider(Protocol):
|
|
17
|
+
"""Async source of HTTP headers attached to each remote request."""
|
|
18
|
+
|
|
19
|
+
async def headers(self) -> dict[str, str]: ...
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class NoneAuth:
|
|
23
|
+
"""No-op auth provider. Returns no headers."""
|
|
24
|
+
|
|
25
|
+
async def headers(self) -> dict[str, str]:
|
|
26
|
+
return {}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class BearerAuth:
|
|
30
|
+
"""Static Bearer token from DOCFORGE_API_TOKEN env var."""
|
|
31
|
+
|
|
32
|
+
def __init__(self) -> None:
|
|
33
|
+
token = os.environ.get("DOCFORGE_API_TOKEN", "").strip()
|
|
34
|
+
if not token:
|
|
35
|
+
raise RuntimeError("BearerAuth requires DOCFORGE_API_TOKEN env var to be set.")
|
|
36
|
+
self._token = token
|
|
37
|
+
|
|
38
|
+
async def headers(self) -> dict[str, str]:
|
|
39
|
+
return {"Authorization": f"Bearer {self._token}"}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class AzureAuth:
|
|
43
|
+
"""Entra Bearer token via DefaultAzureCredential.
|
|
44
|
+
|
|
45
|
+
Requires `pip install docforge-cli[azure]`. Reads target audience
|
|
46
|
+
from DOCFORGE_AUDIENCE env var.
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
def __init__(self) -> None:
|
|
50
|
+
try:
|
|
51
|
+
from azure.identity.aio import DefaultAzureCredential
|
|
52
|
+
except ImportError as e:
|
|
53
|
+
raise ImportError("Azure auth requires `pip install docforge-cli[azure]`.") from e
|
|
54
|
+
|
|
55
|
+
audience = os.environ.get("DOCFORGE_AUDIENCE", "").strip()
|
|
56
|
+
if not audience:
|
|
57
|
+
raise RuntimeError("AzureAuth requires DOCFORGE_AUDIENCE env var to be set.")
|
|
58
|
+
self._audience = audience
|
|
59
|
+
self._credential = DefaultAzureCredential()
|
|
60
|
+
|
|
61
|
+
async def headers(self) -> dict[str, str]:
|
|
62
|
+
token = await self._credential.get_token(f"{self._audience}/.default")
|
|
63
|
+
return {"Authorization": f"Bearer {token.token}"}
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def make_auth_provider(name: str) -> AuthProvider:
|
|
67
|
+
"""Return an AuthProvider instance for the given name."""
|
|
68
|
+
if name == "none":
|
|
69
|
+
return NoneAuth()
|
|
70
|
+
if name == "bearer":
|
|
71
|
+
return BearerAuth()
|
|
72
|
+
if name == "azure":
|
|
73
|
+
return AzureAuth()
|
|
74
|
+
raise ValueError(f"Unknown auth provider: {name!r}. Valid: none, bearer, azure.")
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class RemoteBackend:
|
|
78
|
+
"""Proxy to a remote docforge search-api over HTTP."""
|
|
79
|
+
|
|
80
|
+
def __init__(
|
|
81
|
+
self,
|
|
82
|
+
*,
|
|
83
|
+
url: str,
|
|
84
|
+
auth: AuthProvider,
|
|
85
|
+
transport: httpx.AsyncBaseTransport | None = None,
|
|
86
|
+
) -> None:
|
|
87
|
+
self._url = url.rstrip("/")
|
|
88
|
+
self._auth = auth
|
|
89
|
+
self._transport = transport # for tests
|
|
90
|
+
|
|
91
|
+
def _identity_body(self) -> dict[str, str]:
|
|
92
|
+
out: dict[str, str] = {}
|
|
93
|
+
for env_var, body_key in (
|
|
94
|
+
("DOCFORGE_USER", "user_name"),
|
|
95
|
+
("DOCFORGE_TEAM", "team_name"),
|
|
96
|
+
("DOCFORGE_AREA", "area_name"),
|
|
97
|
+
):
|
|
98
|
+
val = os.environ.get(env_var, "").strip()
|
|
99
|
+
if val:
|
|
100
|
+
out[body_key] = val
|
|
101
|
+
return out
|
|
102
|
+
|
|
103
|
+
async def search(self, *, query: str, limit: int = 5) -> str:
|
|
104
|
+
"""Search the remote API and return Markdown-formatted results."""
|
|
105
|
+
body: dict[str, object] = {"query": query, "limit": limit}
|
|
106
|
+
body.update(self._identity_body())
|
|
107
|
+
try:
|
|
108
|
+
headers = await self._auth.headers()
|
|
109
|
+
except Exception as e:
|
|
110
|
+
return f"Auth provider error: {e}"
|
|
111
|
+
|
|
112
|
+
try:
|
|
113
|
+
async with httpx.AsyncClient(transport=self._transport, timeout=30.0) as client:
|
|
114
|
+
resp = await client.post(f"{self._url}/search", json=body, headers=headers)
|
|
115
|
+
except httpx.ConnectError:
|
|
116
|
+
return f"Could not reach remote API at {self._url}."
|
|
117
|
+
except httpx.HTTPError as e:
|
|
118
|
+
return f"Remote API error: {e}"
|
|
119
|
+
|
|
120
|
+
if resp.status_code == 401:
|
|
121
|
+
return "Auth failed (401). Check DOCFORGE_API_URL and the --auth provider."
|
|
122
|
+
if 500 <= resp.status_code < 600:
|
|
123
|
+
return f"Remote API error ({resp.status_code}). Try again in a moment."
|
|
124
|
+
if resp.status_code != 200:
|
|
125
|
+
return f"Remote API returned {resp.status_code}: {resp.text[:200]}"
|
|
126
|
+
|
|
127
|
+
data = resp.json()
|
|
128
|
+
results = data.get("results", [])
|
|
129
|
+
if not results:
|
|
130
|
+
return "No documentation found matching your query."
|
|
131
|
+
|
|
132
|
+
parts: list[str] = []
|
|
133
|
+
for i, r in enumerate(results, 1):
|
|
134
|
+
header = f"**Result {i}** (relevance: {r['similarity']:.2f}) -- {r['source_title']}"
|
|
135
|
+
if r.get("section_title"):
|
|
136
|
+
header += f" > {r['section_title']}"
|
|
137
|
+
header += f"\nSource: {r['source_url']}"
|
|
138
|
+
tags = r.get("source_tags") or []
|
|
139
|
+
if tags:
|
|
140
|
+
header += f"\nTags: {', '.join(tags)}"
|
|
141
|
+
parts.append(f"{header}\n\n{r['text']}")
|
|
142
|
+
return "\n\n---\n\n".join(parts)
|
|
143
|
+
|
|
144
|
+
async def list_sources(self) -> str:
|
|
145
|
+
"""List indexed sources from the remote API."""
|
|
146
|
+
try:
|
|
147
|
+
headers = await self._auth.headers()
|
|
148
|
+
except Exception as e:
|
|
149
|
+
return f"Auth provider error: {e}"
|
|
150
|
+
|
|
151
|
+
try:
|
|
152
|
+
async with httpx.AsyncClient(transport=self._transport, timeout=10.0) as client:
|
|
153
|
+
resp = await client.get(f"{self._url}/sources", headers=headers)
|
|
154
|
+
except httpx.ConnectError:
|
|
155
|
+
return f"Could not reach remote API at {self._url}."
|
|
156
|
+
except httpx.HTTPError as e:
|
|
157
|
+
return f"Remote API error: {e}"
|
|
158
|
+
|
|
159
|
+
if resp.status_code == 401:
|
|
160
|
+
return "Auth failed (401). Check DOCFORGE_API_URL and the --auth provider."
|
|
161
|
+
if resp.status_code != 200:
|
|
162
|
+
return f"Remote API returned {resp.status_code}: {resp.text[:200]}"
|
|
163
|
+
|
|
164
|
+
data = resp.json()
|
|
165
|
+
sources = data.get("sources", [])
|
|
166
|
+
if not sources:
|
|
167
|
+
return "No sources indexed."
|
|
168
|
+
|
|
169
|
+
lines = [f"**{data.get('count', len(sources))} indexed sources:**\n"]
|
|
170
|
+
for s in sources:
|
|
171
|
+
lines.append(f"- **{s['title']}** ({s['chunk_count']} chunks, {s['status']})")
|
|
172
|
+
return "\n".join(lines)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
INSTRUCTIONS = (
|
|
176
|
+
"Search across your team's indexed documentation including team responsibilities, "
|
|
177
|
+
"coding guidelines, architecture standards, and cross-team interfaces. "
|
|
178
|
+
"Use the search_documentation tool when you need information about other teams, "
|
|
179
|
+
"shared coding practices, or organizational knowledge."
|
|
180
|
+
)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def run_remote_mcp(*, url: str, auth_name: str = "none") -> None:
|
|
184
|
+
"""Run an MCP server proxying tool calls to a remote docforge search-api."""
|
|
185
|
+
auth = make_auth_provider(auth_name)
|
|
186
|
+
backend = RemoteBackend(url=url, auth=auth)
|
|
187
|
+
mcp = FastMCP("docforge", instructions=INSTRUCTIONS)
|
|
188
|
+
|
|
189
|
+
@mcp.tool()
|
|
190
|
+
async def search_documentation(query: str, limit: int = 5) -> str:
|
|
191
|
+
"""Search across indexed documentation from Confluence pages and git repos."""
|
|
192
|
+
return await backend.search(query=query, limit=limit)
|
|
193
|
+
|
|
194
|
+
@mcp.tool()
|
|
195
|
+
async def list_sources() -> str:
|
|
196
|
+
"""List all documentation sources currently indexed."""
|
|
197
|
+
return await backend.list_sources()
|
|
198
|
+
|
|
199
|
+
mcp.run()
|
|
@@ -41,6 +41,6 @@ class SourcesFile(BaseModel):
|
|
|
41
41
|
|
|
42
42
|
def load_sources(path: str | Path) -> list[SourceConfig]:
|
|
43
43
|
"""Load source configurations from a YAML file."""
|
|
44
|
-
with open(path) as f:
|
|
44
|
+
with open(path, encoding="utf-8") as f:
|
|
45
45
|
data = yaml.safe_load(f)
|
|
46
46
|
return SourcesFile.model_validate(data).sources
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docforge-cli
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: Forge searchable context from Confluence and git repos for AI coding assistants
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://GranatenUdo.github.io/docforge/
|
|
@@ -34,6 +34,9 @@ Provides-Extra: entra
|
|
|
34
34
|
Requires-Dist: fastapi-azure-auth<6.0,>=5.0; extra == "entra"
|
|
35
35
|
Requires-Dist: azure-identity<2.0,>=1.19; extra == "entra"
|
|
36
36
|
Requires-Dist: aiohttp<4.0,>=3.10; extra == "entra"
|
|
37
|
+
Provides-Extra: azure
|
|
38
|
+
Requires-Dist: azure-identity<2.0,>=1.19; extra == "azure"
|
|
39
|
+
Requires-Dist: aiohttp<4.0,>=3.10; extra == "azure"
|
|
37
40
|
Dynamic: license-file
|
|
38
41
|
|
|
39
42
|
# docforge
|
|
@@ -143,6 +146,39 @@ For team-wide use, deploy the search API to Azure (~$90/month at default SKUs wi
|
|
|
143
146
|
|
|
144
147
|
See [`deploy/azure/`](deploy/azure/) for Bicep templates and a full cost breakdown.
|
|
145
148
|
|
|
149
|
+
## Use a hosted instance (no local DB required)
|
|
150
|
+
|
|
151
|
+
If your team already operates a docforge deployment and you only want to *use* it from your editor (Claude Code, etc.), you don't need to clone, ingest, or run Postgres locally:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
# Generic (no auth)
|
|
155
|
+
pip install docforge-cli
|
|
156
|
+
claude mcp add -s user -e DOCFORGE_API_URL=https://docforge.example.com \
|
|
157
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL
|
|
158
|
+
|
|
159
|
+
# Static Bearer token
|
|
160
|
+
pip install docforge-cli
|
|
161
|
+
claude mcp add -s user \
|
|
162
|
+
-e DOCFORGE_API_URL=https://docforge.example.com \
|
|
163
|
+
-e DOCFORGE_API_TOKEN=eyJ... \
|
|
164
|
+
-e DOCFORGE_AUTH=bearer \
|
|
165
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth bearer
|
|
166
|
+
|
|
167
|
+
# Entra (Azure AD)
|
|
168
|
+
pip install docforge-cli[azure]
|
|
169
|
+
az login --tenant <your-tenant-id>
|
|
170
|
+
claude mcp add -s user \
|
|
171
|
+
-e DOCFORGE_API_URL=https://docforge.example.com \
|
|
172
|
+
-e DOCFORGE_AUDIENCE=api://<app-registration-uri> \
|
|
173
|
+
-e DOCFORGE_AUTH=azure \
|
|
174
|
+
-e DOCFORGE_TEAM=your-team \
|
|
175
|
+
docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth azure
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
With `--auth azure`, `user_name` is bound to your Entra JWT subject — you can't (and don't need to) configure it.
|
|
179
|
+
|
|
180
|
+
`DOCFORGE_TEAM` is optional but recommended for team-tag relevance boosting in search results.
|
|
181
|
+
|
|
146
182
|
## Self-hosting / forking
|
|
147
183
|
|
|
148
184
|
The embedder image bakes the EmbeddingGemma-300M model at build time,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/sql/migrations/001_add_source_identifier.sql
RENAMED
|
File without changes
|
{docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/sql/migrations/002_add_status_index.sql
RENAMED
|
File without changes
|
{docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/sql/migrations/003_add_source_tags.sql
RENAMED
|
File without changes
|
|
File without changes
|
{docforge_cli-0.3.0 → docforge_cli-0.4.0}/src/docforge/sql/migrations/005_add_query_log_user_oid.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|