docforge-cli 0.3.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/PKG-INFO +37 -1
  2. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/README.md +33 -0
  3. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/pyproject.toml +5 -1
  4. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/api.py +5 -4
  5. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/cli.py +29 -3
  6. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/config.py +1 -1
  7. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/mcp_server.py +41 -23
  8. docforge_cli-0.4.1/src/docforge/remote_client.py +213 -0
  9. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sources.py +1 -1
  10. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge_cli.egg-info/PKG-INFO +37 -1
  11. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge_cli.egg-info/SOURCES.txt +1 -0
  12. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge_cli.egg-info/requires.txt +4 -0
  13. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/LICENSE +0 -0
  14. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/setup.cfg +0 -0
  15. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/__init__.py +0 -0
  16. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/__main__.py +0 -0
  17. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/crawlers/__init__.py +0 -0
  18. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/crawlers/confluence.py +0 -0
  19. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/crawlers/git.py +0 -0
  20. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/db.py +0 -0
  21. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/embedder_api.py +0 -0
  22. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/ingest.py +0 -0
  23. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/lint.py +0 -0
  24. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/processors/__init__.py +0 -0
  25. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/processors/chunker.py +0 -0
  26. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/processors/embedder.py +0 -0
  27. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/processors/parser.py +0 -0
  28. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/query_log.py +0 -0
  29. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/ranking.py +0 -0
  30. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/scripts/__init__.py +0 -0
  31. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/scripts/eval_search.py +0 -0
  32. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/scripts/latency_report.py +0 -0
  33. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/migrations/001_add_source_identifier.sql +0 -0
  34. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/migrations/002_add_status_index.sql +0 -0
  35. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/migrations/003_add_source_tags.sql +0 -0
  36. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/migrations/004_add_query_log.sql +0 -0
  37. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/migrations/005_add_query_log_user_oid.sql +0 -0
  38. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/migrations/006_add_query_log_request_ms.sql +0 -0
  39. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/sql/schema.sql +0 -0
  40. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/templates/docforge.yml +0 -0
  41. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/templates/docker-compose.yml +0 -0
  42. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/templates/mcp_client.py +0 -0
  43. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge/templates/sources.yml +0 -0
  44. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge_cli.egg-info/dependency_links.txt +0 -0
  45. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge_cli.egg-info/entry_points.txt +0 -0
  46. {docforge_cli-0.3.0 → docforge_cli-0.4.1}/src/docforge_cli.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docforge-cli
3
- Version: 0.3.0
3
+ Version: 0.4.1
4
4
  Summary: Forge searchable context from Confluence and git repos for AI coding assistants
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://GranatenUdo.github.io/docforge/
@@ -34,6 +34,9 @@ Provides-Extra: entra
34
34
  Requires-Dist: fastapi-azure-auth<6.0,>=5.0; extra == "entra"
35
35
  Requires-Dist: azure-identity<2.0,>=1.19; extra == "entra"
36
36
  Requires-Dist: aiohttp<4.0,>=3.10; extra == "entra"
37
+ Provides-Extra: azure
38
+ Requires-Dist: azure-identity<2.0,>=1.19; extra == "azure"
39
+ Requires-Dist: aiohttp<4.0,>=3.10; extra == "azure"
37
40
  Dynamic: license-file
38
41
 
39
42
  # docforge
@@ -143,6 +146,39 @@ For team-wide use, deploy the search API to Azure (~$90/month at default SKUs wi
143
146
 
144
147
  See [`deploy/azure/`](deploy/azure/) for Bicep templates and a full cost breakdown.
145
148
 
149
+ ## Use a hosted instance (no local DB required)
150
+
151
+ If your team already operates a docforge deployment and you only want to *use* it from your editor (Claude Code, etc.), you don't need to clone, ingest, or run Postgres locally:
152
+
153
+ ```bash
154
+ # Generic (no auth)
155
+ pip install docforge-cli
156
+ claude mcp add -s user -e DOCFORGE_API_URL=https://docforge.example.com \
157
+ docforge -- docforge serve --remote-api $DOCFORGE_API_URL
158
+
159
+ # Static Bearer token
160
+ pip install docforge-cli
161
+ claude mcp add -s user \
162
+ -e DOCFORGE_API_URL=https://docforge.example.com \
163
+ -e DOCFORGE_API_TOKEN=eyJ... \
164
+ -e DOCFORGE_AUTH=bearer \
165
+ docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth bearer
166
+
167
+ # Entra (Azure AD)
168
+ pip install docforge-cli[azure]
169
+ az login --tenant <your-tenant-id>
170
+ claude mcp add -s user \
171
+ -e DOCFORGE_API_URL=https://docforge.example.com \
172
+ -e DOCFORGE_AUDIENCE=api://<app-registration-uri> \
173
+ -e DOCFORGE_AUTH=azure \
174
+ -e DOCFORGE_TEAM=your-team \
175
+ docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth azure
176
+ ```
177
+
178
+ With `--auth azure`, `user_name` is bound to your Entra JWT subject — you can't (and don't need to) configure it.
179
+
180
+ `DOCFORGE_TEAM` is optional but recommended for team-tag relevance boosting in search results.
181
+
146
182
  ## Self-hosting / forking
147
183
 
148
184
  The embedder image bakes the EmbeddingGemma-300M model at build time,
@@ -105,6 +105,39 @@ For team-wide use, deploy the search API to Azure (~$90/month at default SKUs wi
105
105
 
106
106
  See [`deploy/azure/`](deploy/azure/) for Bicep templates and a full cost breakdown.
107
107
 
108
+ ## Use a hosted instance (no local DB required)
109
+
110
+ If your team already operates a docforge deployment and you only want to *use* it from your editor (Claude Code, etc.), you don't need to clone, ingest, or run Postgres locally:
111
+
112
+ ```bash
113
+ # Generic (no auth)
114
+ pip install docforge-cli
115
+ claude mcp add -s user -e DOCFORGE_API_URL=https://docforge.example.com \
116
+ docforge -- docforge serve --remote-api $DOCFORGE_API_URL
117
+
118
+ # Static Bearer token
119
+ pip install docforge-cli
120
+ claude mcp add -s user \
121
+ -e DOCFORGE_API_URL=https://docforge.example.com \
122
+ -e DOCFORGE_API_TOKEN=eyJ... \
123
+ -e DOCFORGE_AUTH=bearer \
124
+ docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth bearer
125
+
126
+ # Entra (Azure AD)
127
+ pip install docforge-cli[azure]
128
+ az login --tenant <your-tenant-id>
129
+ claude mcp add -s user \
130
+ -e DOCFORGE_API_URL=https://docforge.example.com \
131
+ -e DOCFORGE_AUDIENCE=api://<app-registration-uri> \
132
+ -e DOCFORGE_AUTH=azure \
133
+ -e DOCFORGE_TEAM=your-team \
134
+ docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth azure
135
+ ```
136
+
137
+ With `--auth azure`, `user_name` is bound to your Entra JWT subject — you can't (and don't need to) configure it.
138
+
139
+ `DOCFORGE_TEAM` is optional but recommended for team-tag relevance boosting in search results.
140
+
108
141
  ## Self-hosting / forking
109
142
 
110
143
  The embedder image bakes the EmbeddingGemma-300M model at build time,
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "docforge-cli"
7
- version = "0.3.0"
7
+ version = "0.4.1"
8
8
  description = "Forge searchable context from Confluence and git repos for AI coding assistants"
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}
@@ -49,6 +49,10 @@ entra = [
49
49
  # aiohttp is required by azure-identity.aio's async pipeline
50
50
  "aiohttp>=3.10,<4.0",
51
51
  ]
52
+ azure = [
53
+ "azure-identity>=1.19,<2.0",
54
+ "aiohttp>=3.10,<4.0", # required by azure-identity.aio
55
+ ]
52
56
 
53
57
  [tool.setuptools.packages.find]
54
58
  where = ["src"]
@@ -155,8 +155,8 @@ async def _auth_dependency(
155
155
 
156
156
  class SearchRequest(BaseModel):
157
157
  query: str = Field(..., max_length=8000)
158
- user_name: str
159
- team_name: str
158
+ user_name: str | None = None
159
+ team_name: str | None = None
160
160
  area_name: str | None = None
161
161
  limit: int = Field(5, ge=1, le=50)
162
162
 
@@ -203,7 +203,7 @@ async def search(
203
203
  logger.error("Embedding failed: %s", e)
204
204
  raise HTTPException(status_code=500, detail="Failed to embed query")
205
205
 
206
- user_tags = [req.team_name] + ([req.area_name] if req.area_name else [])
206
+ user_tags = [t for t in (req.team_name, req.area_name) if t]
207
207
 
208
208
  try:
209
209
  async with pool.acquire() as conn:
@@ -241,9 +241,10 @@ async def search(
241
241
 
242
242
  request_ms = int((time.perf_counter() - start) * 1000)
243
243
 
244
+ effective_user_name = user.preferred_username if user else (req.user_name or "anonymous")
244
245
  await log_query(
245
246
  pool,
246
- user.preferred_username if user else req.user_name,
247
+ effective_user_name,
247
248
  req.team_name,
248
249
  req.area_name,
249
250
  req.query,
@@ -8,6 +8,8 @@ from pathlib import Path
8
8
 
9
9
  import typer
10
10
 
11
+ from docforge.remote_client import AuthName
12
+
11
13
  app = typer.Typer(
12
14
  help="Forge searchable context from Confluence and git repos for AI coding assistants.",
13
15
  )
@@ -117,10 +119,34 @@ def search(
117
119
 
118
120
 
119
121
  @app.command()
120
- def serve(api: bool = typer.Option(False, help="Run FastAPI search API instead of MCP")):
121
- """Run the MCP server (or FastAPI API with --api)."""
122
+ def serve(
123
+ api: bool = typer.Option(False, help="Run FastAPI search API instead of MCP"),
124
+ remote_api: str | None = typer.Option(
125
+ None,
126
+ "--remote-api",
127
+ help="Run MCP backed by a remote search API at this URL",
128
+ envvar="DOCFORGE_API_URL",
129
+ ),
130
+ auth: AuthName = typer.Option(
131
+ AuthName.none,
132
+ "--auth",
133
+ help="Auth provider for --remote-api",
134
+ envvar="DOCFORGE_AUTH",
135
+ ),
136
+ ) -> None:
137
+ """Run the MCP server (or FastAPI API with --api, or remote-backed MCP with --remote-api)."""
122
138
  _setup_logging()
123
- if api:
139
+ if remote_api and api:
140
+ typer.echo("Error: --api and --remote-api are mutually exclusive.", err=True)
141
+ raise typer.Exit(1)
142
+ if auth is not AuthName.none and not remote_api:
143
+ typer.echo("Warning: --auth has no effect without --remote-api.", err=True)
144
+
145
+ if remote_api:
146
+ from docforge.remote_client import run_remote_mcp
147
+
148
+ run_remote_mcp(url=remote_api, auth_name=auth)
149
+ elif api:
124
150
  import uvicorn
125
151
 
126
152
  from docforge.api import app as fastapi_app
@@ -104,7 +104,7 @@ class Settings(BaseSettings):
104
104
  yml_path = Path("docforge.yml")
105
105
  yml_values = {}
106
106
  if yml_path.exists():
107
- with open(yml_path) as f:
107
+ with open(yml_path, encoding="utf-8") as f:
108
108
  yml = yaml.safe_load(f) or {}
109
109
  # Flatten nested embedding config
110
110
  if "embedding" in yml:
@@ -49,6 +49,32 @@ def _get_embedder() -> EmbedderProtocol:
49
49
  return _embedder
50
50
 
51
51
 
52
+ def format_search_results_markdown(
53
+ results: list[dict],
54
+ *,
55
+ empty_message: str = "No documentation found matching your query.",
56
+ ) -> str:
57
+ """Render a list of search-result dicts as the canonical Markdown shape.
58
+
59
+ Each result must have keys: similarity, source_title, source_url, text.
60
+ Optional: section_title, source_tags.
61
+ """
62
+ if not results:
63
+ return empty_message
64
+
65
+ parts: list[str] = []
66
+ for i, r in enumerate(results, 1):
67
+ header = f"**Result {i}** (relevance: {r['similarity']:.2f}) -- {r['source_title']}"
68
+ if r.get("section_title"):
69
+ header += f" > {r['section_title']}"
70
+ header += f"\nSource: {r['source_url']}"
71
+ tags = r.get("source_tags") or []
72
+ if tags:
73
+ header += f"\nTags: {', '.join(tags)}"
74
+ parts.append(f"{header}\n\n{r['text']}")
75
+ return "\n\n---\n\n".join(parts)
76
+
77
+
52
78
  @mcp.tool()
53
79
  async def search_documentation(
54
80
  query: Annotated[str, Field(max_length=8000)],
@@ -115,31 +141,23 @@ async def search_documentation(
115
141
 
116
142
  await log_query(pool, user_name, team_name, area_name, query, len(rows))
117
143
 
118
- if not rows:
119
- return (
144
+ return format_search_results_markdown(
145
+ [
146
+ {
147
+ "similarity": row["similarity"],
148
+ "source_title": row["source_title"],
149
+ "source_url": row["source_url"],
150
+ "section_title": row["section_title"],
151
+ "source_tags": list(row["source_tags"] or []),
152
+ "text": row["text"],
153
+ }
154
+ for row in rows
155
+ ],
156
+ empty_message=(
120
157
  "No documentation found matching your query. "
121
158
  "The index may be empty -- run `python -m docforge ingest` to populate it."
122
- )
123
-
124
- parts: list[str] = []
125
- for i, row in enumerate(rows, 1):
126
- similarity = row["similarity"]
127
- source = row["source_title"]
128
- url = row["source_url"]
129
- section = row["section_title"]
130
- text = row["text"]
131
- tags = list(row["source_tags"] or [])
132
-
133
- header = f"**Result {i}** (relevance: {similarity:.2f}) — {source}"
134
- if section:
135
- header += f" > {section}"
136
- header += f"\nSource: {url}"
137
- if tags:
138
- header += f"\nTags: {', '.join(tags)}"
139
-
140
- parts.append(f"{header}\n\n{text}")
141
-
142
- return "\n\n---\n\n".join(parts)
159
+ ),
160
+ )
143
161
 
144
162
 
145
163
  @mcp.tool()
@@ -0,0 +1,213 @@
1
+ """MCP server that proxies tool calls to a remote docforge search-api.
2
+
3
+ Used by `docforge serve --remote-api $URL --auth ...`. See the
4
+ 2026-05-08-docforge-remote-api-mode-design.md spec for the full design.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ from enum import Enum
11
+ from typing import Protocol
12
+
13
+ import httpx
14
+ from fastmcp import FastMCP
15
+
16
+
17
+ class AuthName(str, Enum):
18
+ """Selectable auth providers for the --remote-api mode."""
19
+
20
+ none = "none"
21
+ bearer = "bearer"
22
+ azure = "azure"
23
+
24
+
25
+ class AuthProvider(Protocol):
26
+ """Async source of HTTP headers attached to each remote request."""
27
+
28
+ async def headers(self) -> dict[str, str]: ...
29
+
30
+
31
+ class NoneAuth:
32
+ """No-op auth provider. Returns no headers."""
33
+
34
+ async def headers(self) -> dict[str, str]:
35
+ return {}
36
+
37
+
38
+ class BearerAuth:
39
+ """Static Bearer token from DOCFORGE_API_TOKEN env var."""
40
+
41
+ def __init__(self) -> None:
42
+ token = os.environ.get("DOCFORGE_API_TOKEN", "").strip()
43
+ if not token:
44
+ raise RuntimeError("BearerAuth requires DOCFORGE_API_TOKEN env var to be set.")
45
+ self._token = token
46
+
47
+ async def headers(self) -> dict[str, str]:
48
+ return {"Authorization": f"Bearer {self._token}"}
49
+
50
+
51
+ class AzureAuth:
52
+ """Entra Bearer token via DefaultAzureCredential.
53
+
54
+ Requires `pip install docforge-cli[azure]`. Reads target audience
55
+ from DOCFORGE_AUDIENCE env var.
56
+ """
57
+
58
+ def __init__(self) -> None:
59
+ try:
60
+ from azure.identity.aio import DefaultAzureCredential
61
+ except ImportError as e:
62
+ raise ImportError("Azure auth requires `pip install docforge-cli[azure]`.") from e
63
+
64
+ audience = os.environ.get("DOCFORGE_AUDIENCE", "").strip()
65
+ if not audience:
66
+ raise RuntimeError("AzureAuth requires DOCFORGE_AUDIENCE env var to be set.")
67
+ self._audience = audience
68
+ self._credential = DefaultAzureCredential()
69
+
70
+ async def headers(self) -> dict[str, str]:
71
+ token = await self._credential.get_token(f"{self._audience}/.default")
72
+ return {"Authorization": f"Bearer {token.token}"}
73
+
74
+
75
+ def make_auth_provider(name: AuthName | str) -> AuthProvider:
76
+ """Return an AuthProvider instance for the given name."""
77
+ try:
78
+ name = AuthName(name) if isinstance(name, str) else name
79
+ except ValueError as e:
80
+ raise ValueError(f"Unknown auth provider: {name!r}. Valid: none, bearer, azure.") from e
81
+ if name is AuthName.none:
82
+ return NoneAuth()
83
+ if name is AuthName.bearer:
84
+ return BearerAuth()
85
+ if name is AuthName.azure:
86
+ return AzureAuth()
87
+ raise ValueError(f"Unknown auth provider: {name!r}.")
88
+
89
+
90
+ class RemoteBackend:
91
+ """Proxy to a remote docforge search-api over HTTP."""
92
+
93
+ def __init__(
94
+ self,
95
+ *,
96
+ url: str,
97
+ auth: AuthProvider,
98
+ transport: httpx.AsyncBaseTransport | None = None,
99
+ ) -> None:
100
+ self._url = url.rstrip("/")
101
+ self._auth = auth
102
+ self._transport = transport
103
+ self._client: httpx.AsyncClient | None = None
104
+
105
+ async def _ensure_client(self) -> httpx.AsyncClient:
106
+ if self._client is None:
107
+ self._client = httpx.AsyncClient(transport=self._transport, timeout=30.0)
108
+ return self._client
109
+
110
+ async def aclose(self) -> None:
111
+ if self._client is not None:
112
+ await self._client.aclose()
113
+ self._client = None
114
+
115
+ def _identity_body(self) -> dict[str, str]:
116
+ out: dict[str, str] = {}
117
+ for env_var, body_key in (
118
+ ("DOCFORGE_USER", "user_name"),
119
+ ("DOCFORGE_TEAM", "team_name"),
120
+ ("DOCFORGE_AREA", "area_name"),
121
+ ):
122
+ val = os.environ.get(env_var, "").strip()
123
+ if val:
124
+ out[body_key] = val
125
+ return out
126
+
127
+ async def _request(
128
+ self,
129
+ method: str,
130
+ path: str,
131
+ *,
132
+ json: dict[str, object] | None = None,
133
+ ) -> httpx.Response | str:
134
+ """Perform an HTTP request with auth and uniform error handling.
135
+
136
+ Returns the Response on 2xx; an already-formatted error string otherwise.
137
+ """
138
+ try:
139
+ headers = await self._auth.headers()
140
+ except Exception as e:
141
+ return f"Auth provider error: {e}"
142
+
143
+ client = await self._ensure_client()
144
+ try:
145
+ resp = await client.request(method, f"{self._url}{path}", json=json, headers=headers)
146
+ except httpx.ConnectError:
147
+ return f"Could not reach remote API at {self._url}."
148
+ except httpx.HTTPError as e:
149
+ return f"Remote API error: {e}"
150
+
151
+ if resp.status_code == 401:
152
+ return "Auth failed (401). Check DOCFORGE_API_URL and the --auth provider."
153
+ if 500 <= resp.status_code < 600:
154
+ return f"Remote API error ({resp.status_code}). Try again in a moment."
155
+ if resp.status_code != 200:
156
+ return f"Remote API returned {resp.status_code}: {resp.text[:200]}"
157
+ return resp
158
+
159
+ async def search(self, *, query: str, limit: int = 5) -> str:
160
+ """Search the remote API and return Markdown-formatted results."""
161
+ body: dict[str, object] = {"query": query, "limit": limit}
162
+ body.update(self._identity_body())
163
+ result = await self._request("POST", "/search", json=body)
164
+ if isinstance(result, str):
165
+ return result
166
+
167
+ from docforge.mcp_server import format_search_results_markdown
168
+
169
+ data = result.json()
170
+ return format_search_results_markdown(data.get("results", []))
171
+
172
+ async def list_sources(self) -> str:
173
+ """List indexed sources from the remote API."""
174
+ result = await self._request("GET", "/sources")
175
+ if isinstance(result, str):
176
+ return result
177
+
178
+ data = result.json()
179
+ sources = data.get("sources", [])
180
+ if not sources:
181
+ return "No sources indexed."
182
+
183
+ lines = [f"**{data.get('count', len(sources))} indexed sources:**\n"]
184
+ for s in sources:
185
+ lines.append(f"- **{s['title']}** ({s['chunk_count']} chunks, {s['status']})")
186
+ return "\n".join(lines)
187
+
188
+
189
+ INSTRUCTIONS = (
190
+ "Search across your team's indexed documentation including team responsibilities, "
191
+ "coding guidelines, architecture standards, and cross-team interfaces. "
192
+ "Use the search_documentation tool when you need information about other teams, "
193
+ "shared coding practices, or organizational knowledge."
194
+ )
195
+
196
+
197
+ def run_remote_mcp(*, url: str, auth_name: AuthName | str = AuthName.none) -> None:
198
+ """Run an MCP server proxying tool calls to a remote docforge search-api."""
199
+ auth = make_auth_provider(auth_name)
200
+ backend = RemoteBackend(url=url, auth=auth)
201
+ mcp = FastMCP("docforge", instructions=INSTRUCTIONS)
202
+
203
+ @mcp.tool()
204
+ async def search_documentation(query: str, limit: int = 5) -> str:
205
+ """Search across indexed documentation from Confluence pages and git repos."""
206
+ return await backend.search(query=query, limit=limit)
207
+
208
+ @mcp.tool()
209
+ async def list_sources() -> str:
210
+ """List all documentation sources currently indexed."""
211
+ return await backend.list_sources()
212
+
213
+ mcp.run()
@@ -41,6 +41,6 @@ class SourcesFile(BaseModel):
41
41
 
42
42
  def load_sources(path: str | Path) -> list[SourceConfig]:
43
43
  """Load source configurations from a YAML file."""
44
- with open(path) as f:
44
+ with open(path, encoding="utf-8") as f:
45
45
  data = yaml.safe_load(f)
46
46
  return SourcesFile.model_validate(data).sources
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docforge-cli
3
- Version: 0.3.0
3
+ Version: 0.4.1
4
4
  Summary: Forge searchable context from Confluence and git repos for AI coding assistants
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://GranatenUdo.github.io/docforge/
@@ -34,6 +34,9 @@ Provides-Extra: entra
34
34
  Requires-Dist: fastapi-azure-auth<6.0,>=5.0; extra == "entra"
35
35
  Requires-Dist: azure-identity<2.0,>=1.19; extra == "entra"
36
36
  Requires-Dist: aiohttp<4.0,>=3.10; extra == "entra"
37
+ Provides-Extra: azure
38
+ Requires-Dist: azure-identity<2.0,>=1.19; extra == "azure"
39
+ Requires-Dist: aiohttp<4.0,>=3.10; extra == "azure"
37
40
  Dynamic: license-file
38
41
 
39
42
  # docforge
@@ -143,6 +146,39 @@ For team-wide use, deploy the search API to Azure (~$90/month at default SKUs wi
143
146
 
144
147
  See [`deploy/azure/`](deploy/azure/) for Bicep templates and a full cost breakdown.
145
148
 
149
+ ## Use a hosted instance (no local DB required)
150
+
151
+ If your team already operates a docforge deployment and you only want to *use* it from your editor (Claude Code, etc.), you don't need to clone, ingest, or run Postgres locally:
152
+
153
+ ```bash
154
+ # Generic (no auth)
155
+ pip install docforge-cli
156
+ claude mcp add -s user -e DOCFORGE_API_URL=https://docforge.example.com \
157
+ docforge -- docforge serve --remote-api $DOCFORGE_API_URL
158
+
159
+ # Static Bearer token
160
+ pip install docforge-cli
161
+ claude mcp add -s user \
162
+ -e DOCFORGE_API_URL=https://docforge.example.com \
163
+ -e DOCFORGE_API_TOKEN=eyJ... \
164
+ -e DOCFORGE_AUTH=bearer \
165
+ docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth bearer
166
+
167
+ # Entra (Azure AD)
168
+ pip install docforge-cli[azure]
169
+ az login --tenant <your-tenant-id>
170
+ claude mcp add -s user \
171
+ -e DOCFORGE_API_URL=https://docforge.example.com \
172
+ -e DOCFORGE_AUDIENCE=api://<app-registration-uri> \
173
+ -e DOCFORGE_AUTH=azure \
174
+ -e DOCFORGE_TEAM=your-team \
175
+ docforge -- docforge serve --remote-api $DOCFORGE_API_URL --auth azure
176
+ ```
177
+
178
+ With `--auth azure`, `user_name` is bound to your Entra JWT subject — you can't (and don't need to) configure it.
179
+
180
+ `DOCFORGE_TEAM` is optional but recommended for team-tag relevance boosting in search results.
181
+
146
182
  ## Self-hosting / forking
147
183
 
148
184
  The embedder image bakes the EmbeddingGemma-300M model at build time,
@@ -13,6 +13,7 @@ src/docforge/lint.py
13
13
  src/docforge/mcp_server.py
14
14
  src/docforge/query_log.py
15
15
  src/docforge/ranking.py
16
+ src/docforge/remote_client.py
16
17
  src/docforge/sources.py
17
18
  src/docforge/crawlers/__init__.py
18
19
  src/docforge/crawlers/confluence.py
@@ -12,6 +12,10 @@ fastapi<1.0,>=0.115
12
12
  uvicorn<1.0,>=0.34
13
13
  numpy<3.0,>=1.26
14
14
 
15
+ [azure]
16
+ azure-identity<2.0,>=1.19
17
+ aiohttp<4.0,>=3.10
18
+
15
19
  [dev]
16
20
  pytest<10.0,>=9.0
17
21
  pytest-asyncio<2.0,>=1.0
File without changes
File without changes