docforge-cli 0.4.0__tar.gz → 0.5.0__tar.gz
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/PKG-INFO +1 -1
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/pyproject.toml +1 -1
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/api.py +55 -23
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/cli.py +12 -8
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/config.py +11 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/mcp_server.py +41 -23
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/remote_client.py +61 -47
- docforge_cli-0.5.0/src/docforge/sql/migrations/007_add_chunks_text_tsv.sql +16 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge_cli.egg-info/PKG-INFO +1 -1
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge_cli.egg-info/SOURCES.txt +1 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/LICENSE +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/README.md +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/setup.cfg +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/__init__.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/__main__.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/crawlers/__init__.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/crawlers/confluence.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/crawlers/git.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/db.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/embedder_api.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/ingest.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/lint.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/processors/__init__.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/processors/chunker.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/processors/embedder.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/processors/parser.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/query_log.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/ranking.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/scripts/__init__.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/scripts/eval_search.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/scripts/latency_report.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/sources.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/sql/migrations/001_add_source_identifier.sql +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/sql/migrations/002_add_status_index.sql +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/sql/migrations/003_add_source_tags.sql +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/sql/migrations/004_add_query_log.sql +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/sql/migrations/005_add_query_log_user_oid.sql +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/sql/migrations/006_add_query_log_request_ms.sql +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/sql/schema.sql +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/templates/docforge.yml +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/templates/docker-compose.yml +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/templates/mcp_client.py +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/templates/sources.yml +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge_cli.egg-info/dependency_links.txt +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge_cli.egg-info/entry_points.txt +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge_cli.egg-info/requires.txt +0 -0
- {docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge_cli.egg-info/top_level.txt +0 -0
|
@@ -209,31 +209,63 @@ async def search(
|
|
|
209
209
|
async with pool.acquire() as conn:
|
|
210
210
|
rows = await conn.fetch(
|
|
211
211
|
"""
|
|
212
|
-
SELECT
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
212
|
+
WITH q_tsq AS (SELECT websearch_to_tsquery($8::regconfig, $2::text) AS q),
|
|
213
|
+
dense AS (
|
|
214
|
+
SELECT id, source_id, text, section_title,
|
|
215
|
+
ROW_NUMBER() OVER (ORDER BY dist) AS rank
|
|
216
|
+
FROM (
|
|
217
|
+
SELECT c.id, c.source_id, c.text, c.section_title,
|
|
218
|
+
c.embedding <=> $1::vector AS dist
|
|
219
|
+
FROM chunks c JOIN sources s ON c.source_id = s.id
|
|
220
|
+
WHERE s.status = 'active'
|
|
221
|
+
ORDER BY c.embedding <=> $1::vector
|
|
222
|
+
LIMIT $3
|
|
223
|
+
) AS t
|
|
224
|
+
),
|
|
225
|
+
sparse AS (
|
|
226
|
+
SELECT id, source_id, text, section_title,
|
|
227
|
+
ROW_NUMBER() OVER (ORDER BY rk DESC) AS rank
|
|
228
|
+
FROM (
|
|
229
|
+
SELECT c.id, c.source_id, c.text, c.section_title,
|
|
230
|
+
ts_rank_cd(c.text_tsv, (SELECT q FROM q_tsq)) AS rk
|
|
231
|
+
FROM chunks c JOIN sources s ON c.source_id = s.id
|
|
232
|
+
WHERE s.status = 'active'
|
|
233
|
+
AND c.text_tsv @@ (SELECT q FROM q_tsq)
|
|
234
|
+
ORDER BY ts_rank_cd(c.text_tsv, (SELECT q FROM q_tsq)) DESC
|
|
235
|
+
LIMIT $3
|
|
236
|
+
) AS t
|
|
237
|
+
),
|
|
238
|
+
fused AS (
|
|
239
|
+
SELECT COALESCE(d.id, sp.id) AS id,
|
|
240
|
+
COALESCE(d.source_id, sp.source_id) AS source_id,
|
|
241
|
+
COALESCE(d.text, sp.text) AS text,
|
|
242
|
+
COALESCE(d.section_title, sp.section_title) AS section_title,
|
|
243
|
+
COALESCE(1.0/($9 + d.rank), 0)
|
|
244
|
+
+ COALESCE(1.0/($9 + sp.rank), 0) AS rrf
|
|
245
|
+
FROM dense d FULL OUTER JOIN sparse sp ON d.id = sp.id
|
|
246
|
+
)
|
|
247
|
+
SELECT f.text, f.section_title,
|
|
248
|
+
s.title AS source_title, s.url AS source_url, s.tags AS source_tags,
|
|
249
|
+
f.rrf AS similarity,
|
|
250
|
+
f.rrf * (1
|
|
251
|
+
+ $4::float * cardinality(
|
|
252
|
+
ARRAY(SELECT unnest(s.tags) INTERSECT SELECT unnest($5::text[]))
|
|
253
|
+
)
|
|
254
|
+
+ $6::float * (CASE WHEN 'org' = ANY(s.tags) THEN 1 ELSE 0 END)
|
|
255
|
+
) AS boosted_score
|
|
256
|
+
FROM fused f JOIN sources s ON f.source_id = s.id
|
|
229
257
|
ORDER BY boosted_score DESC
|
|
230
|
-
LIMIT $
|
|
258
|
+
LIMIT $7
|
|
231
259
|
""",
|
|
232
|
-
np.array(query_vector, dtype=np.float32),
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
settings.
|
|
236
|
-
|
|
260
|
+
np.array(query_vector, dtype=np.float32), # $1
|
|
261
|
+
req.query, # $2
|
|
262
|
+
settings.hybrid_pool_size, # $3
|
|
263
|
+
settings.tag_match_weight, # $4
|
|
264
|
+
user_tags, # $5
|
|
265
|
+
settings.org_tag_weight, # $6
|
|
266
|
+
req.limit, # $7
|
|
267
|
+
settings.fts_language, # $8
|
|
268
|
+
settings.rrf_k, # $9
|
|
237
269
|
)
|
|
238
270
|
except Exception as e:
|
|
239
271
|
logger.error("Database error during search: %s", e)
|
|
@@ -8,6 +8,8 @@ from pathlib import Path
|
|
|
8
8
|
|
|
9
9
|
import typer
|
|
10
10
|
|
|
11
|
+
from docforge.remote_client import AuthName
|
|
12
|
+
|
|
11
13
|
app = typer.Typer(
|
|
12
14
|
help="Forge searchable context from Confluence and git repos for AI coding assistants.",
|
|
13
15
|
)
|
|
@@ -125,24 +127,26 @@ def serve(
|
|
|
125
127
|
help="Run MCP backed by a remote search API at this URL",
|
|
126
128
|
envvar="DOCFORGE_API_URL",
|
|
127
129
|
),
|
|
128
|
-
auth:
|
|
129
|
-
|
|
130
|
+
auth: AuthName = typer.Option(
|
|
131
|
+
AuthName.none,
|
|
130
132
|
"--auth",
|
|
131
|
-
help="Auth provider for --remote-api
|
|
133
|
+
help="Auth provider for --remote-api",
|
|
132
134
|
envvar="DOCFORGE_AUTH",
|
|
133
135
|
),
|
|
134
136
|
) -> None:
|
|
135
137
|
"""Run the MCP server (or FastAPI API with --api, or remote-backed MCP with --remote-api)."""
|
|
136
138
|
_setup_logging()
|
|
139
|
+
if remote_api and api:
|
|
140
|
+
typer.echo("Error: --api and --remote-api are mutually exclusive.", err=True)
|
|
141
|
+
raise typer.Exit(1)
|
|
142
|
+
if auth is not AuthName.none and not remote_api:
|
|
143
|
+
typer.echo("Warning: --auth has no effect without --remote-api.", err=True)
|
|
144
|
+
|
|
137
145
|
if remote_api:
|
|
138
|
-
if api:
|
|
139
|
-
typer.echo("Error: --api and --remote-api are mutually exclusive.", err=True)
|
|
140
|
-
raise typer.Exit(1)
|
|
141
146
|
from docforge.remote_client import run_remote_mcp
|
|
142
147
|
|
|
143
148
|
run_remote_mcp(url=remote_api, auth_name=auth)
|
|
144
|
-
|
|
145
|
-
if api:
|
|
149
|
+
elif api:
|
|
146
150
|
import uvicorn
|
|
147
151
|
|
|
148
152
|
from docforge.api import app as fastapi_app
|
|
@@ -66,6 +66,17 @@ class Settings(BaseSettings):
|
|
|
66
66
|
tag_match_weight: float = 0.1
|
|
67
67
|
org_tag_weight: float = 0.05
|
|
68
68
|
|
|
69
|
+
# Hybrid retrieval (RRF over dense + sparse). rrf_k=60 matches the universal
|
|
70
|
+
# default (Azure AI Search, Elasticsearch, OpenSearch); higher k flattens
|
|
71
|
+
# the rank distribution, lower amplifies. hybrid_pool_size is the top-N
|
|
72
|
+
# from each retriever feeding RRF — 4-10x req.limit is the standard rule,
|
|
73
|
+
# and req.limit caps at 50 so 100 covers under-recalled queries with margin.
|
|
74
|
+
# fts_language is the Postgres text-search config; switch to 'simple' if
|
|
75
|
+
# non-English content appears in the corpus.
|
|
76
|
+
rrf_k: int = 60
|
|
77
|
+
hybrid_pool_size: int = 100
|
|
78
|
+
fts_language: str = "english"
|
|
79
|
+
|
|
69
80
|
# Default identity (used as CLI flag defaults when set via env/yml)
|
|
70
81
|
default_user_name: str = ""
|
|
71
82
|
default_team_name: str = ""
|
|
@@ -49,6 +49,32 @@ def _get_embedder() -> EmbedderProtocol:
|
|
|
49
49
|
return _embedder
|
|
50
50
|
|
|
51
51
|
|
|
52
|
+
def format_search_results_markdown(
|
|
53
|
+
results: list[dict],
|
|
54
|
+
*,
|
|
55
|
+
empty_message: str = "No documentation found matching your query.",
|
|
56
|
+
) -> str:
|
|
57
|
+
"""Render a list of search-result dicts as the canonical Markdown shape.
|
|
58
|
+
|
|
59
|
+
Each result must have keys: similarity, source_title, source_url, text.
|
|
60
|
+
Optional: section_title, source_tags.
|
|
61
|
+
"""
|
|
62
|
+
if not results:
|
|
63
|
+
return empty_message
|
|
64
|
+
|
|
65
|
+
parts: list[str] = []
|
|
66
|
+
for i, r in enumerate(results, 1):
|
|
67
|
+
header = f"**Result {i}** (relevance: {r['similarity']:.2f}) -- {r['source_title']}"
|
|
68
|
+
if r.get("section_title"):
|
|
69
|
+
header += f" > {r['section_title']}"
|
|
70
|
+
header += f"\nSource: {r['source_url']}"
|
|
71
|
+
tags = r.get("source_tags") or []
|
|
72
|
+
if tags:
|
|
73
|
+
header += f"\nTags: {', '.join(tags)}"
|
|
74
|
+
parts.append(f"{header}\n\n{r['text']}")
|
|
75
|
+
return "\n\n---\n\n".join(parts)
|
|
76
|
+
|
|
77
|
+
|
|
52
78
|
@mcp.tool()
|
|
53
79
|
async def search_documentation(
|
|
54
80
|
query: Annotated[str, Field(max_length=8000)],
|
|
@@ -115,31 +141,23 @@ async def search_documentation(
|
|
|
115
141
|
|
|
116
142
|
await log_query(pool, user_name, team_name, area_name, query, len(rows))
|
|
117
143
|
|
|
118
|
-
|
|
119
|
-
|
|
144
|
+
return format_search_results_markdown(
|
|
145
|
+
[
|
|
146
|
+
{
|
|
147
|
+
"similarity": row["similarity"],
|
|
148
|
+
"source_title": row["source_title"],
|
|
149
|
+
"source_url": row["source_url"],
|
|
150
|
+
"section_title": row["section_title"],
|
|
151
|
+
"source_tags": list(row["source_tags"] or []),
|
|
152
|
+
"text": row["text"],
|
|
153
|
+
}
|
|
154
|
+
for row in rows
|
|
155
|
+
],
|
|
156
|
+
empty_message=(
|
|
120
157
|
"No documentation found matching your query. "
|
|
121
158
|
"The index may be empty -- run `python -m docforge ingest` to populate it."
|
|
122
|
-
)
|
|
123
|
-
|
|
124
|
-
parts: list[str] = []
|
|
125
|
-
for i, row in enumerate(rows, 1):
|
|
126
|
-
similarity = row["similarity"]
|
|
127
|
-
source = row["source_title"]
|
|
128
|
-
url = row["source_url"]
|
|
129
|
-
section = row["section_title"]
|
|
130
|
-
text = row["text"]
|
|
131
|
-
tags = list(row["source_tags"] or [])
|
|
132
|
-
|
|
133
|
-
header = f"**Result {i}** (relevance: {similarity:.2f}) — {source}"
|
|
134
|
-
if section:
|
|
135
|
-
header += f" > {section}"
|
|
136
|
-
header += f"\nSource: {url}"
|
|
137
|
-
if tags:
|
|
138
|
-
header += f"\nTags: {', '.join(tags)}"
|
|
139
|
-
|
|
140
|
-
parts.append(f"{header}\n\n{text}")
|
|
141
|
-
|
|
142
|
-
return "\n\n---\n\n".join(parts)
|
|
159
|
+
),
|
|
160
|
+
)
|
|
143
161
|
|
|
144
162
|
|
|
145
163
|
@mcp.tool()
|
|
@@ -7,12 +7,21 @@ Used by `docforge serve --remote-api $URL --auth ...`. See the
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
9
|
import os
|
|
10
|
+
from enum import Enum
|
|
10
11
|
from typing import Protocol
|
|
11
12
|
|
|
12
13
|
import httpx
|
|
13
14
|
from fastmcp import FastMCP
|
|
14
15
|
|
|
15
16
|
|
|
17
|
+
class AuthName(str, Enum):
|
|
18
|
+
"""Selectable auth providers for the --remote-api mode."""
|
|
19
|
+
|
|
20
|
+
none = "none"
|
|
21
|
+
bearer = "bearer"
|
|
22
|
+
azure = "azure"
|
|
23
|
+
|
|
24
|
+
|
|
16
25
|
class AuthProvider(Protocol):
|
|
17
26
|
"""Async source of HTTP headers attached to each remote request."""
|
|
18
27
|
|
|
@@ -63,15 +72,19 @@ class AzureAuth:
|
|
|
63
72
|
return {"Authorization": f"Bearer {token.token}"}
|
|
64
73
|
|
|
65
74
|
|
|
66
|
-
def make_auth_provider(name: str) -> AuthProvider:
|
|
75
|
+
def make_auth_provider(name: AuthName | str) -> AuthProvider:
|
|
67
76
|
"""Return an AuthProvider instance for the given name."""
|
|
68
|
-
|
|
77
|
+
try:
|
|
78
|
+
name = AuthName(name) if isinstance(name, str) else name
|
|
79
|
+
except ValueError as e:
|
|
80
|
+
raise ValueError(f"Unknown auth provider: {name!r}. Valid: none, bearer, azure.") from e
|
|
81
|
+
if name is AuthName.none:
|
|
69
82
|
return NoneAuth()
|
|
70
|
-
if name
|
|
83
|
+
if name is AuthName.bearer:
|
|
71
84
|
return BearerAuth()
|
|
72
|
-
if name
|
|
85
|
+
if name is AuthName.azure:
|
|
73
86
|
return AzureAuth()
|
|
74
|
-
raise ValueError(f"Unknown auth provider: {name!r}.
|
|
87
|
+
raise ValueError(f"Unknown auth provider: {name!r}.")
|
|
75
88
|
|
|
76
89
|
|
|
77
90
|
class RemoteBackend:
|
|
@@ -86,7 +99,18 @@ class RemoteBackend:
|
|
|
86
99
|
) -> None:
|
|
87
100
|
self._url = url.rstrip("/")
|
|
88
101
|
self._auth = auth
|
|
89
|
-
self._transport = transport
|
|
102
|
+
self._transport = transport
|
|
103
|
+
self._client: httpx.AsyncClient | None = None
|
|
104
|
+
|
|
105
|
+
async def _ensure_client(self) -> httpx.AsyncClient:
|
|
106
|
+
if self._client is None:
|
|
107
|
+
self._client = httpx.AsyncClient(transport=self._transport, timeout=30.0)
|
|
108
|
+
return self._client
|
|
109
|
+
|
|
110
|
+
async def aclose(self) -> None:
|
|
111
|
+
if self._client is not None:
|
|
112
|
+
await self._client.aclose()
|
|
113
|
+
self._client = None
|
|
90
114
|
|
|
91
115
|
def _identity_body(self) -> dict[str, str]:
|
|
92
116
|
out: dict[str, str] = {}
|
|
@@ -100,18 +124,25 @@ class RemoteBackend:
|
|
|
100
124
|
out[body_key] = val
|
|
101
125
|
return out
|
|
102
126
|
|
|
103
|
-
async def
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
127
|
+
async def _request(
|
|
128
|
+
self,
|
|
129
|
+
method: str,
|
|
130
|
+
path: str,
|
|
131
|
+
*,
|
|
132
|
+
json: dict[str, object] | None = None,
|
|
133
|
+
) -> httpx.Response | str:
|
|
134
|
+
"""Perform an HTTP request with auth and uniform error handling.
|
|
135
|
+
|
|
136
|
+
Returns the Response on 2xx; an already-formatted error string otherwise.
|
|
137
|
+
"""
|
|
107
138
|
try:
|
|
108
139
|
headers = await self._auth.headers()
|
|
109
140
|
except Exception as e:
|
|
110
141
|
return f"Auth provider error: {e}"
|
|
111
142
|
|
|
143
|
+
client = await self._ensure_client()
|
|
112
144
|
try:
|
|
113
|
-
|
|
114
|
-
resp = await client.post(f"{self._url}/search", json=body, headers=headers)
|
|
145
|
+
resp = await client.request(method, f"{self._url}{path}", json=json, headers=headers)
|
|
115
146
|
except httpx.ConnectError:
|
|
116
147
|
return f"Could not reach remote API at {self._url}."
|
|
117
148
|
except httpx.HTTPError as e:
|
|
@@ -123,45 +154,28 @@ class RemoteBackend:
|
|
|
123
154
|
return f"Remote API error ({resp.status_code}). Try again in a moment."
|
|
124
155
|
if resp.status_code != 200:
|
|
125
156
|
return f"Remote API returned {resp.status_code}: {resp.text[:200]}"
|
|
157
|
+
return resp
|
|
126
158
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
header = f"**Result {i}** (relevance: {r['similarity']:.2f}) -- {r['source_title']}"
|
|
135
|
-
if r.get("section_title"):
|
|
136
|
-
header += f" > {r['section_title']}"
|
|
137
|
-
header += f"\nSource: {r['source_url']}"
|
|
138
|
-
tags = r.get("source_tags") or []
|
|
139
|
-
if tags:
|
|
140
|
-
header += f"\nTags: {', '.join(tags)}"
|
|
141
|
-
parts.append(f"{header}\n\n{r['text']}")
|
|
142
|
-
return "\n\n---\n\n".join(parts)
|
|
159
|
+
async def search(self, *, query: str, limit: int = 5) -> str:
|
|
160
|
+
"""Search the remote API and return Markdown-formatted results."""
|
|
161
|
+
body: dict[str, object] = {"query": query, "limit": limit}
|
|
162
|
+
body.update(self._identity_body())
|
|
163
|
+
result = await self._request("POST", "/search", json=body)
|
|
164
|
+
if isinstance(result, str):
|
|
165
|
+
return result
|
|
143
166
|
|
|
144
|
-
|
|
145
|
-
"""List indexed sources from the remote API."""
|
|
146
|
-
try:
|
|
147
|
-
headers = await self._auth.headers()
|
|
148
|
-
except Exception as e:
|
|
149
|
-
return f"Auth provider error: {e}"
|
|
167
|
+
from docforge.mcp_server import format_search_results_markdown
|
|
150
168
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
resp = await client.get(f"{self._url}/sources", headers=headers)
|
|
154
|
-
except httpx.ConnectError:
|
|
155
|
-
return f"Could not reach remote API at {self._url}."
|
|
156
|
-
except httpx.HTTPError as e:
|
|
157
|
-
return f"Remote API error: {e}"
|
|
169
|
+
data = result.json()
|
|
170
|
+
return format_search_results_markdown(data.get("results", []))
|
|
158
171
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
172
|
+
async def list_sources(self) -> str:
|
|
173
|
+
"""List indexed sources from the remote API."""
|
|
174
|
+
result = await self._request("GET", "/sources")
|
|
175
|
+
if isinstance(result, str):
|
|
176
|
+
return result
|
|
163
177
|
|
|
164
|
-
data =
|
|
178
|
+
data = result.json()
|
|
165
179
|
sources = data.get("sources", [])
|
|
166
180
|
if not sources:
|
|
167
181
|
return "No sources indexed."
|
|
@@ -180,7 +194,7 @@ INSTRUCTIONS = (
|
|
|
180
194
|
)
|
|
181
195
|
|
|
182
196
|
|
|
183
|
-
def run_remote_mcp(*, url: str, auth_name: str =
|
|
197
|
+
def run_remote_mcp(*, url: str, auth_name: AuthName | str = AuthName.none) -> None:
|
|
184
198
|
"""Run an MCP server proxying tool calls to a remote docforge search-api."""
|
|
185
199
|
auth = make_auth_provider(auth_name)
|
|
186
200
|
backend = RemoteBackend(url=url, auth=auth)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
-- Migration 007: add tsvector column and GIN index for hybrid retrieval.
|
|
2
|
+
--
|
|
3
|
+
-- text_tsv is GENERATED ALWAYS AS STORED, so Postgres backfills existing
|
|
4
|
+
-- rows as part of the ALTER TABLE and auto-populates on every INSERT.
|
|
5
|
+
-- No application changes required for ingest.
|
|
6
|
+
--
|
|
7
|
+
-- The GIN index is built non-concurrently. For the current chunk count
|
|
8
|
+
-- (~tens of thousands) this is sub-second. If chunks grows past ~1M
|
|
9
|
+
-- rows, switch a future migration to CREATE INDEX CONCURRENTLY (which
|
|
10
|
+
-- requires running outside a transaction).
|
|
11
|
+
|
|
12
|
+
ALTER TABLE chunks
|
|
13
|
+
ADD COLUMN IF NOT EXISTS text_tsv tsvector
|
|
14
|
+
GENERATED ALWAYS AS (to_tsvector('english', text)) STORED;
|
|
15
|
+
|
|
16
|
+
CREATE INDEX IF NOT EXISTS chunks_text_tsv_idx ON chunks USING GIN (text_tsv);
|
|
@@ -32,6 +32,7 @@ src/docforge/sql/migrations/003_add_source_tags.sql
|
|
|
32
32
|
src/docforge/sql/migrations/004_add_query_log.sql
|
|
33
33
|
src/docforge/sql/migrations/005_add_query_log_user_oid.sql
|
|
34
34
|
src/docforge/sql/migrations/006_add_query_log_request_ms.sql
|
|
35
|
+
src/docforge/sql/migrations/007_add_chunks_text_tsv.sql
|
|
35
36
|
src/docforge/templates/docforge.yml
|
|
36
37
|
src/docforge/templates/docker-compose.yml
|
|
37
38
|
src/docforge/templates/mcp_client.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/sql/migrations/001_add_source_identifier.sql
RENAMED
|
File without changes
|
{docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/sql/migrations/002_add_status_index.sql
RENAMED
|
File without changes
|
{docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/sql/migrations/003_add_source_tags.sql
RENAMED
|
File without changes
|
|
File without changes
|
{docforge_cli-0.4.0 → docforge_cli-0.5.0}/src/docforge/sql/migrations/005_add_query_log_user_oid.sql
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|