afs-server 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- afs_server-0.1.0/.gitignore +41 -0
- afs_server-0.1.0/PKG-INFO +76 -0
- afs_server-0.1.0/README.md +50 -0
- afs_server-0.1.0/pyproject.toml +55 -0
- afs_server-0.1.0/src/afs_server/__init__.py +3 -0
- afs_server-0.1.0/src/afs_server/app.py +75 -0
- afs_server-0.1.0/src/afs_server/auth.py +70 -0
- afs_server-0.1.0/src/afs_server/dependencies.py +51 -0
- afs_server-0.1.0/src/afs_server/extraction/__init__.py +48 -0
- afs_server-0.1.0/src/afs_server/extraction/pipeline.py +50 -0
- afs_server-0.1.0/src/afs_server/extraction/text_native.py +41 -0
- afs_server-0.1.0/src/afs_server/mcp/__init__.py +5 -0
- afs_server-0.1.0/src/afs_server/mcp/server.py +86 -0
- afs_server-0.1.0/src/afs_server/py.typed +0 -0
- afs_server-0.1.0/src/afs_server/routers/__init__.py +1 -0
- afs_server-0.1.0/src/afs_server/routers/fs.py +47 -0
- afs_server-0.1.0/src/afs_server/routers/ingest.py +38 -0
- afs_server-0.1.0/src/afs_server/routers/meta.py +38 -0
- afs_server-0.1.0/src/afs_server/schemas.py +38 -0
- afs_server-0.1.0/src/afs_server/services/__init__.py +6 -0
- afs_server-0.1.0/src/afs_server/services/fs.py +95 -0
- afs_server-0.1.0/src/afs_server/services/ingest.py +153 -0
- afs_server-0.1.0/src/afs_server/settings.py +40 -0
- afs_server-0.1.0/src/afs_server/stores/__init__.py +83 -0
- afs_server-0.1.0/src/afs_server/stores/catalog_dynamodb.py +434 -0
- afs_server-0.1.0/src/afs_server/stores/objects_s3.py +202 -0
- afs_server-0.1.0/tests/test_app.py +124 -0
- afs_server-0.1.0/tests/test_catalog_dynamodb_conformance.py +65 -0
- afs_server-0.1.0/tests/test_extraction.py +59 -0
- afs_server-0.1.0/tests/test_fs_service.py +89 -0
- afs_server-0.1.0/tests/test_ingest_service.py +73 -0
- afs_server-0.1.0/tests/test_mcp.py +74 -0
- afs_server-0.1.0/tests/test_objects_s3_conformance.py +26 -0
- afs_server-0.1.0/tests/test_registry.py +33 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# --- OS / editor ---
|
|
2
|
+
.DS_Store
|
|
3
|
+
Thumbs.db
|
|
4
|
+
*.swp
|
|
5
|
+
.idea/
|
|
6
|
+
.vscode/
|
|
7
|
+
|
|
8
|
+
# --- Python (packages land in M0+) ---
|
|
9
|
+
__pycache__/
|
|
10
|
+
*.py[cod]
|
|
11
|
+
.venv/
|
|
12
|
+
venv/
|
|
13
|
+
.uv/
|
|
14
|
+
*.egg-info/
|
|
15
|
+
.ruff_cache/
|
|
16
|
+
.pytest_cache/
|
|
17
|
+
.mypy_cache/
|
|
18
|
+
.ty_cache/
|
|
19
|
+
dist/
|
|
20
|
+
build/
|
|
21
|
+
|
|
22
|
+
# --- Node (workers/mcp-edge lands later) ---
|
|
23
|
+
node_modules/
|
|
24
|
+
npm-debug.log*
|
|
25
|
+
|
|
26
|
+
# --- Secrets / local env ---
|
|
27
|
+
.env
|
|
28
|
+
.env.*
|
|
29
|
+
!.env.example
|
|
30
|
+
*.secret.*
|
|
31
|
+
|
|
32
|
+
# --- Terraform ---
|
|
33
|
+
# Detailed Terraform ignores live in terraform/.gitignore; these catch any
|
|
34
|
+
# stray state/plan artifacts produced outside that tree.
|
|
35
|
+
*.tfstate
|
|
36
|
+
*.tfstate.*
|
|
37
|
+
*.tfplan
|
|
38
|
+
.terraform/
|
|
39
|
+
|
|
40
|
+
# Agent worktrees (isolated background-agent checkouts)
|
|
41
|
+
.claude/worktrees/
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: afs-server
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: agentic-fs server: stores, services, REST + MCP. Implements the afs-core contracts.
|
|
5
|
+
Project-URL: Homepage, https://github.com/vivekkhimani/agentic-fs
|
|
6
|
+
Project-URL: Repository, https://github.com/vivekkhimani/agentic-fs
|
|
7
|
+
Project-URL: Issues, https://github.com/vivekkhimani/agentic-fs/issues
|
|
8
|
+
Author-email: Vivek Khimani <vivekkhimani07@gmail.com>
|
|
9
|
+
License-Expression: Apache-2.0
|
|
10
|
+
Keywords: agentic-fs,agents,dynamodb,fastapi,filesystem,mcp,s3
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Framework :: FastAPI
|
|
13
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Typing :: Typed
|
|
16
|
+
Requires-Python: >=3.12
|
|
17
|
+
Requires-Dist: afs-core
|
|
18
|
+
Requires-Dist: boto3>=1.34
|
|
19
|
+
Requires-Dist: fastapi>=0.115
|
|
20
|
+
Requires-Dist: fastmcp>=2
|
|
21
|
+
Requires-Dist: pydantic-settings>=2.2
|
|
22
|
+
Requires-Dist: uvicorn[standard]>=0.30
|
|
23
|
+
Provides-Extra: postgres
|
|
24
|
+
Provides-Extra: search
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# afs-server
|
|
28
|
+
|
|
29
|
+
The agentic-fs service: the concrete backends (stores, search, extraction), the
|
|
30
|
+
services, and the REST + MCP surface. Implements the `afs-core` contracts.
|
|
31
|
+
|
|
32
|
+
## Status
|
|
33
|
+
|
|
34
|
+
Store layer (in progress):
|
|
35
|
+
|
|
36
|
+
- `afs_server.settings` — `AFS_*` env config; every swappable layer is selected
|
|
37
|
+
by a backend *name* and every AWS-shaped backend takes an `endpoint_url`
|
|
38
|
+
override.
|
|
39
|
+
- `afs_server.stores` — the **store registry**: `get_object_store(settings)`
|
|
40
|
+
selects a builtin or an installed plugin (`afs.object_stores` entry-point group).
|
|
41
|
+
- `afs_server.stores.objects_s3.S3ObjectStore` — the S3 `ObjectStore`. Because it
|
|
42
|
+
speaks plain S3, it *is* your store for any S3-compatible endpoint (MinIO,
|
|
43
|
+
Cloudflare R2, Wasabi, Backblaze B2) via `AFS_S3_ENDPOINT_URL` — no code change.
|
|
44
|
+
- `afs_server.stores.catalog_dynamodb.DynamoDBCatalogStore` — the DynamoDB
|
|
45
|
+
`CatalogStore` over the single-table schema (`AFS_DYNAMODB_ENDPOINT_URL` points
|
|
46
|
+
at DynamoDB Local for dev).
|
|
47
|
+
|
|
48
|
+
Both stores are certified by the afs-core conformance kits via `moto`.
|
|
49
|
+
|
|
50
|
+
- `afs_server.services.FsService` — the read path (`list` / `stat` / ranged
|
|
51
|
+
`read`) over the stores, with scope + namespace enforcement and 404-not-403
|
|
52
|
+
misses.
|
|
53
|
+
- `afs_server.app` — the FastAPI app: `/v1/healthz`, `/readyz`, `/me`, and
|
|
54
|
+
`fs/{ns}/{entries,stat,doc}`; dev auth (static principal, never prod); every
|
|
55
|
+
`AfsError` rendered as RFC 9457 `problem+json`.
|
|
56
|
+
- `afs_server.mcp` — the MCP surface mounted at `/mcp` (FastMCP): `whoami`,
|
|
57
|
+
`fs_list`, `fs_stat`, `fs_read` over the *same* `FsService` (in-process, no HTTP
|
|
58
|
+
self-calls). The full middleware chain + remaining tools land with their slices.
|
|
59
|
+
|
|
60
|
+
The image (`../../Dockerfile`) runs this app on Lambda / Fargate / locally;
|
|
61
|
+
`make dev` from the repo root runs it against MinIO + DynamoDB Local. Coming
|
|
62
|
+
next: the remaining MCP tools and the full middleware chain (the `/mcp` mount above already shares `FsService` in-process).
|
|
63
|
+
|
|
64
|
+
## Swapping a backend (plug-and-play)
|
|
65
|
+
|
|
66
|
+
See [`docs/swap-guides/`](../../docs/swap-guides/). In short: S3-compatible
|
|
67
|
+
storage needs only an env var; anything else implements the `ObjectStore`
|
|
68
|
+
Protocol, registers an entry point, and certifies against
|
|
69
|
+
`afs_core.testing.ObjectStoreConformance`.
|
|
70
|
+
|
|
71
|
+
## Develop
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
uv sync
|
|
75
|
+
uv run pytest packages/afs-server # conformance kits run against moto
|
|
76
|
+
```
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# afs-server
|
|
2
|
+
|
|
3
|
+
The agentic-fs service: the concrete backends (stores, search, extraction), the
|
|
4
|
+
services, and the REST + MCP surface. Implements the `afs-core` contracts.
|
|
5
|
+
|
|
6
|
+
## Status
|
|
7
|
+
|
|
8
|
+
Store layer (in progress):
|
|
9
|
+
|
|
10
|
+
- `afs_server.settings` — `AFS_*` env config; every swappable layer is selected
|
|
11
|
+
by a backend *name* and every AWS-shaped backend takes an `endpoint_url`
|
|
12
|
+
override.
|
|
13
|
+
- `afs_server.stores` — the **store registry**: `get_object_store(settings)`
|
|
14
|
+
selects a builtin or an installed plugin (`afs.object_stores` entry-point group).
|
|
15
|
+
- `afs_server.stores.objects_s3.S3ObjectStore` — the S3 `ObjectStore`. Because it
|
|
16
|
+
speaks plain S3, it *is* your store for any S3-compatible endpoint (MinIO,
|
|
17
|
+
Cloudflare R2, Wasabi, Backblaze B2) via `AFS_S3_ENDPOINT_URL` — no code change.
|
|
18
|
+
- `afs_server.stores.catalog_dynamodb.DynamoDBCatalogStore` — the DynamoDB
|
|
19
|
+
`CatalogStore` over the single-table schema (`AFS_DYNAMODB_ENDPOINT_URL` points
|
|
20
|
+
at DynamoDB Local for dev).
|
|
21
|
+
|
|
22
|
+
Both stores are certified by the afs-core conformance kits via `moto`.
|
|
23
|
+
|
|
24
|
+
- `afs_server.services.FsService` — the read path (`list` / `stat` / ranged
|
|
25
|
+
`read`) over the stores, with scope + namespace enforcement and 404-not-403
|
|
26
|
+
misses.
|
|
27
|
+
- `afs_server.app` — the FastAPI app: `/v1/healthz`, `/readyz`, `/me`, and
|
|
28
|
+
`fs/{ns}/{entries,stat,doc}`; dev auth (static principal, never prod); every
|
|
29
|
+
`AfsError` rendered as RFC 9457 `problem+json`.
|
|
30
|
+
- `afs_server.mcp` — the MCP surface mounted at `/mcp` (FastMCP): `whoami`,
|
|
31
|
+
`fs_list`, `fs_stat`, `fs_read` over the *same* `FsService` (in-process, no HTTP
|
|
32
|
+
self-calls). The full middleware chain + remaining tools land with their slices.
|
|
33
|
+
|
|
34
|
+
The image (`../../Dockerfile`) runs this app on Lambda / Fargate / locally;
|
|
35
|
+
`make dev` from the repo root runs it against MinIO + DynamoDB Local. Coming
|
|
36
|
+
next: the remaining MCP tools and the full middleware chain (the `/mcp` mount above already shares `FsService` in-process).
|
|
37
|
+
|
|
38
|
+
## Swapping a backend (plug-and-play)
|
|
39
|
+
|
|
40
|
+
See [`docs/swap-guides/`](../../docs/swap-guides/). In short: S3-compatible
|
|
41
|
+
storage needs only an env var; anything else implements the `ObjectStore`
|
|
42
|
+
Protocol, registers an entry point, and certifies against
|
|
43
|
+
`afs_core.testing.ObjectStoreConformance`.
|
|
44
|
+
|
|
45
|
+
## Develop
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
uv sync
|
|
49
|
+
uv run pytest packages/afs-server # conformance kits run against moto
|
|
50
|
+
```
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "afs-server"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "agentic-fs server: stores, services, REST + MCP. Implements the afs-core contracts."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.12"
|
|
7
|
+
license = "Apache-2.0"
|
|
8
|
+
authors = [{ name = "Vivek Khimani", email = "vivekkhimani07@gmail.com" }]
|
|
9
|
+
keywords = [
|
|
10
|
+
"agentic-fs",
|
|
11
|
+
"agents",
|
|
12
|
+
"mcp",
|
|
13
|
+
"fastapi",
|
|
14
|
+
"s3",
|
|
15
|
+
"dynamodb",
|
|
16
|
+
"filesystem",
|
|
17
|
+
]
|
|
18
|
+
classifiers = [
|
|
19
|
+
"Development Status :: 3 - Alpha",
|
|
20
|
+
"License :: OSI Approved :: Apache Software License",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Typing :: Typed",
|
|
23
|
+
"Framework :: FastAPI",
|
|
24
|
+
]
|
|
25
|
+
dependencies = [
|
|
26
|
+
"afs-core",
|
|
27
|
+
"boto3>=1.34",
|
|
28
|
+
"pydantic-settings>=2.2",
|
|
29
|
+
"fastapi>=0.115",
|
|
30
|
+
"uvicorn[standard]>=0.30",
|
|
31
|
+
"fastmcp>=2",
|
|
32
|
+
]
|
|
33
|
+
|
|
34
|
+
[project.optional-dependencies]
|
|
35
|
+
# Reserved for future backends. Intentionally empty until the dependencies
|
|
36
|
+
# exist — declaring the names now keeps the install surface stable.
|
|
37
|
+
# TODO: add the Postgres catalog backend deps when that store lands.
|
|
38
|
+
postgres = []
|
|
39
|
+
# TODO: add the search/vector backend deps when that store lands.
|
|
40
|
+
search = []
|
|
41
|
+
|
|
42
|
+
[project.urls]
|
|
43
|
+
Homepage = "https://github.com/vivekkhimani/agentic-fs"
|
|
44
|
+
Repository = "https://github.com/vivekkhimani/agentic-fs"
|
|
45
|
+
Issues = "https://github.com/vivekkhimani/agentic-fs/issues"
|
|
46
|
+
|
|
47
|
+
[build-system]
|
|
48
|
+
requires = ["hatchling"]
|
|
49
|
+
build-backend = "hatchling.build"
|
|
50
|
+
|
|
51
|
+
[tool.hatch.build.targets.wheel]
|
|
52
|
+
packages = ["src/afs_server"]
|
|
53
|
+
|
|
54
|
+
[tool.uv.sources]
|
|
55
|
+
afs-core = { workspace = true }
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"""ASGI application factory.
|
|
2
|
+
|
|
3
|
+
Assembles the REST surface + the MCP mount (sharing one ``FsService`` in-process,
|
|
4
|
+
no HTTP self-calls), wires the configured stores, and renders every ``AfsError``
|
|
5
|
+
as an RFC 9457 ``application/problem+json`` envelope.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from collections.abc import AsyncIterator
|
|
12
|
+
from contextlib import asynccontextmanager
|
|
13
|
+
|
|
14
|
+
from fastapi import FastAPI, Request
|
|
15
|
+
from fastapi.responses import JSONResponse
|
|
16
|
+
|
|
17
|
+
from afs_core.errors import AfsError
|
|
18
|
+
from afs_server import __version__
|
|
19
|
+
from afs_server.extraction import build_pipeline
|
|
20
|
+
from afs_server.mcp import build_mcp
|
|
21
|
+
from afs_server.routers import fs, ingest, meta
|
|
22
|
+
from afs_server.services import FsService
|
|
23
|
+
from afs_server.settings import load_settings
|
|
24
|
+
from afs_server.stores import get_catalog_store, get_object_store
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger("afs_server")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
async def _afs_error_handler(request: Request, exc: AfsError) -> JSONResponse:
    """Render an ``AfsError`` as an ``application/problem+json`` response.

    The HTTP status is taken from ``exc.http_status`` and the body from
    ``exc.to_problem``, with the request path passed as the problem ``instance``.
    """
    status = exc.http_status
    problem = exc.to_problem(instance=request.url.path)
    return JSONResponse(
        content=problem,
        status_code=status,
        media_type="application/problem+json",
    )
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def create_app() -> FastAPI:
    """Assemble and return the FastAPI application.

    Loads settings, builds the catalog/object stores, one ``FsService`` and the
    extraction pipeline, builds the MCP HTTP app from that ``FsService``, then
    registers the ``AfsError`` handler, the meta/fs/ingest routers and the
    ``/mcp`` mount. The lifespan publishes the settings, stores and pipeline on
    ``app.state`` and runs the MCP app's own lifespan around the yield.
    """
    config = load_settings()
    # Stores are lazy (no I/O / credentials at construction), so we can build the
    # service + MCP server now and share the service between REST and MCP.
    catalog_store = get_catalog_store(config)
    object_store = get_object_store(config)
    shared_fs = FsService(catalog_store, object_store)
    pipeline = build_pipeline()

    mcp_asgi = build_mcp(shared_fs, config).http_app(path="/")

    @asynccontextmanager
    async def lifespan(app: FastAPI) -> AsyncIterator[None]:
        state = app.state
        state.settings = config
        state.catalog = catalog_store
        state.objects = object_store
        state.extraction_pipeline = pipeline
        logger.info(
            "afs-server %s started (object_store=%s, catalog=%s, auth=%s)",
            __version__,
            config.object_store_backend,
            config.catalog_backend,
            config.auth_mode,
        )
        # The MCP session manager runs under its own lifespan — nest it so the
        # mounted /mcp app works (Starlette does not propagate sub-app lifespans).
        async with mcp_asgi.lifespan(app):
            yield

    api = FastAPI(title="agentic-fs", version=__version__, lifespan=lifespan)
    api.add_exception_handler(AfsError, _afs_error_handler)  # type: ignore[arg-type]
    for router_module in (meta, fs, ingest):
        api.include_router(router_module.router)
    api.mount("/mcp", mcp_asgi)
    return api
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
app = create_app()
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""Authentication → a resolved tenant context.
|
|
2
|
+
|
|
3
|
+
This slice ships **dev auth only**: a static local principal selected when
|
|
4
|
+
``AFS_AUTH_MODE=dev``. Any other mode fails closed (401) until the OAuth 2.1
|
|
5
|
+
resource server lands — so a misconfigured deployment never silently serves data
|
|
6
|
+
with no identity. No tokens or secrets are baked into the image.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from typing import TYPE_CHECKING
|
|
14
|
+
|
|
15
|
+
from afs_core.errors import InsufficientScopeError, UnauthenticatedError
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from afs_server.settings import Settings
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger("afs_server.auth")
|
|
21
|
+
|
|
22
|
+
# Full scope set — granted to the dev principal only.
|
|
23
|
+
_ALL_SCOPES = frozenset({"fs:read", "fs:search", "fs:write:scratch", "ingest", "admin"})
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
|
|
27
|
+
class TenantContext:
|
|
28
|
+
"""The authority resolved from a request: who, in which tenant, with what."""
|
|
29
|
+
|
|
30
|
+
tenant_id: str
|
|
31
|
+
principal_id: str
|
|
32
|
+
scopes: frozenset[str] = field(default_factory=frozenset)
|
|
33
|
+
# None = all namespaces in the tenant are granted (dev convenience).
|
|
34
|
+
namespaces: frozenset[str] | None = None
|
|
35
|
+
|
|
36
|
+
def require_scope(self, scope: str) -> None:
|
|
37
|
+
if scope not in self.scopes:
|
|
38
|
+
raise InsufficientScopeError(f"missing required scope: {scope}")
|
|
39
|
+
|
|
40
|
+
def allows_namespace(self, namespace: str) -> bool:
|
|
41
|
+
return self.namespaces is None or namespace in self.namespaces
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
_dev_warned = False
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def resolve_dev_context(settings: Settings) -> TenantContext:
    """The static dev principal. Loud, intentional, never production.

    Emits the dev-mode warning once per process (tracked by the module-level
    ``_dev_warned`` flag), then returns a ``TenantContext`` built from
    ``settings.dev_tenant_id`` / ``settings.dev_principal_id`` with every scope
    in ``_ALL_SCOPES`` and ``namespaces=None``.
    """
    global _dev_warned
    if not _dev_warned:
        warning = (
            "AFS_AUTH_MODE=dev — serving with a STATIC dev principal and no token "
            "verification. Never run this in production."
        )
        logger.warning(warning)
        _dev_warned = True
    tenant = settings.dev_tenant_id
    principal = settings.dev_principal_id
    return TenantContext(
        tenant_id=tenant,
        principal_id=principal,
        scopes=_ALL_SCOPES,
        namespaces=None,
    )
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def resolve_context(settings: Settings) -> TenantContext:
    """Resolve the tenant context for the configured ``settings.auth_mode``.

    Only ``"dev"`` is handled (delegating to ``resolve_dev_context``); every
    other mode raises ``UnauthenticatedError`` so the server fails closed.
    """
    mode = settings.auth_mode
    if mode != "dev":
        # oidc and anything else: not implemented yet → fail closed.
        raise UnauthenticatedError(
            f"auth_mode {mode!r} is not available yet; only 'dev' is implemented"
        )
    return resolve_dev_context(settings)
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Shared FastAPI dependencies (typed aliases keep the routers thin)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from functools import lru_cache
|
|
6
|
+
from typing import TYPE_CHECKING, Annotated
|
|
7
|
+
|
|
8
|
+
from fastapi import Depends, Request
|
|
9
|
+
|
|
10
|
+
from afs_server.auth import TenantContext, resolve_context
|
|
11
|
+
from afs_server.services import FsService, IngestService
|
|
12
|
+
from afs_server.settings import Settings, load_settings
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from afs_core.contracts import CatalogStore, ObjectStore
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@lru_cache
def get_settings() -> Settings:
    """Return the result of ``load_settings()``, memoised by ``lru_cache``."""
    loaded = load_settings()
    return loaded
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_catalog(request: Request) -> CatalogStore:
    """Return the catalog store held on ``request.app.state.catalog``."""
    state = request.app.state
    return state.catalog
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_objects(request: Request) -> ObjectStore:
    """Return the object store held on ``request.app.state.objects``."""
    state = request.app.state
    return state.objects
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_fs_service(request: Request) -> FsService:
    """Build an ``FsService`` over the catalog and object stores on ``app.state``."""
    state = request.app.state
    catalog = state.catalog
    objects = state.objects
    return FsService(catalog, objects)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def get_ingest_service(request: Request) -> IngestService:
    """Build an ``IngestService`` from the stores and extraction pipeline on ``app.state``."""
    state = request.app.state
    catalog = state.catalog
    objects = state.objects
    pipeline = state.extraction_pipeline
    return IngestService(catalog, objects, pipeline)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def get_principal(settings: Annotated[Settings, Depends(get_settings)]) -> TenantContext:
    """Resolve the request's ``TenantContext`` from the injected settings."""
    context = resolve_context(settings)
    return context
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
SettingsDep = Annotated[Settings, Depends(get_settings)]
|
|
48
|
+
CatalogDep = Annotated["CatalogStore", Depends(get_catalog)]
|
|
49
|
+
FsDep = Annotated[FsService, Depends(get_fs_service)]
|
|
50
|
+
IngestDep = Annotated[IngestService, Depends(get_ingest_service)]
|
|
51
|
+
PrincipalDep = Annotated[TenantContext, Depends(get_principal)]
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
"""Extraction — the pluggable parser layer.
|
|
2
|
+
|
|
3
|
+
A `Normalizer` (text_native builtin, or a third-party plugin) is selected by name
|
|
4
|
+
into a ladder, exactly like the store registry. Add your own parser: implement
|
|
5
|
+
`afs_core.contracts.Normalizer`, certify it with
|
|
6
|
+
`afs_core.testing.NormalizerConformance`, register it under the `afs.normalizers`
|
|
7
|
+
entry-point group, and name it in the ladder. See `docs/swap-guides/` (extraction).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from importlib.metadata import entry_points
|
|
13
|
+
|
|
14
|
+
from afs_core.contracts import Normalizer
|
|
15
|
+
from afs_server.extraction.pipeline import ExtractionOutcome, ExtractionPipeline
|
|
16
|
+
from afs_server.extraction.text_native import TextNativeNormalizer
|
|
17
|
+
|
|
18
|
+
_NORMALIZER_ENTRY_GROUP = "afs.normalizers"
|
|
19
|
+
|
|
20
|
+
# Builtin normalizers: name -> factory.
|
|
21
|
+
_BUILTIN_NORMALIZERS = {
|
|
22
|
+
"text_native": TextNativeNormalizer,
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
# Default ladder (config, not code — extended as rungs like docling land).
|
|
26
|
+
DEFAULT_LADDER = ["text_native"]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _build_normalizer(name: str) -> Normalizer:
    """Instantiate the normalizer registered under ``name``.

    Builtins in ``_BUILTIN_NORMALIZERS`` are checked first, then the first
    entry point in the ``_NORMALIZER_ENTRY_GROUP`` group whose name matches.

    Raises:
        ValueError: if no builtin or entry point has that name; the message
            lists the names that are available.
    """
    factory = _BUILTIN_NORMALIZERS.get(name)
    if factory is not None:
        return factory()

    plugin = next(
        (ep for ep in entry_points(group=_NORMALIZER_ENTRY_GROUP) if ep.name == name),
        None,
    )
    if plugin is not None:
        return plugin.load()()

    plugin_names = [ep.name for ep in entry_points(group=_NORMALIZER_ENTRY_GROUP)]
    available = sorted(_BUILTIN_NORMALIZERS) + plugin_names
    raise ValueError(f"unknown normalizer {name!r}; available: {', '.join(available) or 'none'}")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def build_pipeline(ladder: list[str] | None = None) -> ExtractionPipeline:
    """Build the extraction pipeline from a ladder of normalizer names.

    A falsy ``ladder`` (``None`` or an empty list) falls back to
    ``DEFAULT_LADDER``. Normalizers are built in ladder order.
    """
    rung_names = ladder if ladder else DEFAULT_LADDER
    normalizers = []
    for rung_name in rung_names:
        normalizers.append(_build_normalizer(rung_name))
    return ExtractionPipeline(normalizers)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
__all__ = ["ExtractionOutcome", "ExtractionPipeline", "build_pipeline"]
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""The extraction pipeline — orders normalizers into a ladder, gates on quality,
|
|
2
|
+
and degrades to catalog_only (plan §5.4, §9.2).
|
|
3
|
+
|
|
4
|
+
This is the boundary the maintainer's feedback identified: parsers (`Normalizer`s)
|
|
5
|
+
produce a `NormalizedDocument`; the pipeline decides which rung wins and whether
|
|
6
|
+
the result is good enough — neither knows about S3 keys or catalog rows.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import logging
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
from typing import TYPE_CHECKING
|
|
14
|
+
|
|
15
|
+
from afs_core.contracts import NormalizationError
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from afs_core.contracts import Normalizer
|
|
19
|
+
from afs_core.models import NormalizedDocument, SourceDocument
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger("afs_server.extraction")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
class ExtractionOutcome:
    """Immutable result of a successful pipeline run: the document and its source rung."""

    document: NormalizedDocument
    # which rung produced it (recorded on the catalog row)
    extractor: str
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ExtractionPipeline:
    """Walks the ladder in order; the first rung that accepts the document and
    produces an above-quality-gate result wins. Returns ``None`` ⇒ catalog_only."""

    def __init__(self, ladder: list[Normalizer], *, min_chars_per_page: int = 1) -> None:
        """Store the ordered ``ladder`` and the ``min_chars_per_page`` quality gate."""
        self._ladder = ladder
        self._min_chars = min_chars_per_page

    async def run(self, doc: SourceDocument) -> ExtractionOutcome | None:
        """Try each rung in order and return the first result that passes the gate.

        A rung is skipped when it does not accept ``doc`` or when it raises
        ``NormalizationError`` (logged at info level). A result passes when it
        has pages and its ``quality.min_chars_per_page`` is at least the
        configured minimum. Returns ``None`` when no rung produces a passing
        result.
        """
        for rung in self._ladder:
            if not rung.accepts(doc):
                continue
            try:
                normalized = await rung.normalize(doc)
            except NormalizationError as declined:
                logger.info(
                    "normalizer %s declined %s: %s", rung.name, doc.filename, declined.reason
                )
                continue
            passes_gate = (
                bool(normalized.pages)
                and normalized.quality.min_chars_per_page >= self._min_chars
            )
            if passes_gate:
                return ExtractionOutcome(document=normalized, extractor=rung.name)
            # below the quality gate — fall through to the next (escalation) rung.
        return None
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
"""The text_native rung — the first (and cheapest) Normalizer.
|
|
2
|
+
|
|
3
|
+
Markdown/text/csv/json/html/… are already text, so "extraction" is just reading
|
|
4
|
+
the bytes as one page. The richer rungs (docling for PDFs/Office, llamaparse on
|
|
5
|
+
quality failure) are additional `Normalizer`s registered the same way.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import TYPE_CHECKING
|
|
11
|
+
|
|
12
|
+
from afs_core.contracts import NormalizationError
|
|
13
|
+
from afs_core.models import NormalizedDocument, PageText, QualityReport
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from afs_core.models import SourceDocument
|
|
17
|
+
|
|
18
|
+
_TEXT_CONTENT_TYPES = {"application/json", "application/xml", "application/x-ndjson"}
|
|
19
|
+
_TEXT_EXTENSIONS = {
|
|
20
|
+
".md", ".markdown", ".txt", ".text", ".csv", ".tsv",
|
|
21
|
+
".json", ".xml", ".html", ".htm", ".yaml", ".yml", ".log",
|
|
22
|
+
} # fmt: skip
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class TextNativeNormalizer:
    """Normalizer for documents that are already text.

    A document is accepted by media type (any ``text/*`` type, or one listed in
    ``_TEXT_CONTENT_TYPES``) or, failing that, by file suffix
    (``_TEXT_EXTENSIONS``). Normalization reads the file as UTF-8 and returns it
    as a single page.
    """

    name = "text_native"

    def accepts(self, doc: SourceDocument) -> bool:
        """Return True when ``doc`` looks like plain text by media type or suffix."""
        # Media types compare case-insensitively and may carry parameters
        # (e.g. "application/json; charset=utf-8"), so match on the bare,
        # lower-cased type/subtype rather than the raw header value.
        media_type = (doc.content_type or "").split(";", 1)[0].strip().lower()
        if media_type.startswith("text/") or media_type in _TEXT_CONTENT_TYPES:
            return True
        return doc.local_path.suffix.lower() in _TEXT_EXTENSIONS

    async def normalize(self, doc: SourceDocument) -> NormalizedDocument:
        """Read ``doc.local_path`` as UTF-8 text and wrap it as one page.

        Undecodable bytes are replaced rather than raising.

        Raises:
            NormalizationError: with reason ``"empty_document"`` when the file
                contains only whitespace (or nothing).
        """
        text = doc.local_path.read_bytes().decode("utf-8", errors="replace")
        if not text.strip():
            raise NormalizationError("empty_document")
        char_count = len(text)
        return NormalizedDocument(
            pages=[PageText(number=1, markdown=text, source_locator="text:1")],
            # One page, so the per-page minimum equals the total character count.
            quality=QualityReport(
                page_count=1, char_count=char_count, min_chars_per_page=char_count
            ),
        )
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""The MCP tool surface (FastMCP), backed by the same ``FsService`` the REST
|
|
2
|
+
routes use — shared in-process, no HTTP self-calls (plan §7).
|
|
3
|
+
|
|
4
|
+
This slice exposes the read-path tools (`whoami`, `fs_list`, `fs_stat`,
|
|
5
|
+
`fs_read`) under the dev principal. The full middleware chain (per-connection
|
|
6
|
+
JWKS auth, claims-filtered `tools/list`, budgets, audit) and the remaining tools
|
|
7
|
+
(`fs_glob`/`fs_grep`/`fs_search`/`scratch_*`) land with their services.
|
|
8
|
+
|
|
9
|
+
Tools are flat `snake_case`; the docstring **is** the tool description (it states
|
|
10
|
+
the find→read flow and the bounds), per the plan.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from typing import TYPE_CHECKING, Any
|
|
16
|
+
|
|
17
|
+
from fastmcp import FastMCP
|
|
18
|
+
from fastmcp.exceptions import ToolError
|
|
19
|
+
|
|
20
|
+
from afs_core.errors import AfsError
|
|
21
|
+
from afs_server.auth import resolve_context
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from collections.abc import Awaitable
|
|
25
|
+
|
|
26
|
+
from pydantic import BaseModel
|
|
27
|
+
|
|
28
|
+
from afs_server.services import FsService
|
|
29
|
+
from afs_server.settings import Settings
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
async def _result(coro: Awaitable[BaseModel]) -> dict[str, Any]:
|
|
33
|
+
"""Await a service call; surface expected AfsErrors as MCP ToolErrors."""
|
|
34
|
+
try:
|
|
35
|
+
model = await coro
|
|
36
|
+
except AfsError as err:
|
|
37
|
+
raise ToolError(err.message) from err
|
|
38
|
+
return model.model_dump(mode="json")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def build_mcp(fs: FsService, settings: Settings) -> FastMCP:
    """Create the ``agentic-fs`` FastMCP server and register the read-path tools.

    Registers ``whoami``, ``fs_list``, ``fs_stat`` and ``fs_read``. Each tool
    resolves the caller via ``resolve_context(settings)`` on every call and
    delegates to the given ``fs`` service; service results go through
    ``_result``.
    """
    server: FastMCP = FastMCP("agentic-fs")

    # Tool names, parameter names and tool docstrings are kept verbatim: the
    # docstring is the tool description.

    @server.tool
    async def whoami() -> dict[str, Any]:
        """Return the calling principal: tenant, scopes, and granted namespaces."""
        principal = resolve_context(settings)
        granted = principal.namespaces
        return {
            "tenant_id": principal.tenant_id,
            "principal_id": principal.principal_id,
            "scopes": sorted(principal.scopes),
            "namespaces": None if granted is None else sorted(granted),
        }

    @server.tool
    async def fs_list(namespace: str, prefix: str = "", limit: int = 100) -> dict[str, Any]:
        """List catalog entries in a namespace under an optional path prefix.

        Start here to discover documents, then fs_read to fetch their text.
        Returns up to `limit` entries and a `next_cursor` to page further.
        """
        principal = resolve_context(settings)
        listing = fs.list_entries(principal, namespace, prefix=prefix, limit=limit)
        return await _result(listing)

    @server.tool
    async def fs_stat(namespace: str, path: str) -> dict[str, Any]:
        """Return one document's catalog record (size, title, extraction status…)."""
        principal = resolve_context(settings)
        record = fs.stat(principal, namespace, path)
        return await _result(record)

    @server.tool
    async def fs_read(
        namespace: str, path: str, start_page: int = 1, end_page: int | None = None
    ) -> dict[str, Any]:
        """Read a bounded page range (<= 20 pages) of a document's extracted text.

        A `catalog_only` document exists and is citeable but isn't readable yet —
        you'll get a tool error saying so; you can still reference it by path.
        """
        principal = resolve_context(settings)
        pages = fs.read(principal, namespace, path, start_page=start_page, end_page=end_page)
        return await _result(pages)

    return server
|
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""HTTP routers."""
|