afs-server 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. afs_server-0.1.0/.gitignore +41 -0
  2. afs_server-0.1.0/PKG-INFO +76 -0
  3. afs_server-0.1.0/README.md +50 -0
  4. afs_server-0.1.0/pyproject.toml +55 -0
  5. afs_server-0.1.0/src/afs_server/__init__.py +3 -0
  6. afs_server-0.1.0/src/afs_server/app.py +75 -0
  7. afs_server-0.1.0/src/afs_server/auth.py +70 -0
  8. afs_server-0.1.0/src/afs_server/dependencies.py +51 -0
  9. afs_server-0.1.0/src/afs_server/extraction/__init__.py +48 -0
  10. afs_server-0.1.0/src/afs_server/extraction/pipeline.py +50 -0
  11. afs_server-0.1.0/src/afs_server/extraction/text_native.py +41 -0
  12. afs_server-0.1.0/src/afs_server/mcp/__init__.py +5 -0
  13. afs_server-0.1.0/src/afs_server/mcp/server.py +86 -0
  14. afs_server-0.1.0/src/afs_server/py.typed +0 -0
  15. afs_server-0.1.0/src/afs_server/routers/__init__.py +1 -0
  16. afs_server-0.1.0/src/afs_server/routers/fs.py +47 -0
  17. afs_server-0.1.0/src/afs_server/routers/ingest.py +38 -0
  18. afs_server-0.1.0/src/afs_server/routers/meta.py +38 -0
  19. afs_server-0.1.0/src/afs_server/schemas.py +38 -0
  20. afs_server-0.1.0/src/afs_server/services/__init__.py +6 -0
  21. afs_server-0.1.0/src/afs_server/services/fs.py +95 -0
  22. afs_server-0.1.0/src/afs_server/services/ingest.py +153 -0
  23. afs_server-0.1.0/src/afs_server/settings.py +40 -0
  24. afs_server-0.1.0/src/afs_server/stores/__init__.py +83 -0
  25. afs_server-0.1.0/src/afs_server/stores/catalog_dynamodb.py +434 -0
  26. afs_server-0.1.0/src/afs_server/stores/objects_s3.py +202 -0
  27. afs_server-0.1.0/tests/test_app.py +124 -0
  28. afs_server-0.1.0/tests/test_catalog_dynamodb_conformance.py +65 -0
  29. afs_server-0.1.0/tests/test_extraction.py +59 -0
  30. afs_server-0.1.0/tests/test_fs_service.py +89 -0
  31. afs_server-0.1.0/tests/test_ingest_service.py +73 -0
  32. afs_server-0.1.0/tests/test_mcp.py +74 -0
  33. afs_server-0.1.0/tests/test_objects_s3_conformance.py +26 -0
  34. afs_server-0.1.0/tests/test_registry.py +33 -0
@@ -0,0 +1,41 @@
1
+ # --- OS / editor ---
2
+ .DS_Store
3
+ Thumbs.db
4
+ *.swp
5
+ .idea/
6
+ .vscode/
7
+
8
+ # --- Python (packages land in M0+) ---
9
+ __pycache__/
10
+ *.py[cod]
11
+ .venv/
12
+ venv/
13
+ .uv/
14
+ *.egg-info/
15
+ .ruff_cache/
16
+ .pytest_cache/
17
+ .mypy_cache/
18
+ .ty_cache/
19
+ dist/
20
+ build/
21
+
22
+ # --- Node (workers/mcp-edge lands later) ---
23
+ node_modules/
24
+ npm-debug.log*
25
+
26
+ # --- Secrets / local env ---
27
+ .env
28
+ .env.*
29
+ !.env.example
30
+ *.secret.*
31
+
32
+ # --- Terraform ---
33
+ # Detailed Terraform ignores live in terraform/.gitignore; these catch any
34
+ # stray state/plan artifacts produced outside that tree.
35
+ *.tfstate
36
+ *.tfstate.*
37
+ *.tfplan
38
+ .terraform/
39
+
40
+ # Agent worktrees (isolated background-agent checkouts)
41
+ .claude/worktrees/
@@ -0,0 +1,76 @@
1
+ Metadata-Version: 2.4
2
+ Name: afs-server
3
+ Version: 0.1.0
4
+ Summary: agentic-fs server: stores, services, REST + MCP. Implements the afs-core contracts.
5
+ Project-URL: Homepage, https://github.com/vivekkhimani/agentic-fs
6
+ Project-URL: Repository, https://github.com/vivekkhimani/agentic-fs
7
+ Project-URL: Issues, https://github.com/vivekkhimani/agentic-fs/issues
8
+ Author-email: Vivek Khimani <vivekkhimani07@gmail.com>
9
+ License-Expression: Apache-2.0
10
+ Keywords: agentic-fs,agents,dynamodb,fastapi,filesystem,mcp,s3
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Framework :: FastAPI
13
+ Classifier: License :: OSI Approved :: Apache Software License
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Typing :: Typed
16
+ Requires-Python: >=3.12
17
+ Requires-Dist: afs-core
18
+ Requires-Dist: boto3>=1.34
19
+ Requires-Dist: fastapi>=0.115
20
+ Requires-Dist: fastmcp>=2
21
+ Requires-Dist: pydantic-settings>=2.2
22
+ Requires-Dist: uvicorn[standard]>=0.30
23
+ Provides-Extra: postgres
24
+ Provides-Extra: search
25
+ Description-Content-Type: text/markdown
26
+
27
+ # afs-server
28
+
29
+ The agentic-fs service: the concrete backends (stores, search, extraction), the
30
+ services, and the REST + MCP surface. Implements the `afs-core` contracts.
31
+
32
+ ## Status
33
+
34
+ Store layer (in progress):
35
+
36
+ - `afs_server.settings` — `AFS_*` env config; every swappable layer is selected
37
+ by a backend *name* and every AWS-shaped backend takes an `endpoint_url`
38
+ override.
39
+ - `afs_server.stores` — the **store registry**: `get_object_store(settings)`
40
+ selects a builtin or an installed plugin (`afs.object_stores` entry-point group).
41
+ - `afs_server.stores.objects_s3.S3ObjectStore` — the S3 `ObjectStore`. Because it
42
+ speaks plain S3, it *is* your store for any S3-compatible endpoint (MinIO,
43
+ Cloudflare R2, Wasabi, Backblaze B2) via `AFS_S3_ENDPOINT_URL` — no code change.
44
+ - `afs_server.stores.catalog_dynamodb.DynamoDBCatalogStore` — the DynamoDB
45
+ `CatalogStore` over the single-table schema (`AFS_DYNAMODB_ENDPOINT_URL` points
46
+ at DynamoDB Local for dev).
47
+
48
+ Both stores are certified by the afs-core conformance kits via `moto`.
49
+
50
+ - `afs_server.services.FsService` — the read path (`list` / `stat` / ranged
51
+ `read`) over the stores, with scope + namespace enforcement and 404-not-403
52
+ misses.
53
+ - `afs_server.app` — the FastAPI app: `/v1/healthz`, `/readyz`, `/me`, and
54
+ `fs/{ns}/{entries,stat,doc}`; dev auth (static principal, never prod); every
55
+ `AfsError` rendered as RFC 9457 `problem+json`.
56
+ - `afs_server.mcp` — the MCP surface mounted at `/mcp` (FastMCP): `whoami`,
57
+ `fs_list`, `fs_stat`, `fs_read` over the *same* `FsService` (in-process, no HTTP
58
+ self-calls). The full middleware chain + remaining tools land with their slices.
59
+
60
+ The image (`../../Dockerfile`) runs this app on Lambda / Fargate / locally;
61
+ `make dev` from the repo root runs it against MinIO + DynamoDB Local. Coming
62
+ next: the full MCP middleware chain and the remaining MCP tools.
63
+
64
+ ## Swapping a backend (plug-and-play)
65
+
66
+ See [`docs/swap-guides/`](../../docs/swap-guides/). In short: S3-compatible
67
+ storage needs only an env var; anything else implements the `ObjectStore`
68
+ Protocol, registers an entry point, and certifies against
69
+ `afs_core.testing.ObjectStoreConformance`.
70
+
71
+ ## Develop
72
+
73
+ ```bash
74
+ uv sync
75
+ uv run pytest packages/afs-server # conformance kits run against moto
76
+ ```
@@ -0,0 +1,50 @@
1
+ # afs-server
2
+
3
+ The agentic-fs service: the concrete backends (stores, search, extraction), the
4
+ services, and the REST + MCP surface. Implements the `afs-core` contracts.
5
+
6
+ ## Status
7
+
8
+ Store layer (in progress):
9
+
10
+ - `afs_server.settings` — `AFS_*` env config; every swappable layer is selected
11
+ by a backend *name* and every AWS-shaped backend takes an `endpoint_url`
12
+ override.
13
+ - `afs_server.stores` — the **store registry**: `get_object_store(settings)`
14
+ selects a builtin or an installed plugin (`afs.object_stores` entry-point group).
15
+ - `afs_server.stores.objects_s3.S3ObjectStore` — the S3 `ObjectStore`. Because it
16
+ speaks plain S3, it *is* your store for any S3-compatible endpoint (MinIO,
17
+ Cloudflare R2, Wasabi, Backblaze B2) via `AFS_S3_ENDPOINT_URL` — no code change.
18
+ - `afs_server.stores.catalog_dynamodb.DynamoDBCatalogStore` — the DynamoDB
19
+ `CatalogStore` over the single-table schema (`AFS_DYNAMODB_ENDPOINT_URL` points
20
+ at DynamoDB Local for dev).
21
+
22
+ Both stores are certified by the afs-core conformance kits via `moto`.
23
+
24
+ - `afs_server.services.FsService` — the read path (`list` / `stat` / ranged
25
+ `read`) over the stores, with scope + namespace enforcement and 404-not-403
26
+ misses.
27
+ - `afs_server.app` — the FastAPI app: `/v1/healthz`, `/readyz`, `/me`, and
28
+ `fs/{ns}/{entries,stat,doc}`; dev auth (static principal, never prod); every
29
+ `AfsError` rendered as RFC 9457 `problem+json`.
30
+ - `afs_server.mcp` — the MCP surface mounted at `/mcp` (FastMCP): `whoami`,
31
+ `fs_list`, `fs_stat`, `fs_read` over the *same* `FsService` (in-process, no HTTP
32
+ self-calls). The full middleware chain + remaining tools land with their slices.
33
+
34
+ The image (`../../Dockerfile`) runs this app on Lambda / Fargate / locally;
35
+ `make dev` from the repo root runs it against MinIO + DynamoDB Local. Coming
36
+ next: the full MCP middleware chain and the remaining MCP tools.
37
+
38
+ ## Swapping a backend (plug-and-play)
39
+
40
+ See [`docs/swap-guides/`](../../docs/swap-guides/). In short: S3-compatible
41
+ storage needs only an env var; anything else implements the `ObjectStore`
42
+ Protocol, registers an entry point, and certifies against
43
+ `afs_core.testing.ObjectStoreConformance`.
44
+
45
+ ## Develop
46
+
47
+ ```bash
48
+ uv sync
49
+ uv run pytest packages/afs-server # conformance kits run against moto
50
+ ```
@@ -0,0 +1,55 @@
1
+ [project]
2
+ name = "afs-server"
3
+ version = "0.1.0"
4
+ description = "agentic-fs server: stores, services, REST + MCP. Implements the afs-core contracts."
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ license = "Apache-2.0"
8
+ authors = [{ name = "Vivek Khimani", email = "vivekkhimani07@gmail.com" }]
9
+ keywords = [
10
+ "agentic-fs",
11
+ "agents",
12
+ "mcp",
13
+ "fastapi",
14
+ "s3",
15
+ "dynamodb",
16
+ "filesystem",
17
+ ]
18
+ classifiers = [
19
+ "Development Status :: 3 - Alpha",
20
+ "License :: OSI Approved :: Apache Software License",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Typing :: Typed",
23
+ "Framework :: FastAPI",
24
+ ]
25
+ dependencies = [
26
+ "afs-core",
27
+ "boto3>=1.34",
28
+ "pydantic-settings>=2.2",
29
+ "fastapi>=0.115",
30
+ "uvicorn[standard]>=0.30",
31
+ "fastmcp>=2",
32
+ ]
33
+
34
+ [project.optional-dependencies]
35
+ # Reserved for future backends. Intentionally empty until the dependencies
36
+ # exist — declaring the names now keeps the install surface stable.
37
+ # TODO: add the Postgres catalog backend deps when that store lands.
38
+ postgres = []
39
+ # TODO: add the search/vector backend deps when that store lands.
40
+ search = []
41
+
42
+ [project.urls]
43
+ Homepage = "https://github.com/vivekkhimani/agentic-fs"
44
+ Repository = "https://github.com/vivekkhimani/agentic-fs"
45
+ Issues = "https://github.com/vivekkhimani/agentic-fs/issues"
46
+
47
+ [build-system]
48
+ requires = ["hatchling"]
49
+ build-backend = "hatchling.build"
50
+
51
+ [tool.hatch.build.targets.wheel]
52
+ packages = ["src/afs_server"]
53
+
54
+ [tool.uv.sources]
55
+ afs-core = { workspace = true }
@@ -0,0 +1,3 @@
1
+ """agentic-fs server: stores, services, REST + MCP — implements the afs-core contracts."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,75 @@
1
+ """ASGI application factory.
2
+
3
+ Assembles the REST surface + the MCP mount (sharing one ``FsService`` in-process,
4
+ no HTTP self-calls), wires the configured stores, and renders every ``AfsError``
5
+ as an RFC 9457 ``application/problem+json`` envelope.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ from collections.abc import AsyncIterator
12
+ from contextlib import asynccontextmanager
13
+
14
+ from fastapi import FastAPI, Request
15
+ from fastapi.responses import JSONResponse
16
+
17
+ from afs_core.errors import AfsError
18
+ from afs_server import __version__
19
+ from afs_server.extraction import build_pipeline
20
+ from afs_server.mcp import build_mcp
21
+ from afs_server.routers import fs, ingest, meta
22
+ from afs_server.services import FsService
23
+ from afs_server.settings import load_settings
24
+ from afs_server.stores import get_catalog_store, get_object_store
25
+
26
+ logger = logging.getLogger("afs_server")
27
+
28
+
29
async def _afs_error_handler(request: Request, exc: AfsError) -> JSONResponse:
    """Render an ``AfsError`` as an ``application/problem+json`` response.

    The HTTP status is taken from ``exc.http_status`` and the body from
    ``exc.to_problem``, which receives the request path as the ``instance``.
    """
    status = exc.http_status
    problem = exc.to_problem(instance=request.url.path)
    return JSONResponse(
        content=problem,
        status_code=status,
        media_type="application/problem+json",
    )
35
+
36
+
37
def create_app() -> FastAPI:
    """Assemble the FastAPI application.

    Loads the settings, builds the configured stores, an ``FsService`` over
    them and the extraction pipeline, then wires the ``AfsError`` handler, the
    REST routers and the MCP sub-application mounted at ``/mcp``.
    """
    settings = load_settings()
    # Stores are lazy (no I/O / credentials at construction), so the service and
    # the MCP server can be built right away and the service handed to MCP.
    catalog = get_catalog_store(settings)
    objects = get_object_store(settings)
    fs_service = FsService(catalog, objects)
    extraction_pipeline = build_pipeline()

    mcp_server = build_mcp(fs_service, settings)
    mcp_app = mcp_server.http_app(path="/")

    @asynccontextmanager
    async def lifespan(app: FastAPI) -> AsyncIterator[None]:
        # Publish the shared objects on app.state for the request dependencies.
        shared = {
            "settings": settings,
            "catalog": catalog,
            "objects": objects,
            "extraction_pipeline": extraction_pipeline,
        }
        for attr, value in shared.items():
            setattr(app.state, attr, value)
        logger.info(
            "afs-server %s started (object_store=%s, catalog=%s, auth=%s)",
            __version__,
            settings.object_store_backend,
            settings.catalog_backend,
            settings.auth_mode,
        )
        # The MCP session manager runs under its own lifespan — nest it so the
        # mounted /mcp app works (Starlette does not propagate sub-app lifespans).
        async with mcp_app.lifespan(app):
            yield

    application = FastAPI(title="agentic-fs", version=__version__, lifespan=lifespan)
    application.add_exception_handler(AfsError, _afs_error_handler)  # type: ignore[arg-type]
    for module in (meta, fs, ingest):
        application.include_router(module.router)
    application.mount("/mcp", mcp_app)
    return application
73
+
74
+
75
+ app = create_app()
@@ -0,0 +1,70 @@
1
+ """Authentication → a resolved tenant context.
2
+
3
+ This slice ships **dev auth only**: a static local principal selected when
4
+ ``AFS_AUTH_MODE=dev``. Any other mode fails closed (401) until the OAuth 2.1
5
+ resource server lands — so a misconfigured deployment never silently serves data
6
+ with no identity. No tokens or secrets are baked into the image.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ from dataclasses import dataclass, field
13
+ from typing import TYPE_CHECKING
14
+
15
+ from afs_core.errors import InsufficientScopeError, UnauthenticatedError
16
+
17
+ if TYPE_CHECKING:
18
+ from afs_server.settings import Settings
19
+
20
+ logger = logging.getLogger("afs_server.auth")
21
+
22
+ # Full scope set — granted to the dev principal only.
23
+ _ALL_SCOPES = frozenset({"fs:read", "fs:search", "fs:write:scratch", "ingest", "admin"})
24
+
25
+
26
@dataclass(frozen=True)
class TenantContext:
    """The authority resolved from a request: who, in which tenant, with what."""

    tenant_id: str
    principal_id: str
    # Scopes held by the principal; empty unless supplied.
    scopes: frozenset[str] = field(default_factory=frozenset)
    # None = all namespaces in the tenant are granted (dev convenience).
    namespaces: frozenset[str] | None = None

    def require_scope(self, scope: str) -> None:
        """Raise ``InsufficientScopeError`` unless ``scope`` is held."""
        if scope in self.scopes:
            return
        raise InsufficientScopeError(f"missing required scope: {scope}")

    def allows_namespace(self, namespace: str) -> bool:
        """Return True when ``namespace`` is granted (always, if unrestricted)."""
        granted = self.namespaces
        if granted is None:
            return True
        return namespace in granted
42
+
43
+
44
+ _dev_warned = False
45
+
46
+
47
def resolve_dev_context(settings: Settings) -> TenantContext:
    """Build the static dev principal from ``settings``.

    Logs a warning the first time it is called in the process; later calls are
    silent. The returned context carries every scope in ``_ALL_SCOPES`` and no
    namespace restriction.
    """
    global _dev_warned
    first_call = not _dev_warned
    if first_call:
        logger.warning(
            "AFS_AUTH_MODE=dev — serving with a STATIC dev principal and no token "
            "verification. Never run this in production."
        )
        _dev_warned = True
    tenant = settings.dev_tenant_id
    principal = settings.dev_principal_id
    return TenantContext(
        tenant_id=tenant,
        principal_id=principal,
        scopes=_ALL_SCOPES,
        namespaces=None,
    )
62
+
63
+
64
def resolve_context(settings: Settings) -> TenantContext:
    """Resolve the tenant context for the configured auth mode.

    Only ``"dev"`` is implemented; any other mode raises
    ``UnauthenticatedError`` so an unsupported configuration fails closed.
    """
    mode = settings.auth_mode
    if mode != "dev":
        # oidc and anything else: not implemented yet → fail closed.
        raise UnauthenticatedError(
            f"auth_mode {mode!r} is not available yet; only 'dev' is implemented"
        )
    return resolve_dev_context(settings)
@@ -0,0 +1,51 @@
1
+ """Shared FastAPI dependencies (typed aliases keep the routers thin)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from functools import lru_cache
6
+ from typing import TYPE_CHECKING, Annotated
7
+
8
+ from fastapi import Depends, Request
9
+
10
+ from afs_server.auth import TenantContext, resolve_context
11
+ from afs_server.services import FsService, IngestService
12
+ from afs_server.settings import Settings, load_settings
13
+
14
+ if TYPE_CHECKING:
15
+ from afs_core.contracts import CatalogStore, ObjectStore
16
+
17
+
18
@lru_cache
def get_settings() -> Settings:
    """Return the settings, loading them on first use and caching the result."""
    loaded = load_settings()
    return loaded
21
+
22
+
23
def get_catalog(request: Request) -> CatalogStore:
    """Return the catalog store held on the application state."""
    state = request.app.state
    return state.catalog
25
+
26
+
27
def get_objects(request: Request) -> ObjectStore:
    """Return the object store held on the application state."""
    state = request.app.state
    return state.objects
29
+
30
+
31
def get_fs_service(request: Request) -> FsService:
    """Build an ``FsService`` over the stores held on the application state."""
    state = request.app.state
    return FsService(state.catalog, state.objects)
33
+
34
+
35
def get_ingest_service(request: Request) -> IngestService:
    """Build an ``IngestService`` from the stores and extraction pipeline on app state."""
    state = request.app.state
    return IngestService(state.catalog, state.objects, state.extraction_pipeline)
41
+
42
+
43
def get_principal(settings: Annotated[Settings, Depends(get_settings)]) -> TenantContext:
    """Resolve the calling principal's tenant context from the injected settings."""
    context = resolve_context(settings)
    return context
45
+
46
+
47
+ SettingsDep = Annotated[Settings, Depends(get_settings)]
48
+ CatalogDep = Annotated["CatalogStore", Depends(get_catalog)]
49
+ FsDep = Annotated[FsService, Depends(get_fs_service)]
50
+ IngestDep = Annotated[IngestService, Depends(get_ingest_service)]
51
+ PrincipalDep = Annotated[TenantContext, Depends(get_principal)]
@@ -0,0 +1,48 @@
1
+ """Extraction — the pluggable parser layer.
2
+
3
+ A `Normalizer` (text_native builtin, or a third-party plugin) is selected by name
4
+ into a ladder, exactly like the store registry. Add your own parser: implement
5
+ `afs_core.contracts.Normalizer`, certify it with
6
+ `afs_core.testing.NormalizerConformance`, register it under the `afs.normalizers`
7
+ entry-point group, and name it in the ladder. See `docs/swap-guides/` (extraction).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from importlib.metadata import entry_points
13
+
14
+ from afs_core.contracts import Normalizer
15
+ from afs_server.extraction.pipeline import ExtractionOutcome, ExtractionPipeline
16
+ from afs_server.extraction.text_native import TextNativeNormalizer
17
+
18
+ _NORMALIZER_ENTRY_GROUP = "afs.normalizers"
19
+
20
+ # Builtin normalizers: name -> factory.
21
+ _BUILTIN_NORMALIZERS = {
22
+ "text_native": TextNativeNormalizer,
23
+ }
24
+
25
+ # Default ladder (config, not code — extended as rungs like docling land).
26
+ DEFAULT_LADDER = ["text_native"]
27
+
28
+
29
def _build_normalizer(name: str) -> Normalizer:
    """Instantiate the normalizer registered under ``name``.

    Builtins are checked first, then entry points in the ``afs.normalizers``
    group. Raises ``ValueError`` listing the available names when none match.
    """
    factory = _BUILTIN_NORMALIZERS.get(name)
    if factory is not None:
        return factory()

    plugin = next(
        (ep for ep in entry_points(group=_NORMALIZER_ENTRY_GROUP) if ep.name == name),
        None,
    )
    if plugin is not None:
        # The entry point loads to a factory; call it to get the instance.
        return plugin.load()()

    plugin_names = [ep.name for ep in entry_points(group=_NORMALIZER_ENTRY_GROUP)]
    available = [*sorted(_BUILTIN_NORMALIZERS), *plugin_names]
    raise ValueError(f"unknown normalizer {name!r}; available: {', '.join(available) or 'none'}")
40
+
41
+
42
def build_pipeline(ladder: list[str] | None = None) -> ExtractionPipeline:
    """Build the extraction pipeline from a ladder of normalizer names.

    A missing or empty ``ladder`` falls back to ``DEFAULT_LADDER``.
    """
    names = ladder if ladder else DEFAULT_LADDER
    normalizers = list(map(_build_normalizer, names))
    return ExtractionPipeline(normalizers)
46
+
47
+
48
+ __all__ = ["ExtractionOutcome", "ExtractionPipeline", "build_pipeline"]
@@ -0,0 +1,50 @@
1
+ """The extraction pipeline — orders normalizers into a ladder, gates on quality,
2
+ and degrades to catalog_only (plan §5.4, §9.2).
3
+
4
+ This is the boundary the maintainer's feedback identified: parsers (`Normalizer`s)
5
+ produce a `NormalizedDocument`; the pipeline decides which rung wins and whether
6
+ the result is good enough — neither knows about S3 keys or catalog rows.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ from dataclasses import dataclass
13
+ from typing import TYPE_CHECKING
14
+
15
+ from afs_core.contracts import NormalizationError
16
+
17
+ if TYPE_CHECKING:
18
+ from afs_core.contracts import Normalizer
19
+ from afs_core.models import NormalizedDocument, SourceDocument
20
+
21
+ logger = logging.getLogger("afs_server.extraction")
22
+
23
+
24
@dataclass(frozen=True)
class ExtractionOutcome:
    """The result of a successful pipeline run: the document and the rung that produced it."""

    # The normalized document returned by the winning normalizer.
    document: NormalizedDocument
    # Which rung produced it (recorded on the catalog row).
    extractor: str
28
+
29
+
30
class ExtractionPipeline:
    """Try each normalizer of the ladder in order and return the first result
    that clears the quality gate. ``None`` means no rung succeeded ⇒ catalog_only."""

    def __init__(self, ladder: list[Normalizer], *, min_chars_per_page: int = 1) -> None:
        self._min_chars = min_chars_per_page
        self._ladder = ladder

    async def run(self, doc: SourceDocument) -> ExtractionOutcome | None:
        """Return the first above-gate extraction of ``doc``, or ``None``."""
        for rung in self._ladder:
            if not rung.accepts(doc):
                continue
            try:
                candidate = await rung.normalize(doc)
            except NormalizationError as err:
                # A declining rung is not fatal — move on to the next one.
                logger.info("normalizer %s declined %s: %s", rung.name, doc.filename, err.reason)
            else:
                passes_gate = (
                    bool(candidate.pages)
                    and candidate.quality.min_chars_per_page >= self._min_chars
                )
                if passes_gate:
                    return ExtractionOutcome(document=candidate, extractor=rung.name)
                # below the quality gate — fall through to the next (escalation) rung.
        return None
@@ -0,0 +1,41 @@
1
+ """The text_native rung — the first (and cheapest) Normalizer.
2
+
3
+ Markdown/text/csv/json/html/… are already text, so "extraction" is just reading
4
+ the bytes as one page. The richer rungs (docling for PDFs/Office, llamaparse on
5
+ quality failure) are additional `Normalizer`s registered the same way.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import TYPE_CHECKING
11
+
12
+ from afs_core.contracts import NormalizationError
13
+ from afs_core.models import NormalizedDocument, PageText, QualityReport
14
+
15
+ if TYPE_CHECKING:
16
+ from afs_core.models import SourceDocument
17
+
18
# Non-``text/*`` media types that are nevertheless plain text.
_TEXT_CONTENT_TYPES = {"application/json", "application/xml", "application/x-ndjson"}
# File suffixes (lower-case, with the dot) treated as text when the content
# type does not already say so.
_TEXT_EXTENSIONS = {
    ".md", ".markdown", ".txt", ".text", ".csv", ".tsv",
    ".json", ".xml", ".html", ".htm", ".yaml", ".yml", ".log",
}  # fmt: skip


class TextNativeNormalizer:
    """Normalizer for documents that are already text: the bytes become one page."""

    name = "text_native"

    def accepts(self, doc: SourceDocument) -> bool:
        """Return True when ``doc`` looks like text by content type or file suffix.

        The content type is reduced to its bare media type first: parameters
        such as ``; charset=utf-8`` are dropped and the value is lower-cased,
        so ``application/json; charset=utf-8`` and ``Text/Plain`` are accepted.
        """
        media_type = (doc.content_type or "").split(";", 1)[0].strip().lower()
        if media_type.startswith("text/") or media_type in _TEXT_CONTENT_TYPES:
            return True
        return doc.local_path.suffix.lower() in _TEXT_EXTENSIONS

    async def normalize(self, doc: SourceDocument) -> NormalizedDocument:
        """Read the file as UTF-8 and return it as a single-page document.

        Undecodable bytes are replaced rather than raising. Raises
        ``NormalizationError("empty_document")`` when the text is blank.
        """
        text = doc.local_path.read_bytes().decode("utf-8", errors="replace")
        if not text.strip():
            raise NormalizationError("empty_document")
        return NormalizedDocument(
            pages=[PageText(number=1, markdown=text, source_locator="text:1")],
            quality=QualityReport(page_count=1, char_count=len(text), min_chars_per_page=len(text)),
        )
@@ -0,0 +1,5 @@
1
+ """MCP surface — FastMCP tools over the shared service layer (no HTTP self-calls)."""
2
+
3
+ from afs_server.mcp.server import build_mcp
4
+
5
+ __all__ = ["build_mcp"]
@@ -0,0 +1,86 @@
1
+ """The MCP tool surface (FastMCP), backed by the same ``FsService`` the REST
2
+ routes use — shared in-process, no HTTP self-calls (plan §7).
3
+
4
+ This slice exposes the read-path tools (`whoami`, `fs_list`, `fs_stat`,
5
+ `fs_read`) under the dev principal. The full middleware chain (per-connection
6
+ JWKS auth, claims-filtered `tools/list`, budgets, audit) and the remaining tools
7
+ (`fs_glob`/`fs_grep`/`fs_search`/`scratch_*`) land with their services.
8
+
9
+ Tools are flat `snake_case`; the docstring **is** the tool description (it states
10
+ the find→read flow and the bounds), per the plan.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from typing import TYPE_CHECKING, Any
16
+
17
+ from fastmcp import FastMCP
18
+ from fastmcp.exceptions import ToolError
19
+
20
+ from afs_core.errors import AfsError
21
+ from afs_server.auth import resolve_context
22
+
23
+ if TYPE_CHECKING:
24
+ from collections.abc import Awaitable
25
+
26
+ from pydantic import BaseModel
27
+
28
+ from afs_server.services import FsService
29
+ from afs_server.settings import Settings
30
+
31
+
32
async def _result(coro: Awaitable[BaseModel]) -> dict[str, Any]:
    """Await a service call and return its model as a JSON-mode dict.

    An ``AfsError`` raised by the call is re-raised as an MCP ``ToolError``
    carrying the error's message, chained to the original exception.
    """
    try:
        model = await coro
    except AfsError as err:
        raise ToolError(err.message) from err
    else:
        return model.model_dump(mode="json")
39
+
40
+
41
def build_mcp(fs: FsService, settings: Settings) -> FastMCP:
    """Create the FastMCP server and register the read-path tools on it.

    Each tool resolves the calling principal from ``settings`` on every call
    and delegates to ``fs``. The tool docstrings are the tool descriptions.
    """
    mcp: FastMCP = FastMCP("agentic-fs")

    @mcp.tool
    async def whoami() -> dict[str, Any]:
        """Return the calling principal: tenant, scopes, and granted namespaces."""
        ctx = resolve_context(settings)
        return {
            "tenant_id": ctx.tenant_id,
            "principal_id": ctx.principal_id,
            "scopes": sorted(ctx.scopes),
            "namespaces": None if ctx.namespaces is None else sorted(ctx.namespaces),
        }

    @mcp.tool
    async def fs_list(namespace: str, prefix: str = "", limit: int = 100) -> dict[str, Any]:
        """List catalog entries in a namespace under an optional path prefix.

        Start here to discover documents, then fs_read to fetch their text.
        Returns up to `limit` entries and a `next_cursor` to page further.
        """
        principal = resolve_context(settings)
        listing = fs.list_entries(principal, namespace, prefix=prefix, limit=limit)
        return await _result(listing)

    @mcp.tool
    async def fs_stat(namespace: str, path: str) -> dict[str, Any]:
        """Return one document's catalog record (size, title, extraction status…)."""
        principal = resolve_context(settings)
        return await _result(fs.stat(principal, namespace, path))

    @mcp.tool
    async def fs_read(
        namespace: str, path: str, start_page: int = 1, end_page: int | None = None
    ) -> dict[str, Any]:
        """Read a bounded page range (<= 20 pages) of a document's extracted text.

        A `catalog_only` document exists and is citeable but isn't readable yet —
        you'll get a tool error saying so; you can still reference it by path.
        """
        principal = resolve_context(settings)
        pages = fs.read(principal, namespace, path, start_page=start_page, end_page=end_page)
        return await _result(pages)

    return mcp
File without changes
@@ -0,0 +1 @@
1
+ """HTTP routers."""