microfish 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
microfish/__init__.py ADDED
File without changes
microfish/auth.py ADDED
@@ -0,0 +1,92 @@
1
+ import contextlib
2
+ import hmac
3
+ from collections.abc import Iterator
4
+ from contextvars import ContextVar
5
+
6
+ from starlette.middleware import Middleware
7
+ from starlette.middleware.base import BaseHTTPMiddleware
8
+
9
+ from microfish.settings import Settings
10
+
11
+ current_tinyfish_api_key: ContextVar[str | None] = ContextVar(
12
+ "current_tinyfish_api_key",
13
+ default=None,
14
+ )
15
# Whether the current execution context counts as MCP-authenticated (False when unset).
_MCP_AUTH_VAR_NAME = "current_mcp_authenticated"
current_mcp_authenticated: ContextVar[bool] = ContextVar(_MCP_AUTH_VAR_NAME, default=False)
19
+
20
+
21
class AuthenticationError(ValueError):
    """Raised when the credentials required for a tool call are missing or invalid."""
23
+
24
+
25
+ def parse_bearer_token(authorization: str | None) -> str | None:
26
+ if not authorization:
27
+ return None
28
+ scheme, separator, token = authorization.strip().partition(" ")
29
+ if separator != " " or scheme.lower() != "bearer":
30
+ return None
31
+ cleaned = token.strip()
32
+ return cleaned or None
33
+
34
+
35
def require_api_key() -> str:
    """Return the TinyFish API key bound to the current context.

    Raises:
        AuthenticationError: If no key, or an empty key, is set for this context.
    """
    api_key = current_tinyfish_api_key.get()
    if api_key:
        return api_key
    raise AuthenticationError("Missing Authorization Bearer token")
40
+
41
+
42
def require_mcp_authentication() -> None:
    """Ensure the current context has been marked as MCP-authenticated.

    Raises:
        AuthenticationError: If the authenticated flag is not set for this context.
    """
    authenticated = current_mcp_authenticated.get()
    if authenticated:
        return
    raise AuthenticationError("Missing or invalid Authorization Bearer token")
45
+
46
+
47
@contextlib.contextmanager
def local_mcp_authentication() -> Iterator[None]:
    """Run the managed block as MCP-authenticated with no client API key set.

    Both context variables are restored to their previous values on exit,
    including when the managed block raises.
    """
    key_reset = current_tinyfish_api_key.set(None)
    auth_reset = current_mcp_authenticated.set(True)
    try:
        yield
    finally:
        current_tinyfish_api_key.reset(key_reset)
        current_mcp_authenticated.reset(auth_reset)
56
+
57
+
58
+ def mask_token(token: str | None) -> str:
59
+ if not token:
60
+ return "<empty>"
61
+ if len(token) <= 8:
62
+ return "<redacted>"
63
+ return f"{token[:4]}...{token[-4:]}"
64
+
65
+
66
class BearerTokenMiddleware(BaseHTTPMiddleware):
    """Publish bearer-token authentication state through context variables.

    For every request the middleware parses the ``Authorization`` header and
    sets ``current_tinyfish_api_key`` / ``current_mcp_authenticated`` before
    calling the downstream app, then restores both afterwards.  It never
    rejects a request itself; it only records the authentication state.

    * Polling mode (``settings.polling_enabled``): the bearer token is compared
      with ``settings.mcp_auth_token``; when no MCP token is configured every
      request counts as authenticated.  No client API key is published.
    * Otherwise: the bearer token itself is published as the TinyFish API key
      and the request counts as authenticated whenever a token is present.
    """

    def __init__(self, app, settings: Settings) -> None:
        super().__init__(app)
        self.settings = settings

    @classmethod
    def as_starlette_middleware(cls, settings: Settings) -> Middleware:
        """Wrap this class in a ``Middleware`` entry bound to *settings*."""
        return Middleware(cls, settings=settings)

    @staticmethod
    def _tokens_match(candidate: str, expected: str) -> bool:
        """Compare two tokens in constant time without raising on non-ASCII text."""
        # hmac.compare_digest() raises TypeError for str arguments that contain
        # non-ASCII characters, which would turn an odd header value (or an
        # odd configured token) into an unhandled error.  Comparing the encoded
        # bytes keeps the constant-time comparison and accepts any str;
        # "surrogatepass" guarantees the encode step itself cannot fail.
        return hmac.compare_digest(
            candidate.encode("utf-8", "surrogatepass"),
            expected.encode("utf-8", "surrogatepass"),
        )

    async def dispatch(self, request, call_next):
        """Set the auth context variables for *request*, then run the downstream app."""
        token = parse_bearer_token(request.headers.get("authorization"))
        # Start from a clean, unauthenticated state; the reset tokens let the
        # ``finally`` clause restore whatever was set before this request.
        tinyfish_context = current_tinyfish_api_key.set(None)
        mcp_context = current_mcp_authenticated.set(False)
        try:
            if self.settings.polling_enabled:
                expected = self.settings.mcp_auth_token
                authenticated = expected is None or (
                    token is not None and self._tokens_match(token, expected)
                )
                current_mcp_authenticated.set(authenticated)
            else:
                current_tinyfish_api_key.set(token)
                current_mcp_authenticated.set(token is not None)
            return await call_next(request)
        finally:
            current_tinyfish_api_key.reset(tinyfish_context)
            current_mcp_authenticated.reset(mcp_context)
microfish/server.py ADDED
@@ -0,0 +1,87 @@
1
+ import argparse
2
+ import contextlib
3
+
4
+ import uvicorn
5
+ from mcp.server.fastmcp import FastMCP
6
+ from starlette.applications import Starlette
7
+ from starlette.responses import JSONResponse
8
+ from starlette.routing import Mount, Route
9
+
10
+ from microfish.auth import BearerTokenMiddleware, local_mcp_authentication
11
+ from microfish.settings import Settings, load_settings
12
+ from microfish.tinyfish_client import TinyFishClient
13
+ from microfish.tools import TinyFishToolExecutor, register_tools
14
+
15
+
16
async def health_check(request):
    """Liveness endpoint: always answers ``{"status": "ok"}``; *request* is unused."""
    body = {"status": "ok"}
    return JSONResponse(body)
18
+
19
+
20
def create_mcp(settings: Settings, client: TinyFishClient | None = None) -> FastMCP:
    """Build the ``microfish`` FastMCP server and register the TinyFish tools on it.

    Args:
        settings: Supplies the streamable HTTP path and configures the executor.
        client: TinyFish client to use; one is built from *settings* when omitted.

    Returns:
        The configured FastMCP instance.
    """
    server = FastMCP(
        "microfish",
        stateless_http=True,
        json_response=True,
        streamable_http_path=settings.mcp_path,
    )
    if client:
        tinyfish = client
    else:
        tinyfish = TinyFishClient.from_settings(settings)
    executor = TinyFishToolExecutor(settings, tinyfish)
    register_tools(server, executor)
    return server
30
+
31
+
32
def create_app(settings: Settings | None = None, client: TinyFishClient | None = None) -> Starlette:
    """Assemble the Starlette application: health route, MCP mount and auth middleware.

    Args:
        settings: Configuration to use; loaded via ``load_settings()`` when omitted.
        client: Optional TinyFish client forwarded to ``create_mcp``.

    Returns:
        A Starlette app whose lifespan runs the MCP session manager.
    """
    effective_settings = settings if settings else load_settings()
    mcp_server = create_mcp(effective_settings, client)

    @contextlib.asynccontextmanager
    async def lifespan(app):
        # Keep the MCP session manager running for the whole application lifetime.
        async with mcp_server.session_manager.run():
            yield

    routes = [
        Route("/health", health_check, methods=["GET"]),
        Mount("/", app=mcp_server.streamable_http_app()),
    ]
    middleware = [BearerTokenMiddleware.as_starlette_middleware(effective_settings)]
    return Starlette(routes=routes, middleware=middleware, lifespan=lifespan)
49
+
50
+
51
def parse_args() -> argparse.Namespace:
    """Parse the process command line.

    Returns:
        A namespace whose ``transport`` attribute is ``"http"``, ``"stdio"``,
        or ``None`` when ``--transport`` was not given.
    """
    cli = argparse.ArgumentParser(prog="microfish")
    cli.add_argument("--transport", default=None, choices=("http", "stdio"))
    namespace = cli.parse_args()
    return namespace
55
+
56
+
57
def settings_from_args() -> Settings:
    """Load settings and apply the ``--transport`` command-line override, if given.

    Returns:
        The loaded settings unchanged when no transport was passed, otherwise
        a copy with ``transport`` replaced by the command-line value.
    """
    base = load_settings()
    transport = parse_args().transport
    if transport is not None:
        return base.model_copy(update={"transport": transport})
    return base
63
+
64
+
65
def run_http(settings: Settings) -> None:
    """Serve the Starlette app with uvicorn using host, port and log level from *settings*."""
    application = create_app(settings)
    uvicorn.run(
        application,
        host=settings.host,
        port=settings.port,
        log_level=settings.log_level,
    )
72
+
73
+
74
def run_stdio(settings: Settings) -> None:
    """Run the MCP server over stdio inside a locally authenticated context.

    Raises:
        RuntimeError: If ``settings.polling_enabled`` is false, i.e. no
            server-side TinyFish keys are configured.
    """
    if settings.polling_enabled:
        server = create_mcp(settings)
        # stdio has no HTTP middleware, so the auth context is set up here instead.
        with local_mcp_authentication():
            server.run(transport="stdio")
        return
    raise RuntimeError("stdio transport requires TINYFISH_KEYS")
80
+
81
+
82
def main() -> None:
    """Entry point: resolve settings, then start the stdio or HTTP transport."""
    settings = settings_from_args()
    runner = run_stdio if settings.transport == "stdio" else run_http
    runner(settings)
microfish/settings.py ADDED
@@ -0,0 +1,65 @@
1
+ from typing import Annotated, Literal
2
+
3
+ from pydantic import Field, field_validator
4
+ from pydantic_settings import BaseSettings, NoDecode, SettingsConfigDict
5
+
6
+ TransportMode = Literal["http", "stdio"]
7
+
8
+
9
class Settings(BaseSettings):
    """Runtime configuration for the microfish gateway.

    Fields use the ``MICROFISH_`` environment prefix configured below;
    ``tinyfish_keys`` and ``mcp_auth_token`` additionally declare the
    validation aliases ``TINYFISH_KEYS`` and ``MCP_AUTH_TOKEN``.  Both secret
    fields are excluded from ``repr`` output.
    """

    model_config = SettingsConfigDict(env_prefix="MICROFISH_", populate_by_name=True)

    host: str = "0.0.0.0"
    port: int = 8000
    mcp_path: str = "/mcp"
    transport: TransportMode = "http"
    tinyfish_search_url: str = "https://api.search.tinyfish.ai"
    tinyfish_fetch_url: str = "https://api.fetch.tinyfish.ai"
    request_timeout_seconds: float = Field(default=30.0, gt=0)
    log_level: str = "info"
    # NoDecode leaves the raw value to parse_tinyfish_keys (comma-separated text).
    tinyfish_keys: Annotated[list[str], NoDecode] = Field(
        default_factory=list,
        validation_alias="TINYFISH_KEYS",
        repr=False,
    )
    mcp_auth_token: str | None = Field(
        default=None,
        validation_alias="MCP_AUTH_TOKEN",
        repr=False,
    )

    @field_validator("tinyfish_keys", mode="before")
    @classmethod
    def parse_tinyfish_keys(cls, value: object) -> list[str]:
        """Normalize the key list from a comma-separated string or a sequence.

        Entries are stripped and blank entries are dropped.  ``None`` and
        unsupported types yield an empty list.
        """
        if value is None:
            return []
        if isinstance(value, str):
            candidates = value.split(",")
        elif isinstance(value, (list, tuple)):
            # Tuples are accepted as well as lists so keys passed
            # programmatically are not silently discarded.
            candidates = [str(item) for item in value]
        else:
            return []
        return [key for raw in candidates if (key := raw.strip())]

    @field_validator("mcp_auth_token", mode="before")
    @classmethod
    def normalize_mcp_auth_token(cls, value: object) -> str | None:
        """Strip the token and map a missing or blank value to ``None``."""
        if value is None:
            return None
        cleaned = str(value).strip()
        return cleaned or None

    @field_validator("transport", mode="before")
    @classmethod
    def normalize_transport(cls, value: object) -> object:
        """Default ``None`` to ``"http"`` and lower-case/strip string values."""
        if value is None:
            return "http"
        if isinstance(value, str):
            return value.strip().lower()
        return value

    @property
    def polling_enabled(self) -> bool:
        """True when at least one server-side TinyFish key is configured."""
        return bool(self.tinyfish_keys)
62
+
63
+
64
def load_settings() -> Settings:
    """Build a new ``Settings`` instance with no explicit overrides."""
    loaded = Settings()
    return loaded
@@ -0,0 +1,209 @@
1
+ from typing import Any, Literal
2
+
3
+ import httpx
4
+ from pydantic import BaseModel, Field, HttpUrl, field_validator
5
+
6
+ from microfish.settings import Settings
7
+
8
+
9
class TinyFishApiError(RuntimeError):
    """Failure of a TinyFish API call, carrying a status code, error code and message."""

    def __init__(self, status_code: int, code: str, message: str) -> None:
        self.status_code = status_code
        self.code = code
        self.message = message
        summary = f"TinyFish API error {status_code} {code}: {message}"
        super().__init__(summary)

    def to_payload(self) -> dict[str, Any]:
        """Render the error as an ``{"ok": False, "error": {...}}`` dictionary."""
        details = {
            "status_code": self.status_code,
            "code": self.code,
            "message": self.message,
        }
        return {"ok": False, "error": details}
25
+
26
+
27
class SearchRequest(BaseModel):
    """Validated parameters for a TinyFish Search call."""

    query: str = Field(
        description="Search query for TinyFish Search.",
        min_length=1,
        max_length=2000,
    )
    location: str | None = Field(
        description="Country code for geo-targeted results.",
        default=None,
    )
    language: str | None = Field(
        description="Language code for result language.",
        default=None,
    )
    page: int = Field(
        description="Page number for pagination, starting from 0.",
        default=0,
        ge=0,
        le=10,
    )
    include_thumbnail: bool = Field(
        description="When true, include thumbnail_url in search results when available.",
        default=False,
    )

    def to_query_params(self) -> dict[str, Any]:
        """Return the query-string parameters for this request.

        Unset optional fields are omitted and ``include_thumbnail`` is sent as
        the text ``"true"`` or ``"false"`` rather than a Python bool.
        """
        query_params = self.model_dump(exclude_none=True)
        if self.include_thumbnail:
            query_params["include_thumbnail"] = "true"
        else:
            query_params["include_thumbnail"] = "false"
        return query_params
56
+
57
+
58
class FetchRequest(BaseModel):
    """Validated parameters for a TinyFish Fetch call (one to ten distinct URLs)."""

    urls: list[HttpUrl] = Field(
        description="Array of URLs to fetch. Each URL is processed independently.",
        min_length=1,
        max_length=10,
    )
    format: Literal["markdown", "html", "json"] = Field(
        description="Output format for extracted content.",
        default="markdown",
    )
    include_html_head: bool = Field(
        description="When true and format is html, include a complete HTML document head.",
        default=False,
    )
    links: bool = Field(
        description="Extract outbound links from each page.",
        default=False,
    )
    image_links: bool = Field(
        description="Extract image URLs from each page.",
        default=False,
    )

    @field_validator("urls")
    @classmethod
    def reject_duplicate_urls(cls, urls: list[HttpUrl]) -> list[HttpUrl]:
        """Reject the list when two entries have the same string form."""
        distinct = {str(url) for url in urls}
        if len(distinct) != len(urls):
            raise ValueError("urls must not contain duplicates")
        return urls
88
+
89
+
90
+ UsageStatus = Literal["completed", "failed"]
91
+
92
+
93
class SearchUsageRequest(BaseModel):
    """Filter and paging parameters for listing Search usage records."""

    start_after: str | None = Field(
        description="Return records created after this time.",
        default=None,
    )
    end_before: str | None = Field(
        description="Return records created before this time.",
        default=None,
    )
    status: UsageStatus | None = Field(
        description="Filter by completed or failed status.",
        default=None,
    )
    limit: int = Field(
        description="Page size for search usage records.",
        default=100,
        ge=1,
        le=1000,
    )
    page: int = Field(
        description="Usage page number, starting from 1.",
        default=1,
        ge=1,
    )
107
+
108
+
109
class FetchUsageRequest(BaseModel):
    """Filter and paging parameters for listing Fetch usage records."""

    start_after: str | None = Field(
        description="Return records created after this time.",
        default=None,
    )
    end_before: str | None = Field(
        description="Return records created before this time.",
        default=None,
    )
    status: UsageStatus | None = Field(
        description="Filter by completed or failed status.",
        default=None,
    )
    limit: int = Field(
        description="Page size for fetch usage records.",
        default=20,
        ge=1,
        le=100,
    )
    page: int = Field(
        description="Usage page number, starting from 1.",
        default=1,
        ge=1,
    )
121
+
122
+
123
class TinyFishClient:
    """Async HTTP client for the TinyFish Search and Fetch endpoints.

    Every public method returns ``{"ok": True, "data": <JSON object>}`` on
    success and raises ``TinyFishApiError`` on timeouts, transport errors,
    error statuses, and responses that are not a JSON object.
    """

    def __init__(
        self,
        search_url: str,
        fetch_url: str,
        timeout_seconds: float,
        http_client: httpx.AsyncClient | None = None,
    ) -> None:
        """Store the endpoint URLs (trailing ``/`` removed) and the timeout.

        Args:
            search_url: Base URL of the Search API.
            fetch_url: Base URL of the Fetch API.
            timeout_seconds: Timeout used for clients this object creates.
            http_client: Optional shared client.  When given, it is used for
                every request and is never closed by this object.
        """
        self.search_url = search_url.rstrip("/")
        self.fetch_url = fetch_url.rstrip("/")
        self.timeout = httpx.Timeout(timeout_seconds)
        self.http_client = http_client

    @classmethod
    def from_settings(cls, settings: Settings) -> "TinyFishClient":
        """Build a client from the URL and timeout fields of *settings*."""
        return cls(
            search_url=settings.tinyfish_search_url,
            fetch_url=settings.tinyfish_fetch_url,
            timeout_seconds=settings.request_timeout_seconds,
        )

    async def search(self, api_key: str, request: SearchRequest) -> dict[str, Any]:
        """GET the search endpoint with *request* encoded as query parameters."""
        return await self._request(
            "GET",
            self.search_url,
            api_key,
            params=request.to_query_params(),
        )

    async def fetch_content(self, api_key: str, request: FetchRequest) -> dict[str, Any]:
        """POST *request* as a JSON body to the fetch endpoint."""
        payload = request.model_dump(mode="json")
        return await self._request("POST", self.fetch_url, api_key, json=payload)

    async def get_search_usage(self, api_key: str, request: SearchUsageRequest) -> dict[str, Any]:
        """GET ``<search_url>/usage`` with the set fields of *request* as query parameters."""
        return await self._request(
            "GET",
            f"{self.search_url}/usage",
            api_key,
            params=request.model_dump(exclude_none=True),
        )

    async def list_fetch_usage(self, api_key: str, request: FetchUsageRequest) -> dict[str, Any]:
        """GET ``<fetch_url>/usage`` with the set fields of *request* as query parameters."""
        return await self._request(
            "GET",
            f"{self.fetch_url}/usage",
            api_key,
            params=request.model_dump(exclude_none=True),
        )

    async def _request(self, method: str, url: str, api_key: str, **kwargs: Any) -> dict[str, Any]:
        """Send one request with the ``X-API-Key`` header and parse the response.

        Raises:
            TinyFishApiError: 504/``TIMEOUT`` on an httpx timeout,
                502/``UPSTREAM_ERROR`` on any other httpx error, or whatever
                ``_parse_response`` raises for the received response.
        """
        headers = {"X-API-Key": api_key}
        # Ownership is decided once, with an explicit None check, so the client
        # that gets closed is exactly the one created here.
        owns_client = self.http_client is None
        client = httpx.AsyncClient(timeout=self.timeout) if owns_client else self.http_client
        try:
            response = await client.request(method, url, headers=headers, **kwargs)
            return self._parse_response(response)
        except httpx.TimeoutException as exc:
            raise TinyFishApiError(504, "TIMEOUT", "TinyFish request timed out") from exc
        except httpx.HTTPError as exc:
            # A transport error can stringify to an empty message; fall back to
            # the exception class name so the caller still sees what failed.
            detail = str(exc) or type(exc).__name__
            raise TinyFishApiError(502, "UPSTREAM_ERROR", detail) from exc
        finally:
            if owns_client:
                await client.aclose()

    def _parse_response(self, response: httpx.Response) -> dict[str, Any]:
        """Convert an HTTP response into a success payload or raise.

        Raises:
            TinyFishApiError: ``INVALID_JSON`` when the body is not JSON (the
                message is the first 200 characters of the body); the
                upstream ``error.code`` / ``error.message`` (or ``HTTP_ERROR``
                and the reason phrase) for an error status; and
                ``INVALID_RESPONSE`` when a successful body is not an object.
        """
        try:
            payload = response.json()
        except ValueError as exc:
            raise TinyFishApiError(
                response.status_code, "INVALID_JSON", response.text[:200]
            ) from exc

        if response.is_error:
            error = payload.get("error") if isinstance(payload, dict) else None
            if isinstance(error, dict):
                # ``or`` also covers a null or empty field, which would
                # otherwise surface as the literal text "None" or as "".
                code = str(error.get("code") or "HTTP_ERROR")
                message = str(error.get("message") or response.reason_phrase)
            else:
                code = "HTTP_ERROR"
                message = response.reason_phrase
            raise TinyFishApiError(response.status_code, code, message)

        if not isinstance(payload, dict):
            raise TinyFishApiError(
                response.status_code, "INVALID_RESPONSE", "Expected a JSON object"
            )
        return {"ok": True, "data": payload}
@@ -0,0 +1,167 @@
1
+ from dataclasses import dataclass
2
+
3
# Allowlist of tool names; checked against the retained rows of TOOL_MATRIX.
FREE_TOOL_NAMES = frozenset(
    ("search", "fetch_content", "get_search_usage", "list_fetch_usage")
)
11
+
12
# Tool names that are deliberately blocked, grouped by the surface they belong to.
BLOCKED_TOOL_NAMES = frozenset(
    (
        # Agent automation
        "run_web_automation",
        "run_web_automation_async",
        # Automation run tracking
        "get_run",
        "list_runs",
        "cancel_run",
        "poll_status",
        "get_steps",
        "discover_run",
        # Batch automation
        "batch_create",
        "batch_status",
        "batch_cancel",
        # Browser sessions
        "create_browser_session",
        "list_browser_sessions",
    )
)
29
+
30
+
31
@dataclass(frozen=True)
class ToolDecision:
    """One immutable row of the tool policy matrix."""

    name: str  # tool identifier
    group: str  # capability group the tool belongs to
    retained: bool  # True when the tool is kept, False when it is blocked
    evidence: str  # justification for the decision
    risk: str  # risk label for the tool
38
+
39
+
40
# Policy matrix: one ToolDecision per known tool, retained rows first.
TOOL_MATRIX = (
    ToolDecision(
        name="search",
        group="Web Search",
        retained=True,
        evidence="TinyFish Search API is documented as a free ranked search endpoint",
        risk="low",
    ),
    ToolDecision(
        name="fetch_content",
        group="Content Extraction",
        retained=True,
        evidence=(
            "TinyFish Fetch API is documented as a free content extraction endpoint "
            "supporting up to 10 URLs"
        ),
        risk="low",
    ),
    ToolDecision(
        name="get_search_usage",
        group="Web Search",
        retained=True,
        evidence="Search usage endpoint lists historical Search API records without starting automation",
        risk="low",
    ),
    ToolDecision(
        name="list_fetch_usage",
        group="Content Extraction",
        retained=True,
        evidence="Fetch usage endpoint lists historical Fetch API records without starting automation",
        risk="low",
    ),
    ToolDecision(
        name="run_web_automation",
        group="Agent Automation",
        retained=False,
        evidence="Agent automation is a credits-backed surface outside the Search and Fetch gateway",
        risk="blocked",
    ),
    ToolDecision(
        name="run_web_automation_async",
        group="Agent Automation",
        retained=False,
        evidence="Async Agent automation is a credits-backed surface outside the Search and Fetch gateway",
        risk="blocked",
    ),
    ToolDecision(
        name="get_run",
        group="Automation Run Tracking",
        retained=False,
        evidence="Automation run metadata belongs to the Agent surface and is not a Search or Fetch API",
        risk="blocked",
    ),
    ToolDecision(
        name="list_runs",
        group="Automation Run Tracking",
        retained=False,
        evidence="Automation run listing belongs to the Agent surface and is not a Search or Fetch API",
        risk="blocked",
    ),
    ToolDecision(
        name="cancel_run",
        group="Automation Run Tracking",
        retained=False,
        evidence="Automation run cancellation controls Agent jobs and exceeds the free gateway boundary",
        risk="blocked",
    ),
    ToolDecision(
        name="poll_status",
        group="Automation Run Tracking",
        retained=False,
        evidence="Polling run status depends on Agent automation runs and must not be exposed by microfish",
        risk="blocked",
    ),
    ToolDecision(
        name="get_steps",
        group="Automation Run Tracking",
        retained=False,
        evidence="Run step inspection depends on Agent automation runs and must not be exposed by microfish",
        risk="blocked",
    ),
    ToolDecision(
        name="discover_run",
        group="Automation Run Tracking",
        retained=False,
        evidence="Run discovery depends on Agent automation sessions and must not be exposed by microfish",
        risk="blocked",
    ),
    ToolDecision(
        name="batch_create",
        group="Batch Automation",
        retained=False,
        evidence="Batch creation starts automation work outside the Search and Fetch gateway",
        risk="blocked",
    ),
    ToolDecision(
        name="batch_status",
        group="Batch Automation",
        retained=False,
        evidence="Batch status reads automation work outside the Search and Fetch gateway",
        risk="blocked",
    ),
    ToolDecision(
        name="batch_cancel",
        group="Batch Automation",
        retained=False,
        evidence="Batch cancellation controls automation work outside the Search and Fetch gateway",
        risk="blocked",
    ),
    ToolDecision(
        name="create_browser_session",
        group="Browser Sessions",
        retained=False,
        evidence=(
            "Browser sessions belong to the Browser API, "
            "which is outside the free Search and Fetch surfaces"
        ),
        risk="blocked",
    ),
    ToolDecision(
        name="list_browser_sessions",
        group="Browser Sessions",
        retained=False,
        evidence=(
            "Browser session listing belongs to the Browser API, "
            "which is outside the free Search and Fetch surfaces"
        ),
        risk="blocked",
    ),
)
164
+
165
+
166
def retained_tool_names() -> set[str]:
    """Return the names of every ``TOOL_MATRIX`` row marked as retained."""
    names: set[str] = set()
    for entry in TOOL_MATRIX:
        if entry.retained:
            names.add(entry.name)
    return names
microfish/tools.py ADDED
@@ -0,0 +1,237 @@
1
+ import asyncio
2
+ from collections.abc import Awaitable, Callable
3
+ from typing import Annotated, Any, Literal
4
+
5
+ from mcp.server.fastmcp import FastMCP
6
+ from pydantic import Field, ValidationError
7
+
8
+ from microfish.auth import AuthenticationError, require_api_key, require_mcp_authentication
9
+ from microfish.settings import Settings
10
+ from microfish.tinyfish_client import (
11
+ FetchRequest,
12
+ FetchUsageRequest,
13
+ SearchRequest,
14
+ SearchUsageRequest,
15
+ TinyFishApiError,
16
+ TinyFishClient,
17
+ )
18
+ from microfish.tool_policy import FREE_TOOL_NAMES, retained_tool_names
19
+
20
+
21
+ def error_payload(code: str, message: str, details: Any | None = None) -> dict[str, Any]:
22
+ return {"ok": False, "error": {"code": code, "message": message, "details": details}}
23
+
24
+
25
def safe_call_error(exc: Exception) -> dict[str, Any]:
    """Translate an exception raised inside a tool into a failure payload.

    Args:
        exc: The exception caught by the tool wrapper.

    Returns:
        ``AUTHENTICATION_REQUIRED`` for ``AuthenticationError``,
        ``INVALID_INPUT`` (with the validation errors as details) for
        ``ValidationError``, the error's own payload for ``TinyFishApiError``,
        and a generic ``INTERNAL_ERROR`` for anything else.
    """
    if isinstance(exc, AuthenticationError):
        return error_payload("AUTHENTICATION_REQUIRED", str(exc))
    if isinstance(exc, ValidationError):
        return error_payload("INVALID_INPUT", "Input validation failed", exc.errors())
    if isinstance(exc, TinyFishApiError):
        return exc.to_payload()

    # The caller only receives a generic message, so record the real failure
    # here; otherwise unexpected errors vanish without a trace.  The import is
    # local so this function does not depend on a module-level import.
    import logging

    logging.getLogger(__name__).error("Unexpected tool error", exc_info=exc)
    return error_payload("INTERNAL_ERROR", "Unexpected tool error")
33
+
34
+
35
def assert_policy_consistent() -> None:
    """Verify that the retained policy rows match ``FREE_TOOL_NAMES`` exactly.

    Raises:
        RuntimeError: If the two sets of tool names differ.
    """
    if retained_tool_names() == FREE_TOOL_NAMES:
        return
    raise RuntimeError("Tool policy and allowlist are inconsistent")
39
+
40
+
41
+ class TinyFishKeyPool:
42
+ def __init__(self, keys: list[str], max_extra_retries: int = 3) -> None:
43
+ if not keys:
44
+ raise ValueError("TinyFish key pool requires at least one key")
45
+ self._keys = keys
46
+ self._max_attempts = min(len(keys), max_extra_retries + 1)
47
+ self._next_index = 0
48
+ self._lock = asyncio.Lock()
49
+
50
+ async def next_attempt_keys(self) -> list[str]:
51
+ async with self._lock:
52
+ start_index = self._next_index
53
+ self._next_index = (self._next_index + 1) % len(self._keys)
54
+ return [
55
+ self._keys[(start_index + offset) % len(self._keys)]
56
+ for offset in range(self._max_attempts)
57
+ ]
58
+
59
+
60
class TinyFishToolExecutor:
    """Runs TinyFish client calls with the API key appropriate to the running mode.

    Without server-side keys the key bound to the current context is used.
    With server-side keys the call requires MCP authentication and is
    attempted with successive keys from the pool until one succeeds.
    """

    def __init__(self, settings: Settings, client: TinyFishClient) -> None:
        self.settings = settings
        self.client = client
        if settings.polling_enabled:
            self.key_pool = TinyFishKeyPool(settings.tinyfish_keys)
        else:
            self.key_pool = None

    async def run(self, operation: Callable[[str], Awaitable[dict[str, Any]]]) -> dict[str, Any]:
        """Invoke *operation* with an API key and return its result.

        Raises:
            AuthenticationError: If the required credentials are missing or,
                in polling mode, no key pool is configured.
            TinyFishApiError: The last error seen once every candidate key has
                failed, or a generic 502 if no attempt was made.
        """
        if self.settings.polling_enabled:
            require_mcp_authentication()
            pool = self.key_pool
            if pool is None:
                raise AuthenticationError("TinyFish key pool is not configured")

            failure: TinyFishApiError | None = None
            candidates = await pool.next_attempt_keys()
            for candidate in candidates:
                try:
                    return await operation(candidate)
                except TinyFishApiError as exc:
                    # Remember the failure and move on to the next key.
                    failure = exc

            if failure is None:
                raise TinyFishApiError(502, "UPSTREAM_ERROR", "TinyFish request failed")
            raise failure

        return await operation(require_api_key())

    async def search(self, request: SearchRequest) -> dict[str, Any]:
        """Run a search through ``run``."""

        def call(api_key: str) -> Awaitable[dict[str, Any]]:
            return self.client.search(api_key, request)

        return await self.run(call)

    async def fetch_content(self, request: FetchRequest) -> dict[str, Any]:
        """Run a content fetch through ``run``."""

        def call(api_key: str) -> Awaitable[dict[str, Any]]:
            return self.client.fetch_content(api_key, request)

        return await self.run(call)

    async def get_search_usage(self, request: SearchUsageRequest) -> dict[str, Any]:
        """List search usage records through ``run``."""

        def call(api_key: str) -> Awaitable[dict[str, Any]]:
            return self.client.get_search_usage(api_key, request)

        return await self.run(call)

    async def list_fetch_usage(self, request: FetchUsageRequest) -> dict[str, Any]:
        """List fetch usage records through ``run``."""

        def call(api_key: str) -> Awaitable[dict[str, Any]]:
            return self.client.list_fetch_usage(api_key, request)

        return await self.run(call)
98
+
99
+
100
def register_tools(mcp: FastMCP, executor: TinyFishToolExecutor) -> None:
    """Register the four TinyFish tools on *mcp*, each delegating to *executor*.

    The policy consistency check runs first.  Every tool builds its request
    model and calls the executor inside one ``try`` block, so validation
    errors and execution errors alike are returned as failure payloads
    instead of propagating.
    """
    assert_policy_consistent()

    UsageStatus = Literal["completed", "failed"]

    @mcp.tool(name="search")
    async def search(
        query: Annotated[
            str,
            Field(
                min_length=1,
                max_length=2000,
                description="Search query, up to 2000 characters.",
            ),
        ],
        location: Annotated[
            str | None,
            Field(description="Optional country code for geo-targeted results."),
        ] = None,
        language: Annotated[
            str | None,
            Field(description="Optional language code for result language."),
        ] = None,
        page: Annotated[
            int,
            Field(ge=0, le=10, description="Search result page number, starting from 0."),
        ] = 0,
        include_thumbnail: Annotated[
            bool,
            Field(description="Include thumbnail_url in results when TinyFish has one."),
        ] = False,
    ) -> dict[str, Any]:
        """Search the web through the free TinyFish Search API.

        Returns ranked titles, snippets, URLs, and optional thumbnail URLs.
        This tool does not start Agent, Browser, batch, or run lifecycle APIs.
        """
        try:
            return await executor.search(
                SearchRequest(
                    query=query,
                    location=location,
                    language=language,
                    page=page,
                    include_thumbnail=include_thumbnail,
                )
            )
        except Exception as failure:
            return safe_call_error(failure)

    @mcp.tool(name="fetch_content")
    async def fetch_content(
        urls: Annotated[
            list[str],
            Field(min_length=1, max_length=10, description="One to ten URLs to fetch."),
        ],
        format: Annotated[
            Literal["markdown", "html", "json"],
            Field(description="Output format for extracted content."),
        ] = "markdown",
        include_html_head: Annotated[
            bool,
            Field(description="When format is html, include a complete document head."),
        ] = False,
        links: Annotated[
            bool,
            Field(description="Extract outbound links from each page."),
        ] = False,
        image_links: Annotated[
            bool,
            Field(description="Extract image URLs from each page."),
        ] = False,
    ) -> dict[str, Any]:
        """Fetch and extract clean content through the free TinyFish Fetch API.

        Each URL is processed independently; per-URL failures appear in errors.
        This tool does not create browser sessions or run web automation.
        """
        try:
            return await executor.fetch_content(
                FetchRequest(
                    urls=urls,
                    format=format,
                    include_html_head=include_html_head,
                    links=links,
                    image_links=image_links,
                )
            )
        except Exception as failure:
            return safe_call_error(failure)

    @mcp.tool(name="get_search_usage")
    async def get_search_usage(
        start_after: Annotated[
            str | None, Field(description="Optional ISO datetime lower bound.")
        ] = None,
        end_before: Annotated[
            str | None, Field(description="Optional ISO datetime upper bound.")
        ] = None,
        status: Annotated[UsageStatus | None, Field(description="completed or failed.")] = None,
        limit: Annotated[int, Field(ge=1, le=1000, description="Search usage page size.")] = 100,
        page: Annotated[int, Field(ge=1, description="Usage page number, starting from 1.")] = 1,
    ) -> dict[str, Any]:
        """List Search API usage records for audit and troubleshooting."""
        try:
            return await executor.get_search_usage(
                SearchUsageRequest(
                    start_after=start_after,
                    end_before=end_before,
                    status=status,
                    limit=limit,
                    page=page,
                )
            )
        except Exception as failure:
            return safe_call_error(failure)

    @mcp.tool(name="list_fetch_usage")
    async def list_fetch_usage(
        start_after: Annotated[
            str | None, Field(description="Optional ISO datetime lower bound.")
        ] = None,
        end_before: Annotated[
            str | None, Field(description="Optional ISO datetime upper bound.")
        ] = None,
        status: Annotated[UsageStatus | None, Field(description="completed or failed.")] = None,
        limit: Annotated[int, Field(ge=1, le=100, description="Fetch usage page size.")] = 20,
        page: Annotated[int, Field(ge=1, description="Usage page number, starting from 1.")] = 1,
    ) -> dict[str, Any]:
        """List Fetch API usage records for audit and troubleshooting."""
        try:
            return await executor.list_fetch_usage(
                FetchUsageRequest(
                    start_after=start_after,
                    end_before=end_before,
                    status=status,
                    limit=limit,
                    page=page,
                )
            )
        except Exception as failure:
            return safe_call_error(failure)
@@ -0,0 +1,185 @@
1
+ Metadata-Version: 2.4
2
+ Name: microfish
3
+ Version: 0.1.0
4
+ Summary: A restricted TinyFish MCP gateway for free search and fetch capabilities.
5
+ License-File: LICENSE
6
+ Requires-Python: >=3.11
7
+ Requires-Dist: httpx<1,>=0.27
8
+ Requires-Dist: mcp<2,>=1.12
9
+ Requires-Dist: pydantic-settings<3,>=2.4
10
+ Requires-Dist: pydantic<3,>=2.8
11
+ Requires-Dist: starlette<1,>=0.37
12
+ Requires-Dist: uvicorn[standard]<1,>=0.30
13
+ Description-Content-Type: text/markdown
14
+
15
+ <img src=".github/assets/microfish-logo.png" alt="microfish logo" width="128" align="middle">
16
+
17
+ # microfish
18
+
19
+ 中文文档: [README_cn.md](README_cn.md)
20
+
21
+ microfish is a restricted TinyFish MCP gateway. It exposes only the allowlisted TinyFish Search- and Fetch-related tools.
22
+
23
+ ## Tools
24
+
25
+ Retained tools:
26
+ - search
27
+ - fetch_content
28
+ - get_search_usage
29
+ - list_fetch_usage
30
+
31
+ Blocked tool groups:
32
+ - Agent automation
33
+ - Batch automation
34
+ - Browser sessions
35
+
36
+ ## Authentication and running modes
37
+
38
+ microfish exposes only the TinyFish Search- and Fetch-related APIs. TinyFish Agent, Browser, batch, and run lifecycle APIs are intentionally not registered.
39
+
40
+ ### Get a TinyFish API key
41
+
42
+ Generate your API key at https://agent.tinyfish.ai/api-keys.
43
+
44
+ ### Client-owned single key
45
+
46
+ Leave `TINYFISH_KEYS` unset. Each MCP client sends `Authorization: Bearer <YOUR_TINYFISH_API_KEY>`. microfish forwards that value to TinyFish as `X-API-Key` for the current request only.
47
+
48
+ ### Server-managed single key
49
+
50
+ Set `TINYFISH_KEYS` to one TinyFish API key. MCP clients do not receive the TinyFish key. If `MCP_AUTH_TOKEN` is set, clients send `Authorization: Bearer <YOUR_MCP_AUTH_TOKEN>`; if `MCP_AUTH_TOKEN` is unset, the MCP entrypoint is not protected by a bearer token.
51
+
52
+ ### Server-managed key pool
53
+
54
+ Set `TINYFISH_KEYS` to multiple comma-separated TinyFish API keys. microfish assigns keys to requests in order. When a whole upstream request fails, it retries with the next key, stopping once every available key has been tried for that call or after three extra retries, whichever comes first.
55
+
56
+ ## Server configuration
57
+
58
+ microfish reads runtime settings from environment variables:
59
+
60
+ - `MICROFISH_HOST`: bind host for the HTTP server. Defaults to `0.0.0.0`.
61
+ - `MICROFISH_PORT`: bind port for the HTTP server. Defaults to `8000`.
62
+ - `MICROFISH_MCP_PATH`: HTTP path that exposes the MCP entrypoint. Defaults to `/mcp`.
63
+ - `MICROFISH_TRANSPORT`: transport for the server. Use `http` for the HTTP service or `stdio` for local coding agent subprocesses. Defaults to `http`.
64
+ - `TINYFISH_KEYS`: comma-separated TinyFish API keys; presence selects server-managed mode.
65
+ - `MCP_AUTH_TOKEN`: optional bearer token; when set, MCP clients must send it in server-managed mode.
66
+
67
+ ## Client configuration
68
+
69
+ microfish supports two transports:
70
+ - **HTTP transport** (`MICROFISH_TRANSPORT=http`, the default): run microfish as an HTTP service and connect clients to `http://localhost:8000/mcp`.
71
+ - **stdio transport** (`MICROFISH_TRANSPORT=stdio` or `--transport stdio`): launch `uvx microfish --transport stdio` as a local subprocess for coding agents.
72
+
73
+ For the HTTP transport, the value of `Authorization: Bearer` depends on your running mode:
74
+ - **Client-owned single key**: set it to your TinyFish API key.
75
+ - **Server-managed single/multiple keys with `MCP_AUTH_TOKEN`**: set it to the MCP auth token.
76
+ - **Server-managed keys without `MCP_AUTH_TOKEN`**: omit the Authorization header entirely.
77
+
78
+ The stdio transport requires `TINYFISH_KEYS` because there is no separate Authorization header on local subprocess pipes.
79
+
80
+ ### Claude Code
81
+
82
+ HTTP transport:
83
+
84
+ ```bash
85
+ # Without auth header
86
+ claude mcp add --transport http microfish http://localhost:8000/mcp
87
+
88
+ # With auth header
89
+ claude mcp add --transport http microfish http://localhost:8000/mcp \
90
+ --header "Authorization: Bearer <YOUR_MCP_OR_TINYFISH_TOKEN>"
91
+ ```
92
+
93
+ stdio transport:
94
+
95
+ ```bash
96
+ TINYFISH_KEYS=<YOUR_TINYFISH_API_KEY> \
97
+ claude mcp add microfish --env TINYFISH_KEYS -- uvx microfish --transport stdio
98
+ ```
99
+
100
+ ### Codex
101
+
102
+ HTTP transport:
103
+
104
+ ```toml
105
+ [mcp_servers.microfish]
106
+ url = "http://localhost:8000/mcp"
107
+ bearer_token_env_var = "MICROFISH_MCP_BEARER"
108
+ ```
109
+
110
+ Set `MICROFISH_MCP_BEARER` in your shell environment to your TinyFish API key (client-owned mode) or MCP auth token (server-managed mode).
111
+
112
+ stdio transport:
113
+
114
+ ```toml
115
+ [mcp_servers.microfish]
116
+ command = "uvx"
117
+ args = ["microfish", "--transport", "stdio"]
118
+ env = { TINYFISH_KEYS = "<YOUR_TINYFISH_API_KEY>" }
119
+ ```
120
+
121
+ ### Cursor
122
+
123
+ HTTP transport:
124
+
125
+ ```json
126
+ {
127
+ "mcpServers": {
128
+ "microfish": {
129
+ "url": "http://localhost:8000/mcp",
130
+ "headers": {
131
+ "Authorization": "Bearer ${env:MICROFISH_MCP_BEARER}"
132
+ }
133
+ }
134
+ }
135
+ }
136
+ ```
137
+
138
+ Set `MICROFISH_MCP_BEARER` in your environment to your TinyFish API key (client-owned mode) or MCP auth token (server-managed mode). If no auth token is required, remove the `headers` block.
139
+
140
+ stdio transport:
141
+
142
+ ```json
143
+ {
144
+ "mcpServers": {
145
+ "microfish": {
146
+ "command": "uvx",
147
+ "args": ["microfish", "--transport", "stdio"],
148
+ "env": {
149
+ "TINYFISH_KEYS": "<YOUR_TINYFISH_API_KEY>"
150
+ }
151
+ }
152
+ }
153
+ }
154
+ ```
155
+
156
+ ## Run locally
157
+
158
+ uv sync
159
+ uv run microfish
160
+
161
+ Or run directly without cloning via `uvx microfish`.
162
+
163
+ ## Docker
164
+
165
+ Two compose files are provided:
166
+ - `docker-compose.yml` pulls the published image `ghcr.io/vvtommy/microfish:${MICROFISH_IMAGE_TAG:-latest}` from GHCR.
167
+ - `docker-compose_build.yml` builds the local Dockerfile.
168
+
169
+ Both expose microfish on port 8000. Do not put TinyFish keys directly in compose files; pass them through your deployment environment.
170
+
171
+ ```bash
172
+ docker compose up -d
173
+ claude mcp add --transport http microfish http://localhost:8000/mcp \
174
+ --header "Authorization: Bearer <YOUR_MCP_OR_TINYFISH_TOKEN>"
175
+ ```
176
+
177
+ ## Releasing
178
+
179
+ Push a SemVer tag of the form `vX.Y.Z` to trigger the publish workflows:
180
+ - `.github/workflows/pypi.yml` builds and publishes the Python package to PyPI via PyPI OIDC trusted publishing.
181
+ - `.github/workflows/docker.yml` builds and publishes `ghcr.io/vvtommy/microfish` Docker images with version tags and `latest`.
182
+
183
+ ## MCP endpoint
184
+
185
+ http://localhost:8000/mcp
@@ -0,0 +1,12 @@
1
+ microfish/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ microfish/auth.py,sha256=Jl_mS83-qMVsaMczgucRxVE5ixSRsPukHgHfj813PRc,2901
3
+ microfish/server.py,sha256=_8DUnPiMZnszsUv1Fqsy33woq6zgh1x200aKexW5JwU,2599
4
+ microfish/settings.py,sha256=MrVq4Yfy__F8x3z0FB2r6axuOvTO7UhzHS0qsK7C-H8,2039
5
+ microfish/tinyfish_client.py,sha256=XbEoHAyQxxAm_0x-dwYm86gblR7ycgpxD_RTvIQESnM,7359
6
+ microfish/tool_policy.py,sha256=zkJzfh9UyVrJW1ozRBR41vOOhChP7RwIfhz055hKQpw,4424
7
+ microfish/tools.py,sha256=yYAhXOhoEYQkEFD2v2f0ish86AaYmJoeWGMgi9VeiA0,9114
8
+ microfish-0.1.0.dist-info/METADATA,sha256=maWp-ok3NvfBaY-pRQpmUODfylchDb91LWu5Ekh77bg,5965
9
+ microfish-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
10
+ microfish-0.1.0.dist-info/entry_points.txt,sha256=2za57KKNs-41vdsWiCShQjZ0UC5b25IZetrS76pEFoA,52
11
+ microfish-0.1.0.dist-info/licenses/LICENSE,sha256=ObPR6ejmUmc7USfN0ozY7z8luJ_KBPf_MjYpA5HllMc,1063
12
+ microfish-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ microfish = microfish.server:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 vvtommy
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.