data-filter-mcp 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_filter_mcp-0.1.0/LICENSE +21 -0
- data_filter_mcp-0.1.0/PKG-INFO +74 -0
- data_filter_mcp-0.1.0/README.md +50 -0
- data_filter_mcp-0.1.0/data_filter_mcp/__init__.py +1 -0
- data_filter_mcp-0.1.0/data_filter_mcp/loaders/__init__.py +1 -0
- data_filter_mcp-0.1.0/data_filter_mcp/loaders/base.py +13 -0
- data_filter_mcp-0.1.0/data_filter_mcp/loaders/factory.py +49 -0
- data_filter_mcp-0.1.0/data_filter_mcp/loaders/json_loader.py +15 -0
- data_filter_mcp-0.1.0/data_filter_mcp/loaders/txt_loader.py +13 -0
- data_filter_mcp-0.1.0/data_filter_mcp/loaders/yaml_loader.py +16 -0
- data_filter_mcp-0.1.0/data_filter_mcp/models.py +54 -0
- data_filter_mcp-0.1.0/data_filter_mcp/registry.py +115 -0
- data_filter_mcp-0.1.0/data_filter_mcp/server.py +240 -0
- data_filter_mcp-0.1.0/data_filter_mcp/validator.py +287 -0
- data_filter_mcp-0.1.0/data_filter_mcp.egg-info/PKG-INFO +74 -0
- data_filter_mcp-0.1.0/data_filter_mcp.egg-info/SOURCES.txt +25 -0
- data_filter_mcp-0.1.0/data_filter_mcp.egg-info/dependency_links.txt +1 -0
- data_filter_mcp-0.1.0/data_filter_mcp.egg-info/entry_points.txt +2 -0
- data_filter_mcp-0.1.0/data_filter_mcp.egg-info/requires.txt +6 -0
- data_filter_mcp-0.1.0/data_filter_mcp.egg-info/top_level.txt +1 -0
- data_filter_mcp-0.1.0/pyproject.toml +45 -0
- data_filter_mcp-0.1.0/setup.cfg +4 -0
- data_filter_mcp-0.1.0/tests/test_loaders.py +45 -0
- data_filter_mcp-0.1.0/tests/test_mcp_tools.py +45 -0
- data_filter_mcp-0.1.0/tests/test_registry.py +70 -0
- data_filter_mcp-0.1.0/tests/test_server_flow.py +71 -0
- data_filter_mcp-0.1.0/tests/test_validator.py +168 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Alex Arakelyan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: data-filter-mcp
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Local MCP server for running restricted Python text filters over files
|
|
5
|
+
Author: Alex Arakelyan
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/alxark/data-filter-mcp
|
|
8
|
+
Project-URL: Repository, https://github.com/alxark/data-filter-mcp
|
|
9
|
+
Project-URL: Issues, https://github.com/alxark/data-filter-mcp/issues
|
|
10
|
+
Keywords: mcp,model-context-protocol,filter,python,server
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Python: >=3.11
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: mcp<2.0,>=1.12
|
|
19
|
+
Requires-Dist: pydantic<3,>=2
|
|
20
|
+
Requires-Dist: PyYAML<7,>=6
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest<9,>=8; extra == "dev"
|
|
23
|
+
Dynamic: license-file
|
|
24
|
+
|
|
25
|
+
# data-filter-mcp
|
|
26
|
+
|
|
27
|
+
Local MCP server that registers restricted Python filters and runs them against local `json`, `yaml`, and `txt` files.
|
|
28
|
+
|
|
29
|
+
## What it does
|
|
30
|
+
|
|
31
|
+
- `register_filter` accepts Python source code with exactly one top-level function: `def filter_item(data):`
|
|
32
|
+
- `run_filter` loads a local file, passes the loaded document into `filter_item(data)`, and returns the text from `result_text`
|
|
33
|
+
- Registered filters live only in memory and expire automatically based on server TTL settings
|
|
34
|
+
|
|
35
|
+
## Run with uvx
|
|
36
|
+
|
|
37
|
+
After publishing to PyPI, start the server with:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
uvx data-filter-mcp --filter-ttl-seconds 3600 --cleanup-interval-seconds 60
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
Show the available CLI flags with:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
uvx data-filter-mcp --help
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Example MCP client configuration:
|
|
50
|
+
|
|
51
|
+
```json
|
|
52
|
+
{
|
|
53
|
+
"mcpServers": {
|
|
54
|
+
"data-filter": {
|
|
55
|
+
"command": "uvx",
|
|
56
|
+
"args": [
|
|
57
|
+
"data-filter-mcp",
|
|
58
|
+
"--filter-ttl-seconds",
|
|
59
|
+
"3600",
|
|
60
|
+
"--cleanup-interval-seconds",
|
|
61
|
+
"60"
|
|
62
|
+
]
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Run locally
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
python server.py --filter-ttl-seconds 3600 --cleanup-interval-seconds 60
|
|
72
|
+
python -m data_filter_mcp.server --filter-ttl-seconds 3600 --cleanup-interval-seconds 60
|
|
73
|
+
.venv/bin/data-filter-mcp --filter-ttl-seconds 3600 --cleanup-interval-seconds 60
|
|
74
|
+
```
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# data-filter-mcp
|
|
2
|
+
|
|
3
|
+
Local MCP server that registers restricted Python filters and runs them against local `json`, `yaml`, and `txt` files.
|
|
4
|
+
|
|
5
|
+
## What it does
|
|
6
|
+
|
|
7
|
+
- `register_filter` accepts Python source code with exactly one top-level function: `def filter_item(data):`
|
|
8
|
+
- `run_filter` loads a local file, passes the loaded document into `filter_item(data)`, and returns the text from `result_text`
|
|
9
|
+
- Registered filters live only in memory and expire automatically based on server TTL settings
|
|
10
|
+
|
|
11
|
+
## Run with uvx
|
|
12
|
+
|
|
13
|
+
After publishing to PyPI, start the server with:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
uvx data-filter-mcp --filter-ttl-seconds 3600 --cleanup-interval-seconds 60
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Show the available CLI flags with:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
uvx data-filter-mcp --help
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Example MCP client configuration:
|
|
26
|
+
|
|
27
|
+
```json
|
|
28
|
+
{
|
|
29
|
+
"mcpServers": {
|
|
30
|
+
"data-filter": {
|
|
31
|
+
"command": "uvx",
|
|
32
|
+
"args": [
|
|
33
|
+
"data-filter-mcp",
|
|
34
|
+
"--filter-ttl-seconds",
|
|
35
|
+
"3600",
|
|
36
|
+
"--cleanup-interval-seconds",
|
|
37
|
+
"60"
|
|
38
|
+
]
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Run locally
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
python server.py --filter-ttl-seconds 3600 --cleanup-interval-seconds 60
|
|
48
|
+
python -m data_filter_mcp.server --filter-ttl-seconds 3600 --cleanup-interval-seconds 60
|
|
49
|
+
.venv/bin/data-filter-mcp --filter-ttl-seconds 3600 --cleanup-interval-seconds 60
|
|
50
|
+
```
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Local MCP server for running restricted text filters."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Document loaders for supported input file types."""
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Protocol
|
|
5
|
+
|
|
6
|
+
from ..models import LoadedDocument
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class DocumentLoader(Protocol):
    """Structural interface that every document loader satisfies.

    A loader advertises the file type it handles and the file suffixes tied
    to that type, and knows how to turn a path into a loaded document.
    """

    # Name of the file type this loader handles.
    file_type: str
    # File suffixes associated with this loader.
    extensions: tuple[str, ...]

    def load(self, file_path: Path) -> LoadedDocument:
        """Read *file_path* and return the loaded document."""
        ...
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from ..models import LoadedDocument
|
|
6
|
+
from .base import DocumentLoader
|
|
7
|
+
from .json_loader import JsonLoader
|
|
8
|
+
from .txt_loader import TxtLoader
|
|
9
|
+
from .yaml_loader import YamlLoader
|
|
10
|
+
|
|
11
|
+
# One shared loader instance per supported file type, keyed by type name.
LOADERS: dict[str, DocumentLoader] = dict(
    json=JsonLoader(),
    txt=TxtLoader(),
    yaml=YamlLoader(),
)

# Lower-case file suffix -> file type name. Both YAML spellings map to "yaml".
EXTENSION_TO_FILE_TYPE = {
    suffix: type_name
    for type_name, suffixes in (
        ("json", (".json",)),
        ("txt", (".txt",)),
        ("yaml", (".yaml", ".yml")),
    )
    for suffix in suffixes
}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def normalize_file_type(file_type: str) -> str:
    """Return the canonical name of a supported file type.

    The input is trimmed and lower-cased before it is looked up in ``LOADERS``.

    Raises:
        ValueError: If the cleaned-up name is not a key of ``LOADERS``. The
            message echoes the caller's original, unmodified value.
    """
    candidate = file_type.strip().lower()
    if candidate in LOADERS:
        return candidate
    raise ValueError(f"Unsupported file type: {file_type}")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def resolve_file_type(file_path: Path, file_type: str | None = None) -> str:
    """Decide which file type to use for *file_path*.

    An explicit *file_type* always wins and is validated through
    ``normalize_file_type``. Otherwise the lower-cased suffix of *file_path*
    is looked up in ``EXTENSION_TO_FILE_TYPE``.

    Raises:
        ValueError: If the explicit type is unsupported, or if no type was
            given and the suffix is not a known extension.
    """
    if file_type is None:
        detected = EXTENSION_TO_FILE_TYPE.get(file_path.suffix.lower())
        if detected is not None:
            return detected
        raise ValueError(f"Could not detect file type from extension: {file_path.name}")
    return normalize_file_type(file_type)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def load_document(
    file_path: Path,
    file_type: str | None = None,
) -> tuple[LoadedDocument, str]:
    """Load *file_path* with the loader matching its resolved file type.

    Returns:
        A ``(document, file_type)`` pair, where ``file_type`` is the type name
        that was actually used to pick the loader.

    Raises:
        ValueError: Propagated from ``resolve_file_type`` when no supported
            type can be determined.
    """
    effective_type = resolve_file_type(file_path, file_type)
    document = LOADERS[effective_type].load(file_path)
    return document, effective_type
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from ..models import LoadedDocument
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class JsonLoader:
    """Loader that parses a UTF-8 encoded JSON file."""

    file_type = "json"
    extensions = (".json",)

    def load(self, file_path: Path) -> LoadedDocument:
        """Decode the JSON text stored at *file_path* and return the value."""
        raw_text = file_path.read_text(encoding="utf-8")
        return json.loads(raw_text)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from ..models import LoadedDocument
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TxtLoader:
    """Loader that reads a UTF-8 text file as a list of lines."""

    file_type = "txt"
    extensions = (".txt",)

    def load(self, file_path: Path) -> LoadedDocument:
        """Return the lines of *file_path* without their line terminators."""
        with file_path.open("r", encoding="utf-8") as handle:
            content = handle.read()
        return content.splitlines()
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import yaml
|
|
6
|
+
|
|
7
|
+
from ..models import LoadedDocument
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class YamlLoader:
    """Loader that parses a UTF-8 encoded YAML file with ``yaml.safe_load``."""

    file_type = "yaml"
    extensions = (".yaml", ".yml")

    def load(self, file_path: Path) -> LoadedDocument:
        """Parse *file_path* as YAML and return the resulting value."""
        # The open handle (not its text) is given to the parser.
        with file_path.open("r", encoding="utf-8") as source:
            document = yaml.safe_load(source)
        return document
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Any, Callable
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
|
|
9
|
+
# Value produced by a loader and handed to a filter. Aliased to Any, so no
# particular structure is enforced by this alias.
LoadedDocument = Any
# Signature of a registered filter: takes the loaded document, returns text.
FilterCallable = Callable[[LoadedDocument], str]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Structured result describing a freshly registered filter: its id, expiry
# timestamp, TTL and the validation policy version.
# NOTE(review): documented with comments rather than a class docstring because
# pydantic presumably surfaces a model docstring in the generated schema.
class RegisterFilterResult(BaseModel):
    filter_id: str = Field(description="Unique filter identifier to pass into run_filter.")
    expires_at: str = Field(description="UTC timestamp in ISO 8601 format when the filter expires.")
    ttl_seconds: int = Field(description="Server-side lifetime of the registered filter in seconds.")
    policy_version: str = Field(description="Validation policy version used for the submitted filter code.")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# Structured result of one filter run: which filter ran, on which file, with
# which loader type, when the filter expires, and the text it returned.
# NOTE(review): documented with comments rather than a class docstring because
# pydantic presumably surfaces a model docstring in the generated schema.
class RunFilterResult(BaseModel):
    filter_id: str = Field(description="Identifier of the registered filter that produced this result.")
    file_path: str = Field(description="Resolved absolute path of the processed local file.")
    file_type: str = Field(description="Effective loader type used for the file. One of: json, yaml, txt.")
    expires_at: str = Field(description="UTC timestamp in ISO 8601 format when this filter expires.")
    result_text: str = Field(
        description="Exact text returned by filter_item(data).",
    )
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass(slots=True)
|
|
45
|
+
class RegisteredFilter:
|
|
46
|
+
filter_id: str
|
|
47
|
+
function: FilterCallable
|
|
48
|
+
source_code: str
|
|
49
|
+
created_at: datetime
|
|
50
|
+
expires_at: datetime
|
|
51
|
+
policy_version: str
|
|
52
|
+
|
|
53
|
+
def is_expired(self, now: datetime) -> bool:
|
|
54
|
+
return now >= self.expires_at
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime, timedelta, timezone
|
|
4
|
+
from threading import Event, RLock, Thread
|
|
5
|
+
from typing import Callable
|
|
6
|
+
from uuid import uuid4
|
|
7
|
+
|
|
8
|
+
from .models import FilterCallable, RegisteredFilter
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class FilterRegistryError(RuntimeError):
    """Common ancestor of the errors raised by the filter registry."""


class FilterNotFoundError(FilterRegistryError):
    """Signals a lookup for a filter id the registry does not hold."""


class FilterExpiredError(FilterRegistryError):
    """Signals a lookup for a filter whose TTL has already elapsed."""
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class FilterRegistry:
    """In-memory table of registered filters with TTL-based expiry.

    Access to the filter table is guarded by a re-entrant lock. Expired
    entries are dropped lazily on lookup and, optionally, by a background
    daemon thread that sweeps the table at a fixed interval.
    """

    def __init__(
        self,
        filter_ttl_seconds: int,
        cleanup_interval_seconds: float = 60.0,
        now_provider: Callable[[], datetime] | None = None,
    ) -> None:
        """Create an empty registry.

        Args:
            filter_ttl_seconds: Lifetime of each registered filter in seconds.
            cleanup_interval_seconds: Pause between background sweeps.
            now_provider: Clock used for all timestamps; defaults to the
                current time in UTC.

        Raises:
            ValueError: If either duration is zero or negative.
        """
        if not filter_ttl_seconds > 0:
            raise ValueError("filter_ttl_seconds must be greater than zero")
        if not cleanup_interval_seconds > 0:
            raise ValueError("cleanup_interval_seconds must be greater than zero")

        self._filter_ttl_seconds = filter_ttl_seconds
        self._cleanup_interval_seconds = cleanup_interval_seconds
        self._now_provider = (
            now_provider if now_provider else (lambda: datetime.now(timezone.utc))
        )
        self._filters: dict[str, RegisteredFilter] = {}
        self._lock = RLock()
        self._stop_event = Event()
        self._cleanup_thread: Thread | None = None

    @property
    def filter_ttl_seconds(self) -> int:
        """Lifetime, in seconds, given to every newly registered filter."""
        return self._filter_ttl_seconds

    def register(
        self,
        source_code: str,
        function: FilterCallable,
        policy_version: str,
    ) -> RegisteredFilter:
        """Store a filter under a fresh UUID and return its record.

        The record expires ``filter_ttl_seconds`` after the current time
        reported by the clock.
        """
        created = self._now_provider()
        record = RegisteredFilter(
            filter_id=str(uuid4()),
            function=function,
            source_code=source_code,
            created_at=created,
            expires_at=created + timedelta(seconds=self._filter_ttl_seconds),
            policy_version=policy_version,
        )
        with self._lock:
            self._filters[record.filter_id] = record
        return record

    def get(self, filter_id: str) -> RegisteredFilter:
        """Return the live record stored under *filter_id*.

        Raises:
            FilterNotFoundError: If no record exists for the id.
            FilterExpiredError: If the record has expired; it is removed
                from the table before the error is raised.
        """
        current_time = self._now_provider()
        with self._lock:
            record = self._filters.get(filter_id)
            if record is None:
                raise FilterNotFoundError(f"Unknown filter_id: {filter_id}")
            if not record.is_expired(current_time):
                return record
            del self._filters[filter_id]
            raise FilterExpiredError(f"Filter has expired: {filter_id}")

    def cleanup_expired(self) -> int:
        """Remove every expired record and return how many were removed."""
        current_time = self._now_provider()
        with self._lock:
            # Collect first, delete afterwards: the table must not change
            # size while it is being iterated.
            stale_ids = tuple(
                key
                for key, record in self._filters.items()
                if record.is_expired(current_time)
            )
            for key in stale_ids:
                del self._filters[key]
        return len(stale_ids)

    def start_cleanup_thread(self) -> None:
        """Start the background sweeper unless one is already running."""
        with self._lock:
            running = self._cleanup_thread
            if running is not None and running.is_alive():
                return
            self._stop_event.clear()
            worker = Thread(
                target=self._cleanup_loop,
                name="filter-registry-cleanup",
                daemon=True,
            )
            self._cleanup_thread = worker
            worker.start()

    def stop_cleanup_thread(self) -> None:
        """Signal the sweeper to stop and wait briefly for it to finish."""
        self._stop_event.set()
        worker = self._cleanup_thread
        if worker is None:
            return
        # Bounded wait: one sweep interval plus a second of slack.
        worker.join(timeout=self._cleanup_interval_seconds + 1.0)
        self._cleanup_thread = None

    def __len__(self) -> int:
        """Return the number of records currently stored."""
        with self._lock:
            return len(self._filters)

    def _cleanup_loop(self) -> None:
        """Sweep expired records once per interval until stop is signalled."""
        while True:
            # wait() returns True as soon as the stop event is set.
            if self._stop_event.wait(self._cleanup_interval_seconds):
                return
            self.cleanup_expired()
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Annotated, Literal, Sequence
|
|
7
|
+
|
|
8
|
+
from mcp.server.fastmcp import FastMCP
|
|
9
|
+
from pydantic import Field
|
|
10
|
+
|
|
11
|
+
from .loaders.factory import load_document
|
|
12
|
+
from .models import RegisterFilterResult, RunFilterResult
|
|
13
|
+
from .registry import FilterExpiredError, FilterNotFoundError, FilterRegistry
|
|
14
|
+
from .validator import POLICY_VERSION, FilterValidationError, compile_filter
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _to_isoformat(value: datetime) -> str:
|
|
18
|
+
return value.isoformat().replace("+00:00", "Z")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class FilterService:
    """Register filters and run them against local files.

    The service owns a ``FilterRegistry`` and combines it with the filter
    compiler and the document loaders.
    """

    def __init__(
        self,
        filter_ttl_seconds: int = 3600,
        cleanup_interval_seconds: float = 60.0,
        now_provider=None,
    ) -> None:
        """Create the service and its backing registry.

        Args:
            filter_ttl_seconds: Lifetime of each registered filter in seconds.
            cleanup_interval_seconds: Pause between registry cleanup sweeps.
            now_provider: Optional zero-argument clock forwarded to the
                registry; ``None`` lets the registry pick its default.
        """
        self._registry = FilterRegistry(
            filter_ttl_seconds=filter_ttl_seconds,
            cleanup_interval_seconds=cleanup_interval_seconds,
            now_provider=now_provider,
        )

    def start(self) -> None:
        """Start the registry's background cleanup thread."""
        self._registry.start_cleanup_thread()

    def stop(self) -> None:
        """Stop the registry's background cleanup thread."""
        self._registry.stop_cleanup_thread()

    def register_filter(self, code: str) -> RegisterFilterResult:
        """Compile *code* and store the resulting filter in the registry.

        Args:
            code: Source text of the filter to compile.

        Returns:
            The new filter's id, expiry timestamp, TTL and policy version.

        Raises:
            FilterValidationError: Propagated unchanged from
                ``compile_filter`` when the code is rejected.
        """
        # No try/except here: catching FilterValidationError only to re-raise
        # it was a no-op, so the error is simply allowed to propagate.
        filter_fn = compile_filter(code)

        entry = self._registry.register(
            source_code=code,
            function=filter_fn,
            policy_version=POLICY_VERSION,
        )
        return RegisterFilterResult(
            filter_id=entry.filter_id,
            expires_at=_to_isoformat(entry.expires_at),
            policy_version=entry.policy_version,
            ttl_seconds=self._registry.filter_ttl_seconds,
        )

    def run_filter(
        self,
        filter_id: str,
        file_path: str,
        file_type: Literal["json", "yaml", "txt"] | None = None,
    ) -> RunFilterResult:
        """Run a registered filter on a local file and return its text.

        Args:
            filter_id: Identifier of a previously registered filter.
            file_path: Path of the file to load; ``~`` is expanded and the
                path is resolved before use.
            file_type: Optional explicit loader type; when omitted the type
                is chosen by ``load_document``.

        Returns:
            The filter id, resolved path, effective file type, filter expiry
            timestamp and the text returned by the filter.

        Raises:
            ValueError: If the filter is unknown or expired, the path is not
                a regular file, or the filter returns a non-string value.
            FileNotFoundError: If the resolved path does not exist.
        """
        try:
            entry = self._registry.get(filter_id)
        except (FilterNotFoundError, FilterExpiredError) as exc:
            # Registry errors are surfaced to callers as ValueError.
            raise ValueError(str(exc)) from exc

        resolved_path = Path(file_path).expanduser().resolve()
        if not resolved_path.exists():
            raise FileNotFoundError(f"File not found: {resolved_path}")
        if not resolved_path.is_file():
            raise ValueError(f"Path is not a file: {resolved_path}")

        document, resolved_file_type = load_document(resolved_path, file_type)
        result = entry.function(document)
        if not isinstance(result, str):
            raise ValueError("filter_item(data) must return a string")

        return RunFilterResult(
            expires_at=_to_isoformat(entry.expires_at),
            file_path=str(resolved_path),
            file_type=resolved_file_type,
            filter_id=entry.filter_id,
            result_text=result,
        )
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def create_mcp_server(service: FilterService | None = None) -> FastMCP:
    """Build the FastMCP server exposing ``register_filter`` and ``run_filter``.

    Args:
        service: Service that backs both tools. A default ``FilterService`` is
            created only when this is ``None``.

    Returns:
        The configured ``FastMCP`` instance named ``data-filter-mcp``.
    """
    # Explicit None check instead of a truthiness test, so a caller-supplied
    # service is never replaced merely because it evaluates as falsy.
    active_service = service if service is not None else FilterService()
    mcp = FastMCP("data-filter-mcp")

    # NOTE(review): the tool docstrings below are presumably what clients see
    # as tool descriptions, so their wording is kept as-is.
    @mcp.tool()
    def register_filter(
        code: Annotated[
            str,
            Field(
                description=(
                    "Python source code that defines exactly one top-level function "
                    "named filter_item(data). The function receives the loaded "
                    "document and must return a text result."
                )
            ),
        ],
    ) -> RegisterFilterResult:
        """
        Validate and register a restricted Python filter for later execution on a local file.

        Use this tool first when you want to run custom filtering or transformation logic
        against a local document. The submitted source code must define exactly one
        top-level function with this exact signature:

            def filter_item(data):

        The server loads the target file before execution and passes the loaded document
        into filter_item(data).

        Input document types:
        - JSON files -> parsed JSON value such as dict, list, string, number, boolean, or null
        - YAML files -> parsed YAML value such as dict, list, string, number, boolean, or null
        - TXT files -> list of text lines

        The function must return a text result (str). The returned text may contain any
        format you want, such as plain text, YAML, CSV-like text, or a custom report.

        Safety rules:
        - The code is validated against a restricted Python subset
        - Imports, file I/O, network access, dynamic execution, and unsafe attribute access are rejected
        - Registered filters are stored in memory only and expire automatically after a server-side TTL

        Args:
            code: Python source code that defines exactly one function named filter_item(data).

        Returns:
            A structured object containing the new filter identifier, expiration timestamp,
            TTL in seconds, and validation policy version.

        Raises:
            ValueError: If the code is invalid, unsafe, or does not match the required function signature.
        """

        return active_service.register_filter(code)

    @mcp.tool()
    def run_filter(
        filter_id: Annotated[
            str,
            Field(description="Identifier previously returned by register_filter."),
        ],
        file_path: Annotated[
            str,
            Field(
                description=(
                    "Path to the local file that should be loaded and passed into "
                    "filter_item(data)."
                )
            ),
        ],
        file_type: Annotated[
            Literal["json", "yaml", "txt"] | None,
            Field(
                description=(
                    "Optional explicit file type override. If omitted, the server "
                    "detects the type from the file extension."
                )
            ),
        ] = None,
    ) -> RunFilterResult:
        """
        Run a previously registered filter on a local file and return its text output.

        Use this tool after register_filter. The server resolves the registered filter,
        loads the file from the local filesystem, converts it into an in-memory document,
        calls filter_item(data), and returns the exact text produced by the filter.

        Supported file types:
        - json
        - yaml
        - txt

        If file_type is omitted, the server tries to detect the type from the file extension.

        File loading behavior:
        - json -> parsed JSON value
        - yaml -> parsed YAML value
        - txt -> list of lines

        Args:
            filter_id: Identifier returned earlier by register_filter.
            file_path: Path to the local file that should be loaded and passed into the filter.
            file_type: Optional explicit file type override. Use this when extension-based detection
                is missing or ambiguous.

        Returns:
            A structured object containing the filter identifier, resolved file path,
            effective file type, filter expiration time, and result_text.

        Raises:
            ValueError: If the filter does not exist, has expired, returns a non-string result,
                or the file type is unsupported.
            FileNotFoundError: If the file does not exist.
        """

        return active_service.run_filter(filter_id, file_path, file_type)

    return mcp
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _positive_int(raw: str) -> int:
    """argparse ``type=`` converter that accepts only integers above zero."""
    try:
        value = int(raw)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {raw!r}") from None
    if value <= 0:
        raise argparse.ArgumentTypeError(f"must be greater than zero, got {raw!r}")
    return value


def _positive_float(raw: str) -> float:
    """argparse ``type=`` converter that accepts only finite floats above zero."""
    try:
        value = float(raw)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid float value: {raw!r}") from None
    # The chained comparison is False for NaN and for infinity as well.
    if not 0 < value < float("inf"):
        raise argparse.ArgumentTypeError(
            f"must be a finite number greater than zero, got {raw!r}"
        )
    return value


def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
    """Parse the server's command-line options.

    Both durations must be greater than zero. Invalid values are rejected
    here as ordinary usage errors (argparse prints the message and exits with
    status 2) instead of failing later while the service is being built.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        A namespace with ``filter_ttl_seconds`` (int, default 3600) and
        ``cleanup_interval_seconds`` (float, default 60.0).
    """
    parser = argparse.ArgumentParser(description="Run the local data filter MCP server")
    parser.add_argument(
        "--filter-ttl-seconds",
        type=_positive_int,
        default=3600,
        help="How long registered filters stay available in memory",
    )
    parser.add_argument(
        "--cleanup-interval-seconds",
        type=_positive_float,
        default=60.0,
        help="How often expired filters are swept from the registry",
    )
    return parser.parse_args(argv)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def main(argv: Sequence[str] | None = None) -> None:
    """Command-line entry point: parse options, then serve until the server returns.

    The service's cleanup thread is started before the MCP server runs and is
    stopped afterwards, even when the server exits with an exception.
    """
    options = parse_args(argv)
    service = FilterService(
        filter_ttl_seconds=options.filter_ttl_seconds,
        cleanup_interval_seconds=options.cleanup_interval_seconds,
    )
    service.start()
    try:
        server = create_mcp_server(service)
        server.run()
    finally:
        service.stop()


# Run the server when this module is executed as a script.
if __name__ == "__main__":
    main()
|