groundworkers 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- groundworkers/__init__.py +3 -0
- groundworkers/adapters/__init__.py +1 -0
- groundworkers/adapters/omop_emb.py +251 -0
- groundworkers/adapters/omop_graph.py +721 -0
- groundworkers/adapters/omop_vocab.py +582 -0
- groundworkers/base/__init__.py +17 -0
- groundworkers/base/errors.py +19 -0
- groundworkers/base/results.py +38 -0
- groundworkers/base/server.py +52 -0
- groundworkers/base/sql.py +109 -0
- groundworkers/config.py +139 -0
- groundworkers/server.py +127 -0
- groundworkers/tools/__init__.py +1 -0
- groundworkers/tools/concept_tools.py +237 -0
- groundworkers/tools/embedding_tools.py +83 -0
- groundworkers/tools/resolver_tools.py +90 -0
- groundworkers/tools/search_tools.py +163 -0
- groundworkers/tools/system_tools.py +67 -0
- groundworkers-0.1.0.dist-info/METADATA +116 -0
- groundworkers-0.1.0.dist-info/RECORD +23 -0
- groundworkers-0.1.0.dist-info/WHEEL +5 -0
- groundworkers-0.1.0.dist-info/entry_points.txt +2 -0
- groundworkers-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import inspect
|
|
4
|
+
from typing import Any, Callable
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class GroundcrewServer:
    """Named registry of tool callables that can be served over MCP.

    Tools are collected in a plain dictionary keyed by tool name so they can
    be listed, described and invoked in-process.  The MCP SDK is only
    imported when :meth:`run` is called.
    """

    def __init__(self, name: str) -> None:
        """Create an empty registry identified by ``name``."""
        self._tools: dict[str, Callable[..., Any]] = {}
        self.name = name

    def tool(self, name: str | None = None) -> Callable[[Callable[..., Any]], Callable[..., Any]]:
        """Return a decorator that registers a callable as a tool.

        The tool is stored under ``name`` when one is given, otherwise under
        the callable's ``__name__``.  Registering a second callable under an
        existing key replaces the earlier one.  The decorated callable is
        returned unchanged.
        """

        def register(handler: Callable[..., Any]) -> Callable[..., Any]:
            key = name if name else handler.__name__
            self._tools[key] = handler
            return handler

        return register

    def list_tools(self) -> list[str]:
        """Return the registered tool names in sorted order."""
        names = list(self._tools)
        names.sort()
        return names

    def call(self, name: str, *args: Any, **kwargs: Any) -> Any:
        """Invoke the tool registered as ``name`` and return its result.

        Raises ``KeyError`` when no tool with that name is registered.
        """
        handler = self._tools[name]
        return handler(*args, **kwargs)

    def describe_tools(self) -> dict[str, dict[str, Any]]:
        """Map each tool name to its signature string and cleaned docstring.

        A tool without a docstring is reported with an empty ``"doc"``.
        """
        return {
            tool_name: {
                "signature": str(inspect.signature(handler)),
                "doc": inspect.getdoc(handler) or "",
            }
            for tool_name, handler in self._tools.items()
        }

    def run(
        self,
        transport: str = "stdio",
        host: str = "127.0.0.1",
        port: int = 8000,
    ) -> None:
        """Expose every registered tool through a FastMCP application.

        ``host`` and ``port`` are handed to the FastMCP constructor and
        ``transport`` to its ``run`` method.

        Raises:
            RuntimeError: if the MCP Python SDK cannot be imported.
        """
        # Imported lazily so the registry itself works without the SDK.
        try:
            from mcp.server.fastmcp import FastMCP
        except ImportError as exc:  # pragma: no cover
            raise RuntimeError(
                "The official MCP Python SDK is required to run the server. Install project dependencies first."
            ) from exc

        mcp_app = FastMCP(self.name, host=host, port=port)
        for registered_name, handler in self._tools.items():
            mcp_app.tool(name=registered_name)(handler)
        mcp_app.run(transport=transport)
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from sqlalchemy import MetaData, Table, and_, create_engine, func, or_, select, text
|
|
6
|
+
from sqlalchemy.engine import Engine
|
|
7
|
+
|
|
8
|
+
from groundworkers.base.results import DetailResult, ListResult, SearchHit, SearchResult
|
|
9
|
+
from groundworkers.config import FullTextConfig, SqlResourceConfig
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class SQLResource:
    """Read-only access to one database table described by a ``SqlResourceConfig``.

    The table definition is reflected from the database on first use and
    cached on the instance.
    """

    def __init__(self, engine: Engine, resource_id: str, config: SqlResourceConfig) -> None:
        """Store the engine, the resource identifier and its configuration."""
        self.engine = engine
        self.resource_id = resource_id
        self.config = config
        self._table: Table | None = None

    @property
    def table(self) -> Table:
        """Return the reflected table, reflecting it on first access."""
        if self._table is None:
            metadata = MetaData(schema=self.config.db_schema)
            self._table = Table(self.config.table, metadata, autoload_with=self.engine)
        return self._table

    def _project(self, row: Any) -> dict[str, Any]:
        """Convert a result row into a dictionary.

        When ``display_fields`` is configured only those keys are returned
        (a field missing from the row maps to ``None``); otherwise every
        column of the row is returned.
        """
        mapping = dict(row._mapping)
        if self.config.display_fields:
            return {field: mapping.get(field) for field in self.config.display_fields}
        return mapping

    def _validate_filters(self, filters: dict[str, Any] | None) -> dict[str, Any]:
        """Return ``filters`` after checking every key is an allowed filter field.

        ``None`` or an empty mapping yields ``{}``.

        Raises:
            ValueError: if a key is not listed in ``allowed_filter_fields``.
        """
        if not filters:
            return {}
        invalid = sorted(set(filters) - set(self.config.allowed_filter_fields or []))
        if invalid:
            raise ValueError(f"Unsupported filter fields for {self.resource_id}: {', '.join(invalid)}")
        return filters

    def get(self, key: Any) -> DetailResult:
        """Fetch the row whose primary key equals ``key``.

        The result's ``item`` is ``None`` when no row matches.
        """
        statement = select(self.table).where(self.table.c[self.config.primary_key] == key).limit(1)
        with self.engine.connect() as connection:
            row = connection.execute(statement).first()
        return DetailResult(resource_id=self.resource_id, item=self._project(row) if row else None)

    def list(self, *, limit: int = 20, offset: int = 0, filters: dict[str, Any] | None = None) -> ListResult:
        """Return one page of rows plus the total number of matching rows.

        Args:
            limit: page size, clamped to the range 1..100.
            offset: number of rows to skip; negative values are treated as 0.
            filters: optional ``column -> value`` equality filters, combined
                with AND.  Keys must be allowed filter fields.

        Raises:
            ValueError: if ``filters`` contains a field that is not allowed.
        """
        safe_limit = max(1, min(limit, 100))
        safe_offset = max(0, offset)
        validated = self._validate_filters(filters)
        predicates = [self.table.c[column] == value for column, value in validated.items()]
        statement = select(self.table)
        count_stmt = select(func.count()).select_from(self.table)
        if predicates:
            predicate = and_(*predicates)
            statement = statement.where(predicate)
            count_stmt = count_stmt.where(predicate)
        # LIMIT/OFFSET without ORDER BY lets the database return rows in any
        # order, so consecutive pages could overlap or skip rows.  Ordering by
        # the primary key makes pagination deterministic.
        statement = (
            statement.order_by(self.table.c[self.config.primary_key])
            .limit(safe_limit)
            .offset(safe_offset)
        )
        with self.engine.connect() as connection:
            rows = connection.execute(statement).all()
            total = connection.execute(count_stmt).scalar_one()
        return ListResult(
            resource_id=self.resource_id,
            items=[self._project(row) for row in rows],
            total=int(total),
            limit=safe_limit,
            offset=safe_offset,
        )
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class SQLTextSearchResource(SQLResource):
    """``SQLResource`` extended with a text search over a (possibly separate) table.

    On PostgreSQL with a configured ``vector_column`` the search uses
    ``<vector_column> @@ plainto_tsquery(query)``; otherwise it falls back to
    a case-insensitive substring match over the configured search fields.
    """

    # Escape character used for LIKE patterns in the substring fallback.
    _LIKE_ESCAPE = "/"

    def __init__(self, engine: Engine, resource_id: str, config: SqlResourceConfig, fulltext: FullTextConfig) -> None:
        """Initialise the base resource and remember the full-text settings."""
        super().__init__(engine, resource_id, config)
        self.fulltext = fulltext
        self._search_table: Table | None = None

    @property
    def search_table(self) -> Table:
        """Return the reflected search table, reflecting it on first access."""
        if self._search_table is None:
            metadata = MetaData(schema=self.fulltext.db_schema)
            self._search_table = Table(self.fulltext.table, metadata, autoload_with=self.engine)
        return self._search_table

    @classmethod
    def _escape_like(cls, value: str) -> str:
        """Escape LIKE wildcards in ``value`` so it is matched literally."""
        escape = cls._LIKE_ESCAPE
        # The escape character itself must be doubled first.
        escaped = value.replace(escape, escape + escape)
        return escaped.replace("%", escape + "%").replace("_", escape + "_")

    def search(self, query: str, *, limit: int = 10) -> SearchResult:
        """Search the search table for ``query``.

        Args:
            query: free-text search string.
            limit: maximum number of hits, clamped to the range 1..50.

        Returns:
            A ``SearchResult`` whose hits all carry ``score=1.0``; each hit's
            id is the row's primary-key value, or the row's position in the
            result when the search table has no such column.

        Raises:
            ValueError: if the substring fallback is used and none of the
                configured search fields exists in the search table.
        """
        safe_limit = max(1, min(limit, 50))
        dialect = self.engine.dialect.name
        table = self.search_table
        vector_column = self.fulltext.vector_column
        if dialect == "postgresql" and vector_column:
            if vector_column in table.c:
                # Use the reflected column so the identifier is quoted and
                # table-qualified by SQLAlchemy instead of being spliced into
                # raw SQL; the query text is sent as a bound parameter.
                match = table.c[vector_column].bool_op("@@")(func.plainto_tsquery(query))
                statement = select(table).where(match).limit(safe_limit)
            else:
                # NOTE(review): the configured value is not a reflected column
                # name (it may be an SQL expression), so the original raw
                # fragment is kept for compatibility.  It is interpolated
                # verbatim and must come from trusted configuration only.
                where_clause = text(f"{vector_column} @@ plainto_tsquery(:query)")
                statement = select(table).where(where_clause).limit(safe_limit).params(query=query)
        else:
            # Escape "%" and "_" so the user's text cannot act as a wildcard.
            pattern = f"%{self._escape_like(query)}%"
            predicates = [
                table.c[field].ilike(pattern, escape=self._LIKE_ESCAPE)
                for field in self.fulltext.search_fields
                if field in table.c
            ]
            if not predicates:
                raise ValueError(f"No search fields configured for {self.resource_id}")
            statement = select(table).where(or_(*predicates)).limit(safe_limit)
        with self.engine.connect() as connection:
            rows = connection.execute(statement).all()
        items = [
            SearchHit(
                id=str(row._mapping.get(self.config.primary_key, index)),
                score=1.0,
                payload=dict(row._mapping),
            )
            for index, row in enumerate(rows)
        ]
        return SearchResult(resource_id=self.resource_id, query=query, items=items, limit=safe_limit)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def build_engine(url: str) -> Engine:
    """Create a SQLAlchemy engine for the database ``url``.

    The engine is created with ``future=True``.
    """
    engine = create_engine(url, future=True)
    return engine
|
groundworkers/config.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import yaml
|
|
8
|
+
from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Allow-list for schema identifiers: one or more ASCII letters, digits or
# underscores.  Used by OmopGraphConfig.validate_schema_name.
SCHEMA_NAME_PATTERN = re.compile(r"^[A-Za-z0-9_]+$")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class FullTextConfig(BaseModel):
    """Settings for the table queried by ``SQLTextSearchResource.search``."""

    # Allow ``db_schema`` to be populated by its field name as well as by its
    # "schema" alias.
    model_config = ConfigDict(populate_by_name=True)

    # Schema passed to ``MetaData(schema=...)`` when the search table is
    # reflected; read from the "schema" key.
    db_schema: str | None = Field(default=None, alias="schema")
    # Name of the table that is searched.
    table: str
    # Columns matched with ILIKE when the tsvector path is not used.
    search_fields: list[str] = Field(default_factory=list)
    # Column matched with ``@@ plainto_tsquery(...)`` on PostgreSQL; when unset
    # the ILIKE fallback is used.
    vector_column: str | None = None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class EmbeddingIndexConfig(BaseModel):
    """Location of an embedding index attached to a SQL resource.

    NOTE(review): no consumer of this model is visible in this part of the
    package; the field meanings below are inferred from their names — confirm
    against the code that reads them.
    """

    # Unknown keys are rejected during validation.
    model_config = ConfigDict(extra="forbid")

    # Presumably the path of the index file.
    path: str
    # Presumably the dataset holding the item ids (defaults to "ids").
    ids_dataset: str = "ids"
    # Presumably the dataset holding the vectors (defaults to "vectors").
    vectors_dataset: str = "vectors"
    # Optional dataset name for payloads; None when not configured.
    payloads_dataset: str | None = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class SqlResourceConfig(BaseModel):
    """Description of one database table exposed through ``SQLResource``."""

    # Allow ``db_schema`` to be populated by its field name as well as by its
    # "schema" alias.
    model_config = ConfigDict(populate_by_name=True)

    # Schema passed to ``MetaData(schema=...)`` when the table is reflected;
    # read from the "schema" key.
    db_schema: str | None = Field(default=None, alias="schema")
    # Name of the table to reflect.
    table: str
    # Column used for single-row lookups (``SQLResource.get``) and as the id
    # of search hits.
    primary_key: str
    # Columns callers may filter on in ``SQLResource.list``; any other filter
    # key is rejected with ValueError.
    allowed_filter_fields: list[str] = Field(default_factory=list)
    # NOTE(review): not read by SQLResource in the visible code; presumably
    # columns used for text matching — confirm against the consumer.
    search_fields: list[str] = Field(default_factory=list)
    # When non-empty, rows returned by ``get``/``list`` are reduced to these
    # columns; when empty, every column is returned.
    display_fields: list[str] = Field(default_factory=list)
    # Optional full-text search settings for this resource.
    fulltext: FullTextConfig | None = None
    # Optional embedding index settings for this resource.
    embedding_index: EmbeddingIndexConfig | None = None
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class ModuleConfig(BaseModel):
    """Configuration of one module: its resources and relationship settings.

    NOTE(review): no consumer of this model is visible in this part of the
    package; the relationship fields are described from their names only —
    confirm against the code that reads them.
    """

    # Unknown keys are rejected during validation.
    model_config = ConfigDict(extra="forbid")

    # Whether the module is active (defaults to True).
    enabled: bool = True
    # SQL resources of the module, keyed by resource id.
    resources: dict[str, SqlResourceConfig] = Field(default_factory=dict)
    # Presumably the id of the resource that stores relationships.
    relationship_resource: str | None = None
    # Presumably the column names of the two relationship endpoints.
    relationship_left_key: str | None = None
    relationship_right_key: str | None = None
    # String-to-string mapping; semantics not visible here — TODO confirm.
    concept_reference_columns: dict[str, str] = Field(default_factory=dict)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class DatabaseConfig(BaseModel):
    """Connection settings for the top-level ``database`` section of the config."""

    # Unknown keys are rejected during validation.
    model_config = ConfigDict(extra="forbid")

    # Database URL; reported by ``AppConfig.describe`` as "database_url".
    url: str
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class OmopGraphConfig(BaseModel):
    """Settings for the OMOP graph adapter: database URL, vocabulary schema
    and an optional embedding model name."""

    model_config = ConfigDict(extra="forbid")

    db_url: str
    vocab_schema: str = "omop_vocab"
    emb_model_name: str | None = None

    @field_validator("vocab_schema")
    @classmethod
    def validate_schema_name(cls, value: str) -> str:
        """Accept ``value`` only if it consists of letters, digits and underscores.

        Raises:
            ValueError: if ``value`` contains any other character or is empty.
        """
        if SCHEMA_NAME_PATTERN.fullmatch(value) is None:
            raise ValueError("schema names must contain only letters, numbers, and underscores")
        return value
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class OmopEmbConfig(BaseModel):
    """Settings for the optional embedding adapter.

    Validation only applies when ``enabled`` is true.  The ``required_*`` and
    ``configured_api_credentials`` properties give callers non-optional
    values, raising ``ValueError`` when the needed setting is missing.  Like
    the validator, they treat an empty string as "not set".
    """

    # Unknown keys are rejected during validation.
    model_config = ConfigDict(extra="forbid")

    # The adapter is only built when this is true.
    enabled: bool = False
    # Database URL, used for the "pgvector" backend.
    db_url: str | None = None
    # Backend selector; compared case-insensitively by the server.
    backend_type: str = "sqlitevec"
    # Database file path, used for the "sqlitevec" backend.
    db_path: str | None = None
    default_model_name: str | None = None
    faiss_cache_dir: str | None = None
    # Embedding API endpoint and key; the key is required once a base is set.
    api_base: str | None = None
    api_key: str | None = None

    @model_validator(mode="after")
    def validate_enabled_backend(self) -> OmopEmbConfig:
        """Check that an enabled configuration is usable.

        Raises:
            ValueError: if enabled without ``db_url`` or ``db_path``, or if
                ``api_base`` is set without ``api_key``.
        """
        if not self.enabled:
            return self
        if not self.db_url and not self.db_path:
            raise ValueError("enabled omop_emb config requires db_url or db_path")
        if self.api_base and not self.api_key:
            raise ValueError("omop_emb.api_key is required when api_base is configured")
        return self

    @property
    def configured_api_credentials(self) -> tuple[str, str] | None:
        """Return ``(api_base, api_key)``, or ``None`` when no API base is set.

        Raises:
            ValueError: if ``api_base`` is set but ``api_key`` is missing.
        """
        # Truthiness (not ``is None``) keeps this in step with the validator:
        # an empty api_base means "not configured" and an empty api_key is
        # never handed out as a credential.
        if not self.api_base:
            return None
        if not self.api_key:
            raise ValueError("omop_emb.api_key is required when api_base is configured")
        return self.api_base, self.api_key

    @property
    def required_db_url(self) -> str:
        """Return ``db_url``.

        Raises:
            ValueError: if ``db_url`` is unset or empty.
        """
        if not self.db_url:
            raise ValueError("omop_emb.db_url is required for this configuration")
        return self.db_url

    @property
    def required_db_path(self) -> str:
        """Return ``db_path``.

        Raises:
            ValueError: if ``db_path`` is unset or empty.
        """
        if not self.db_path:
            raise ValueError("omop_emb.db_path is required for this configuration")
        return self.db_path
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _redact_url(url: str) -> str:
    """Return ``url`` with the password of its ``user:password@`` part masked.

    URLs without credentials are returned unchanged.  The authority is taken
    to end at the first ``/``, ``?`` or ``#``, so a password containing one of
    those characters must be URL-encoded to be recognised.
    """
    scheme, separator, rest = url.partition("://")
    if not separator:
        return url
    cut_points = [index for index in (rest.find(mark) for mark in "/?#") if index != -1]
    authority_end = min(cut_points) if cut_points else len(rest)
    authority, tail = rest[:authority_end], rest[authority_end:]
    # rpartition: the last "@" separates the user info from the host.
    userinfo, at_sign, host = authority.rpartition("@")
    if not at_sign:
        return url
    user, colon, _password = userinfo.partition(":")
    if not colon:
        return url
    return f"{scheme}://{user}:***@{host}{tail}"


class AppConfig(BaseModel):
    """Top-level application configuration loaded from a YAML file."""

    # Unknown keys are rejected during validation.
    model_config = ConfigDict(extra="forbid")

    app_name: str = "groundworkers"
    database: DatabaseConfig | None = None
    omop_graph: OmopGraphConfig | None = None
    omop_emb: OmopEmbConfig | None = None

    @classmethod
    def load(cls, path: str | Path) -> "AppConfig":
        """Read the YAML file at ``path`` (UTF-8) and validate it.

        An empty file is treated as an empty mapping.  The file is parsed
        with ``yaml.safe_load``.
        """
        data = yaml.safe_load(Path(path).read_text(encoding="utf-8")) or {}
        return cls.model_validate(data)

    def describe(self) -> dict[str, Any]:
        """Return a summary of the configuration that is safe to print.

        The shape matches the raw configuration (``app_name``,
        ``database_url``, ``omop_graph``, ``omop_emb``), but secrets are
        masked: passwords embedded in database URLs and the embedding
        ``api_key`` are replaced by ``***``.  The summary is printed by the
        ``--describe`` command-line flag.
        """
        graph_summary: dict[str, Any] | None = None
        if self.omop_graph is not None:
            graph_summary = self.omop_graph.model_dump()
            graph_summary["db_url"] = _redact_url(graph_summary["db_url"])

        emb_summary: dict[str, Any] | None = None
        if self.omop_emb is not None:
            emb_summary = self.omop_emb.model_dump()
            if emb_summary.get("db_url"):
                emb_summary["db_url"] = _redact_url(emb_summary["db_url"])
            if emb_summary.get("api_key"):
                emb_summary["api_key"] = "***"

        return {
            "app_name": self.app_name,
            "database_url": _redact_url(self.database.url) if self.database is not None else None,
            "omop_graph": graph_summary,
            "omop_emb": emb_summary,
        }
|
groundworkers/server.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import json
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from omop_emb import EmbeddingClient
|
|
8
|
+
|
|
9
|
+
from groundworkers.adapters.omop_emb import OmopEmbAdapter
|
|
10
|
+
from groundworkers.adapters.omop_graph import OmopGraphAdapter
|
|
11
|
+
from groundworkers.adapters.omop_vocab import OmopVocabAdapter
|
|
12
|
+
from groundworkers.base.server import GroundcrewServer
|
|
13
|
+
from groundworkers.base.sql import build_engine
|
|
14
|
+
from groundworkers.config import AppConfig
|
|
15
|
+
from groundworkers.tools.concept_tools import register_concept_tools
|
|
16
|
+
from groundworkers.tools.embedding_tools import register_embedding_tools
|
|
17
|
+
from groundworkers.tools.resolver_tools import register_resolver_tools
|
|
18
|
+
from groundworkers.tools.search_tools import register_search_tools
|
|
19
|
+
from groundworkers.tools.system_tools import register_system_tools
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class Adapters:
    """Bundle of the adapters built from the application configuration.

    Each attribute stays ``None`` when the corresponding adapter was not
    built by ``build_adapters``.
    """

    # Built when the config has an ``omop_graph`` section.
    omop_graph: OmopGraphAdapter | None = None
    # Built together with ``omop_graph``; shares its engine.
    omop_vocab: OmopVocabAdapter | None = None
    # Built when the config has an enabled ``omop_emb`` section.
    omop_emb: OmopEmbAdapter | None = None
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def build_adapters(config: AppConfig) -> Adapters:
    """Construct the adapters enabled by ``config``.

    * With an ``omop_graph`` section, one engine is created and shared by the
      graph adapter and the vocabulary adapter.
    * With an enabled ``omop_emb`` section, the embedding adapter is created
      with a lazy backend factory and, when API credentials are configured,
      a client factory.

    Adapters whose section is absent (or disabled) are left as ``None``.
    """
    bundle = Adapters()

    graph_cfg = config.omop_graph
    if graph_cfg is not None:
        shared_engine = build_engine(graph_cfg.db_url)
        bundle.omop_graph = OmopGraphAdapter(
            engine=shared_engine,
            vocab_schema=graph_cfg.vocab_schema,
            emb_model_name=graph_cfg.emb_model_name,
        )
        # OmopVocabAdapter shares the same engine — no separate connection pool.
        bundle.omop_vocab = OmopVocabAdapter(engine=shared_engine)

    emb_cfg = config.omop_emb
    if emb_cfg is None or not emb_cfg.enabled:
        return bundle

    # The embedding adapter reuses the graph adapter's engine when there is one.
    graph_adapter = bundle.omop_graph
    cdm_engine = None if graph_adapter is None else graph_adapter.engine

    def build_backend() -> object:
        """Create the embedding backend selected by ``backend_type``."""
        kind = emb_cfg.backend_type.lower()
        if kind == "sqlitevec":
            # Requires groundworkers[embedding-sqlitevec] → omop-emb[sqlitevec]
            from omop_emb.backends.sqlitevec import SQLiteVecEmbeddingBackend

            return SQLiteVecEmbeddingBackend.from_path(emb_cfg.required_db_path)
        if kind == "pgvector":
            # Requires groundworkers[embedding-pgvector] → omop-emb[pgvector]
            from omop_emb.backends.pgvector import PGVectorEmbeddingBackend

            emb_engine = build_engine(emb_cfg.required_db_url)
            return PGVectorEmbeddingBackend(emb_engine=emb_engine)
        raise RuntimeError(f"Unsupported embedding backend type: {emb_cfg.backend_type}")

    credentials = emb_cfg.configured_api_credentials
    if credentials is None:
        client_factory = None
    else:
        api_base, api_key = credentials

        def build_client(model_name: str) -> object:
            """Create an embedding client for ``model_name``."""
            return EmbeddingClient(
                model=model_name,
                api_base=api_base,
                api_key=api_key,
            )

        client_factory = build_client

    bundle.omop_emb = OmopEmbAdapter(
        backend_factory=build_backend,
        backend_type=emb_cfg.backend_type,
        default_model_name=emb_cfg.default_model_name,
        client_factory=client_factory,
        cdm_engine=cdm_engine,
        faiss_cache_dir=emb_cfg.faiss_cache_dir,
    )
    return bundle
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def create_server(config: AppConfig) -> GroundcrewServer:
    """Build a server named after ``config.app_name`` with its tools registered.

    Tool groups are registered only for the adapters that were built; the
    system tools are always registered.  The adapter bundle is attached to
    the server as ``server.adapters``.
    """
    server = GroundcrewServer(config.app_name)
    bundle = build_adapters(config)
    server.adapters = bundle  # type: ignore[attr-defined]

    graph, vocab, emb = bundle.omop_graph, bundle.omop_vocab, bundle.omop_emb

    if graph is not None:
        register_concept_tools(server, graph)
        register_resolver_tools(server, graph)
    if vocab is not None:
        register_search_tools(server, vocab)
    if emb is not None:
        register_embedding_tools(server, emb)

    # System tools receive both adapters, either of which may be None.
    register_system_tools(server, graph, emb)
    return server
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the groundworkers MCP server.

    Args:
        argv: argument strings to parse.  When ``None`` (the default) the
            arguments are taken from ``sys.argv``, so existing callers are
            unaffected; passing a list lets the parser be driven
            programmatically.

    Returns:
        A namespace with ``config``, ``describe``, ``transport``, ``host``
        and ``port``.
    """
    parser = argparse.ArgumentParser(description="Run the groundworkers MCP server")
    parser.add_argument("--config", required=True, help="Path to a YAML configuration file")
    parser.add_argument("--describe", action="store_true", help="Print configured tools and exit")
    parser.add_argument(
        "--transport",
        choices=["stdio", "streamable-http"],
        default="stdio",
        help="MCP transport (default: stdio)",
    )
    parser.add_argument("--host", default="127.0.0.1", help="Bind host for HTTP transport (default: 127.0.0.1)")
    parser.add_argument("--port", type=int, default=8000, help="Bind port for HTTP transport (default: 8000)")
    return parser.parse_args(argv)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def main() -> None:
    """Command-line entry point.

    Loads the configuration named by ``--config`` and builds the server.
    With ``--describe`` it prints the configuration summary and the tool
    descriptions as JSON and returns; otherwise it runs the server with the
    requested transport, host and port.
    """
    options = parse_args()
    app_config = AppConfig.load(options.config)
    server = create_server(app_config)

    if not options.describe:
        server.run(transport=options.transport, host=options.host, port=options.port)
        return

    report = {"config": app_config.describe(), "tools": server.describe_tools()}
    print(json.dumps(report, indent=2))
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
# Run the CLI only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from groundworkers.adapters.omop_graph import OmopGraphAdapter
|
|
6
|
+
from groundworkers.base.errors import GroundworkersError
|
|
7
|
+
from groundworkers.base.server import GroundcrewServer
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def register_concept_tools(server: GroundcrewServer, graph_adapter: OmopGraphAdapter) -> None:
    """Attach the deterministic concept lookup tools to ``server``.

    Each tool starts from a known identifier (a concept_id, or a
    vocabulary_id + concept_code pair) and returns a fact obtained from
    ``graph_adapter`` — none of them performs text matching.

    Tools never raise: invalid arguments yield an ``INVALID_INPUT`` error
    dictionary, a ``GroundworkersError`` is rendered with its ``to_dict()``,
    and any other exception becomes a ``QUERY_ERROR`` dictionary.

    For free-text grounding see resolver_tools.py.
    For agent-composable search primitives see search_tools.py.
    """
    positive_concept_id = "concept_id must be a positive integer"
    positive_endpoints = "source_id and target_id must be positive integers"
    vocabulary_required = "vocabulary_id must be a non-empty string"
    code_required = "concept_code must be a non-empty string"

    def _invalid_input(message: str) -> dict[str, Any]:
        # Error payload for arguments rejected before the adapter is called.
        return {"error": True, "code": "INVALID_INPUT", "message": message}

    def _not_found(message: str) -> dict[str, Any]:
        # Error payload for lookups that matched nothing.
        return {"error": True, "code": "NOT_FOUND", "message": message}

    def _query_error(exc: Exception) -> dict[str, Any]:
        # Error payload for unexpected failures inside the adapter.
        return {"error": True, "code": "QUERY_ERROR", "message": repr(exc)}

    def _clamp(value: int, low: int, high: int) -> int:
        # Restrict a caller-supplied bound to the server-enforced range.
        return max(low, min(value, high))

    @server.tool("concept_get")
    def concept_get(concept_id: int) -> dict[str, Any]:
        """Returns one OMOP concept by concept_id."""
        if concept_id <= 0:
            return _invalid_input(positive_concept_id)
        try:
            found = graph_adapter.get_concept(concept_id)
            if found is None:
                return _not_found(f"Concept {concept_id} was not found")
            return found
        except GroundworkersError as exc:
            return exc.to_dict()
        except Exception as exc:
            return _query_error(exc)

    @server.tool("concept_by_code")
    def concept_by_code(vocabulary_id: str, concept_code: str) -> dict[str, Any]:
        """Returns one OMOP concept by vocabulary_id and concept_code."""
        if not vocabulary_id.strip():
            return _invalid_input(vocabulary_required)
        if not concept_code.strip():
            return _invalid_input(code_required)
        try:
            matches = graph_adapter.get_concept_by_code(vocabulary_id, concept_code)
            if not matches:
                return _not_found(f"Concept {vocabulary_id}:{concept_code} was not found")
            return {"concepts": matches}
        except GroundworkersError as exc:
            return exc.to_dict()
        except Exception as exc:
            return _query_error(exc)

    @server.tool("concept_ancestors")
    def concept_ancestors(concept_id: int, max_depth: int = 5) -> dict[str, Any]:
        """Returns ancestor concepts for one concept_id."""
        if concept_id <= 0:
            return _invalid_input(positive_concept_id)
        depth = _clamp(max_depth, 1, 20)
        try:
            found = graph_adapter.get_ancestors(concept_id, depth)
            return {"concept_id": concept_id, "ancestors": found}
        except GroundworkersError as exc:
            return exc.to_dict()
        except Exception as exc:
            return _query_error(exc)

    @server.tool("concept_descendants")
    def concept_descendants(concept_id: int, max_depth: int = 3) -> dict[str, Any]:
        """Returns descendant concepts for one concept_id."""
        if concept_id <= 0:
            return _invalid_input(positive_concept_id)
        depth = _clamp(max_depth, 1, 10)
        try:
            found = graph_adapter.get_descendants(concept_id, depth)
            return {"concept_id": concept_id, "descendants": found}
        except GroundworkersError as exc:
            return exc.to_dict()
        except Exception as exc:
            return _query_error(exc)

    @server.tool("concept_relationships")
    def concept_relationships(concept_id: int) -> dict[str, Any]:
        """Returns all relationships for a concept grouped by direction (inbound/outbound)."""
        if concept_id <= 0:
            return _invalid_input(positive_concept_id)
        try:
            grouped = graph_adapter.get_edges(concept_id)
            return {"concept_id": concept_id, **grouped}
        except GroundworkersError as exc:
            return exc.to_dict()
        except Exception as exc:
            return _query_error(exc)

    @server.tool("concept_equivalency_path")
    def concept_equivalency_path(
        source_id: int,
        target_id: int,
        allow_hierarchical_traversal: bool = False,
        max_depth: int = 8,
    ) -> dict[str, Any]:
        """Returns the shortest equivalency path(s) between two OMOP concepts.

        Traverses only identity and (optionally) hierarchy relationships —
        never attribute, composition, or association edges. Cross-domain
        edges are always included because identity relationships are designed
        to span vocabulary boundaries.

        allow_hierarchical_traversal=False (default)
            Only IDENTITY predicates: Maps to, Concept same_as, Concept poss_eq,
            Mapped from, etc. The path represents a direct cross-vocabulary
            equivalence with no loss of specificity.

        allow_hierarchical_traversal=True
            Adds HIERARCHY predicates (Is a / Subsumes). The path may step up
            or down the ancestry chain, so the connection found may be at a
            broader level of abstraction (e.g. source maps to an ancestor of
            target rather than target itself).
        """
        if source_id <= 0 or target_id <= 0:
            return _invalid_input(positive_endpoints)
        depth = _clamp(max_depth, 2, 15)
        try:
            outcome = graph_adapter.find_equivalency_path(
                source_id, target_id, depth, allow_hierarchical_traversal
            )
            return {"source_id": source_id, "target_id": target_id, **outcome}
        except GroundworkersError as exc:
            return exc.to_dict()
        except Exception as exc:
            return _query_error(exc)

    @server.tool("concept_path")
    def concept_path(
        source_id: int,
        target_id: int,
        max_depth: int = 8,
        within_domain: bool = True,
    ) -> dict[str, Any]:
        """Returns the shortest path(s) between two OMOP concepts across all relationship types.

        Traverses every relationship kind in the concept graph: IDENTITY,
        HIERARCHY, ATTRIBUTE, COMPOSITION, and ASSOCIATION. Use this when
        you want to find any conceptual connection regardless of relationship
        type.

        within_domain=True (default)
            Only traverses edges where both endpoint concepts share the same
            domain_id (e.g. Condition → Condition). Reduces noise for most
            queries.

        within_domain=False
            Allows cross-domain traversal. Required when the path crosses
            vocabulary/domain boundaries via attribute relationships such as
            "Has asso morph", "Has finding site", or "Has associated procedure".
        """
        if source_id <= 0 or target_id <= 0:
            return _invalid_input(positive_endpoints)
        depth = _clamp(max_depth, 2, 15)
        try:
            outcome = graph_adapter.find_path(source_id, target_id, depth, within_domain=within_domain)
            return {"source_id": source_id, "target_id": target_id, **outcome}
        except GroundworkersError as exc:
            return exc.to_dict()
        except Exception as exc:
            return _query_error(exc)

    @server.tool("concept_neighbors")
    def concept_neighbors(
        concept_id: int,
        max_depth: int = 2,
        predicate_kinds: list[str] | None = None,
        max_nodes: int = 100,
        include_edges: bool = True,
    ) -> dict[str, Any]:
        """Bounded multi-hop neighborhood exploration for one concept.

        Performs a BFS from the seed concept, following outgoing relationship
        edges up to max_depth hops and collecting all reachable concepts and
        the edges connecting them. This reaches across all relationship types
        simultaneously — hierarchy, identity, attribute, composition, and
        association — unlike concept_ancestors / concept_descendants which only
        follow the parent/child hierarchy.

        Use this to discover what is conceptually related to a concept without
        knowing in advance which relationship types connect them. Typical uses:
          - finding associated anatomical sites, morphologies, or procedures
            for a clinical finding
          - mapping a drug concept to its ingredient, dose forms, and routes
          - exploring OMOP concept class membership and equivalence clusters

        predicate_kinds: optional list restricting which edge types to follow.
            Valid values: HIERARCHY, IDENTITY, ATTRIBUTE, COMPOSITION, ASSOCIATION.
            When omitted, all relationship types are traversed.

        max_depth: maximum hops from the seed (1–4, server-enforced).

        max_nodes: stop after visiting this many distinct concepts (10–500,
            server-enforced). terminated_early=true and terminated_reason="max_nodes"
            in the response indicate the traversal was cut short.

        include_edges: when true (default) the edges list contains every
            relationship edge in the discovered subgraph with its predicate_kind.
            Set false when you only need the neighbor concept list.
        """
        if concept_id <= 0:
            return _invalid_input(positive_concept_id)
        depth = _clamp(max_depth, 1, 4)
        node_budget = _clamp(max_nodes, 10, 500)
        try:
            return graph_adapter.get_neighbors(
                concept_id=concept_id,
                max_depth=depth,
                predicate_kinds=predicate_kinds,
                max_nodes=node_budget,
                include_edges=include_edges,
            )
        except GroundworkersError as exc:
            return exc.to_dict()
        except Exception as exc:
            return _query_error(exc)

    @server.tool("concept_map_to_standard")
    def concept_map_to_standard(vocabulary_id: str, concept_code: str) -> dict[str, Any]:
        """Maps a vocabulary concept code to its standard OMOP equivalent(s)."""
        if not vocabulary_id.strip():
            return _invalid_input(vocabulary_required)
        if not concept_code.strip():
            return _invalid_input(code_required)
        try:
            return graph_adapter.map_to_standard(vocabulary_id, concept_code)
        except GroundworkersError as exc:
            return exc.to_dict()
        except Exception as exc:
            return _query_error(exc)
|