groundworkers 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- groundworkers/__init__.py +3 -0
- groundworkers/adapters/__init__.py +1 -0
- groundworkers/adapters/omop_emb.py +251 -0
- groundworkers/adapters/omop_graph.py +721 -0
- groundworkers/adapters/omop_vocab.py +582 -0
- groundworkers/base/__init__.py +17 -0
- groundworkers/base/errors.py +19 -0
- groundworkers/base/results.py +38 -0
- groundworkers/base/server.py +52 -0
- groundworkers/base/sql.py +109 -0
- groundworkers/config.py +139 -0
- groundworkers/server.py +127 -0
- groundworkers/tools/__init__.py +1 -0
- groundworkers/tools/concept_tools.py +237 -0
- groundworkers/tools/embedding_tools.py +83 -0
- groundworkers/tools/resolver_tools.py +90 -0
- groundworkers/tools/search_tools.py +163 -0
- groundworkers/tools/system_tools.py +67 -0
- groundworkers-0.1.0.dist-info/METADATA +116 -0
- groundworkers-0.1.0.dist-info/RECORD +23 -0
- groundworkers-0.1.0.dist-info/WHEEL +5 -0
- groundworkers-0.1.0.dist-info/entry_points.txt +2 -0
- groundworkers-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from groundworkers.adapters.omop_emb import OmopEmbAdapter
|
|
6
|
+
from groundworkers.base.errors import GroundworkersError
|
|
7
|
+
from groundworkers.base.server import GroundcrewServer
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def register_embedding_tools(server: GroundcrewServer, emb_adapter: OmopEmbAdapter) -> None:
    """Register the embedding tools on ``server``, backed by ``emb_adapter``.

    Four tools are registered: ``embedding_index_status``,
    ``embedding_neighbours``, ``embedding_search`` and ``embedding_encode``.
    Each tool returns a dict and never raises for adapter failures: errors are
    reported as ``{"error": True, "code": ..., "message": ...}``.

    Args:
        server: Object whose ``tool(name)`` decorator registers each function.
        emb_adapter: Adapter providing ``index_status``, ``get_neighbours``,
            ``search`` and ``encode``.
    """
    # NOTE(review): the tool docstrings below are kept verbatim because the
    # server presumably surfaces them as tool descriptions — confirm against
    # GroundcrewServer.tool before rewording them.
    max_results = 50  # ceiling applied to every caller-supplied ``limit``

    def _invalid_input(message: str) -> dict[str, Any]:
        """Build the structured INVALID_INPUT error payload."""
        return {"error": True, "code": "INVALID_INPUT", "message": message}

    def _query_error(exc: Exception) -> dict[str, Any]:
        """Build the structured QUERY_ERROR payload for an unexpected exception."""
        return {"error": True, "code": "QUERY_ERROR", "message": repr(exc)}

    def _is_blank(model_name: str | None) -> bool:
        """Return True when a model name was supplied but is empty or whitespace."""
        return model_name is not None and not model_name.strip()

    @server.tool("embedding_index_status")
    def embedding_index_status() -> dict[str, Any]:
        """Returns status of the embedding backend and registered models."""
        try:
            return emb_adapter.index_status()
        except GroundworkersError as exc:
            return exc.to_dict()
        except Exception as exc:
            return _query_error(exc)

    @server.tool("embedding_neighbours")
    def embedding_neighbours(concept_id: int, limit: int = 10, model_name: str | None = None) -> dict[str, Any]:
        """Returns nearest embedding-space neighbours for one OMOP concept."""
        if concept_id <= 0:
            return _invalid_input("concept_id must be a positive integer")
        if _is_blank(model_name):
            return _invalid_input("model_name must be a non-empty string")
        safe_limit = max(1, min(limit, max_results))
        try:
            return emb_adapter.get_neighbours(
                concept_id=concept_id,
                limit=safe_limit,
                model_name=model_name,
            )
        except GroundworkersError as exc:
            return exc.to_dict()
        except Exception as exc:
            return _query_error(exc)

    @server.tool("embedding_search")
    def embedding_search(
        query: str,
        limit: int = 10,
        domain: str | None = None,
        vocabulary: str | None = None,
        standard_only: bool = False,
        active_only: bool = True,
        model_name: str | None = None,
    ) -> dict[str, Any]:
        """Searches the embedding index by encoding a query string on the fly."""
        if not query.strip():
            return _invalid_input("query must be a non-empty string")
        # Same rule as embedding_neighbours / embedding_encode: an explicitly
        # supplied but blank model name is a caller error, not "use default".
        if _is_blank(model_name):
            return _invalid_input("model_name must be a non-empty string")
        safe_limit = max(1, min(limit, max_results))
        try:
            return emb_adapter.search(
                query=query,
                limit=safe_limit,
                domain=domain,
                vocabulary=vocabulary,
                standard_only=standard_only,
                active_only=active_only,
                model_name=model_name,
            )
        except NotImplementedError as exc:
            # Raised when the adapter cannot serve this search; reported as an
            # unavailable backend rather than a query failure.
            return {"error": True, "code": "BACKEND_UNAVAIL", "message": str(exc)}
        except GroundworkersError as exc:
            return exc.to_dict()
        except Exception as exc:
            return _query_error(exc)

    @server.tool("embedding_encode")
    def embedding_encode(text: str, model_name: str | None = None) -> dict[str, Any]:
        """Encodes free text into one embedding vector using the configured model client."""
        if not text.strip():
            return _invalid_input("text must be a non-empty string")
        if _is_blank(model_name):
            return _invalid_input("model_name must be a non-empty string")
        try:
            return emb_adapter.encode(text=text, model_name=model_name)
        except GroundworkersError as exc:
            return exc.to_dict()
        except Exception as exc:
            return _query_error(exc)
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from groundworkers.adapters.omop_graph import OmopGraphAdapter
|
|
6
|
+
from groundworkers.base.errors import GroundworkersError
|
|
7
|
+
from groundworkers.base.server import GroundcrewServer
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def register_resolver_tools(server: GroundcrewServer, graph_adapter: OmopGraphAdapter) -> None:
    """Register free-text concept resolution tools against the MCP server.

    These tools map unstructured text (clinical terms, natural language, partial
    descriptions) to OMOP standard concepts. They are probabilistic — results
    are ranked candidates, not guaranteed matches.

    For deterministic lookups from known identifiers (concept_id, vocab+code,
    hierarchy traversal) see concept_tools.py.
    For agent-composable primitive search operations see search_tools.py.
    """

    def _rejected(reason: str) -> dict[str, Any]:
        """Build the INVALID_INPUT payload returned for bad arguments."""
        return {
            "error": True,
            "code": "INVALID_INPUT",
            "message": reason,
        }

    @server.tool("concept_ground")
    def concept_ground(
        query: str,
        limit: int = 5,
        domain: str | None = None,
        vocabulary_id: str | None = None,
        parent_ids: list[int] | None = None,
    ) -> dict[str, Any]:
        """Ground free text to matching OMOP standard concepts ranked by relevance.

        Runs a tiered resolver pipeline (Exact → FullText → Embedding → Partial)
        and short-circuits on the first tier that returns results. Each tier
        also matches against concept synonyms.

        match_kind in each result indicates which resolver tier produced it:
        EXACT — case-insensitive exact match on concept_name or synonym
        FULLTEXT — PostgreSQL FTS (requires tsvector sidecar column)
        EMBEDDING_NEAREST — nearest-neighbour embedding search
        PARTIAL — iLIKE fragment match (last resort)

        Each result includes scoring fields (total_score, relevance, parsimony_penalty,
        broadness_bonus, separation, embedding_score) and standardized_from when the
        grounded concept was mapped from a non-standard source concept.

        grounding_explanation summarises which resolver tier matched, whether embedding
        scoring was active, and which parent_ids constrained the search space.

        parent_ids: optional list of OMOP concept_ids that act as required ancestors.
        Only concepts that are descendants of at least one of these will be returned.
        Use this to constrain grounding to a specific clinical sub-hierarchy — e.g.
        pass the concept_id for "Neoplastic disease" to ensure only oncology results
        are returned, or the concept_id for a specific drug class to scope a drug
        lookup to that class.
        When omitted the search is anchored to the domain root (or all known domain
        roots when domain is also omitted).

        For finer control over resolver selection and quality thresholds, use the
        agent-composable primitives: concept_search_exact, concept_search_fulltext,
        embedding_search, and concept_navigate_to_standard.
        """
        needle = query.strip()
        if not needle:
            return _rejected("query must be a non-empty string")

        # ``None`` and ``[]`` both mean "no ancestor constraint".
        if any(pid <= 0 for pid in parent_ids or ()):
            return _rejected("all parent_ids must be positive integers")

        ancestors = None
        if parent_ids:
            ancestors = tuple(parent_ids)
        capped = max(1, min(limit, 20))

        try:
            outcome = graph_adapter.ground(
                needle,
                capped,
                domain or None,
                vocabulary_id or None,
                parent_ids=ancestors,
            )
            # Built inside the ``try`` so a missing key is reported as a
            # QUERY_ERROR payload instead of propagating to the caller.
            payload = {
                "query": needle,
                "results": outcome["results"],
                "grounding_explanation": outcome["grounding_explanation"],
            }
        except GroundworkersError as exc:
            payload = exc.to_dict()
        except Exception as exc:
            payload = {"error": True, "code": "QUERY_ERROR", "message": repr(exc)}
        return payload
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from groundworkers.adapters.omop_vocab import (
|
|
6
|
+
OmopVocabAdapter,
|
|
7
|
+
OmopVocabError,
|
|
8
|
+
serialise_concept_match,
|
|
9
|
+
serialise_standard_mapping,
|
|
10
|
+
)
|
|
11
|
+
from groundworkers.base.server import GroundcrewServer
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def register_search_tools(server: GroundcrewServer, vocab_adapter: OmopVocabAdapter) -> None:
    """Register agent-composable primitive search tools against the MCP server.

    Three tools are registered: ``concept_search_exact``,
    ``concept_search_fulltext`` and ``concept_navigate_to_standard``. Each
    returns a dict; failures are reported as
    ``{"error": True, "code": ..., "message": ...}`` rather than raised.

    Args:
        server: Object whose ``tool(name)`` decorator registers each function.
        vocab_adapter: Adapter providing ``search_exact``, ``search_fulltext``
            and ``navigate_to_standard``.
    """
    # NOTE(review): the tool docstrings below are kept verbatim because the
    # server presumably surfaces them as tool descriptions — confirm against
    # GroundcrewServer.tool before rewording them.
    max_results = 50  # ceiling applied to every caller-supplied ``limit``
    max_batch = 100  # largest concept_ids batch accepted in one call

    def _invalid_input(message: str) -> dict[str, Any]:
        """Build the structured INVALID_INPUT error payload."""
        return {
            "error": True,
            "code": "INVALID_INPUT",
            "message": message,
        }

    def _query_error(message: str) -> dict[str, Any]:
        """Build the structured QUERY_ERROR error payload."""
        return {"error": True, "code": "QUERY_ERROR", "message": message}

    @server.tool("concept_search_exact")
    def concept_search_exact(
        query: str,
        domain: str | None = None,
        vocabulary_id: str | None = None,
        standard_only: bool = False,
        include_synonyms: bool = True,
        limit: int = 20,
    ) -> dict[str, Any]:
        """
        Case-insensitive exact match of a query string against concept_name and
        (optionally) concept_synonym_name.

        Unlike concept_ground, standard_only defaults to false — results include
        non-standard concepts so the caller can inspect the standard_concept flag
        and decide whether to call concept_navigate_to_standard.

        match_source is "name" when the concept_name matched, "synonym" when a
        concept_synonym_name matched. matched_synonym contains the synonym string
        that triggered the match.
        """
        stripped = query.strip()
        if not stripped:
            return _invalid_input("query must be a non-empty string")
        safe_limit = max(1, min(limit, max_results))
        try:
            # Search with the same stripped text that is echoed back, so the
            # reported query is exactly the one that was executed.
            results = vocab_adapter.search_exact(
                stripped,
                domain=domain or None,
                vocabulary_id=vocabulary_id or None,
                standard_only=standard_only,
                include_synonyms=include_synonyms,
                limit=safe_limit,
            )
            return {
                "query": stripped,
                "results": [serialise_concept_match(r) for r in results],
            }
        except ValueError as exc:
            return _invalid_input(str(exc))
        except OmopVocabError as exc:
            return _query_error(str(exc))
        except Exception as exc:
            return _query_error(repr(exc))

    @server.tool("concept_search_fulltext")
    def concept_search_fulltext(
        query: str,
        domain: str | None = None,
        vocabulary_id: str | None = None,
        standard_only: bool = False,
        include_synonyms: bool = True,
        min_rank: float = 0.0,
        limit: int = 20,
    ) -> dict[str, Any]:
        """
        PostgreSQL full-text search against the concept_name tsvector sidecar column.

        Returns results with ts_rank exposed so the caller can apply its own quality
        threshold (e.g. discard results where ts_rank < 0.05).

        tsvector_available indicates whether the GIN-indexed sidecar column was
        detected. When false, results is always [] and the caller should fall
        through to embedding_search or concept_search_exact.

        min_rank is an optional server-side pre-filter (avoids returning very
        large result sets); the caller's own threshold may be stricter.

        standard_only defaults to false — see concept_search_exact.
        """
        stripped = query.strip()
        if not stripped:
            return _invalid_input("query must be a non-empty string")
        # Written as a negated chained comparison so NaN is rejected as well.
        if not (0.0 <= min_rank <= 1.0):
            return _invalid_input("min_rank must be between 0.0 and 1.0")
        safe_limit = max(1, min(limit, max_results))
        try:
            # Search with the same stripped text that is echoed back.
            results, fts_available = vocab_adapter.search_fulltext(
                stripped,
                domain=domain or None,
                vocabulary_id=vocabulary_id or None,
                standard_only=standard_only,
                include_synonyms=include_synonyms,
                min_rank=min_rank,
                limit=safe_limit,
            )
            return {
                "query": stripped,
                "tsvector_available": fts_available,
                "results": [serialise_concept_match(r) for r in results],
            }
        except ValueError as exc:
            return _invalid_input(str(exc))
        except OmopVocabError as exc:
            return _query_error(str(exc))
        except Exception as exc:
            return _query_error(repr(exc))

    @server.tool("concept_navigate_to_standard")
    def concept_navigate_to_standard(
        concept_ids: list[int],
    ) -> dict[str, Any]:
        """
        Given a list of concept_ids, return their standard OMOP equivalents by
        following "Maps to" relationship edges.

        For concepts that are already standard: standard_concepts contains the
        concept itself (relationship_id = "self").
        For concepts with no "Maps to" mapping: standard_concepts is [].
        concept_ids not found in the vocabulary are silently omitted from results.

        This is the batch-by-concept-id form of concept_map_to_standard. Use it
        after concept_search_exact, concept_search_fulltext, or embedding_search
        to resolve non-standard candidates to their standard equivalents in a
        single round-trip.
        """
        if not concept_ids:
            # Nothing to resolve; answered without calling the adapter.
            return {"results": []}
        if len(concept_ids) > max_batch:
            return _invalid_input("concept_ids must contain at most 100 entries")
        if any(cid <= 0 for cid in concept_ids):
            return _invalid_input("all concept_ids must be positive integers")
        try:
            mappings = vocab_adapter.navigate_to_standard(concept_ids)
            return {"results": [serialise_standard_mapping(m) for m in mappings]}
        except OmopVocabError as exc:
            return _query_error(str(exc))
        except Exception as exc:
            return _query_error(repr(exc))
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""System-level MCP tools: system_status and system_vocabulary_catalogue.
|
|
2
|
+
|
|
3
|
+
These tools are always registered regardless of adapter availability, so
|
|
4
|
+
callers always get a structured response (never "unknown tool").
|
|
5
|
+
|
|
6
|
+
system_status — reports availability of every configured adapter.
|
|
7
|
+
system_vocabulary_catalogue — returns the full OMOP vocabulary/domain/class
|
|
8
|
+
catalogue from OmopGraphAdapter. Requires omop_graph to be configured.
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from groundworkers.adapters.omop_emb import OmopEmbAdapter
|
|
15
|
+
from groundworkers.adapters.omop_graph import OmopGraphAdapter
|
|
16
|
+
from groundworkers.base.errors import GroundworkersError
|
|
17
|
+
from groundworkers.base.server import GroundcrewServer
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def register_system_tools(
    server: GroundcrewServer,
    graph_adapter: OmopGraphAdapter | None = None,
    emb_adapter: OmopEmbAdapter | None = None,
) -> None:
    """Register ``system_status`` and ``system_vocabulary_catalogue`` on ``server``.

    Both adapters are optional. A missing adapter is reported as
    "not configured" by ``system_status``; ``system_vocabulary_catalogue``
    answers with a BACKEND_UNAVAIL error payload when ``graph_adapter`` is
    ``None``.
    """

    def _unavailable(reason: str) -> dict[str, Any]:
        """Describe an adapter that cannot be used, with the reason why."""
        return {"available": False, "reason": reason}

    def _graph_state() -> dict[str, Any]:
        """Probe the graph adapter; a failing probe counts as unavailable."""
        if graph_adapter is None:
            return _unavailable("not configured")
        try:
            return {"available": graph_adapter.is_available()}
        except Exception as exc:
            return _unavailable(repr(exc))

    def _embedding_state() -> dict[str, Any]:
        """Probe the embedding adapter; a failing probe counts as unavailable."""
        if emb_adapter is None:
            return _unavailable("not configured")
        try:
            report = emb_adapter.index_status()
            return {
                "available": report["available"],
                "models": report.get("models", []),
            }
        except Exception as exc:
            return _unavailable(repr(exc))

    @server.tool("system_status")
    def system_status() -> dict[str, Any]:
        """Returns availability of each configured adapter/backend."""
        adapters: dict[str, Any] = {
            "omop_graph": _graph_state(),
            "omop_emb": _embedding_state(),
        }
        # The server counts as available as soon as one adapter is usable.
        overall = any(entry.get("available") for entry in adapters.values())
        return {"available": overall, "adapters": adapters}

    @server.tool("system_vocabulary_catalogue")
    def system_vocabulary_catalogue() -> dict[str, Any]:
        """Returns all OMOP vocabularies, domains, and concept classes."""
        if graph_adapter is None:
            return {
                "error": True,
                "code": "BACKEND_UNAVAIL",
                "message": "omop_graph adapter is not configured",
            }
        try:
            return graph_adapter.get_vocabulary_catalogue()
        except GroundworkersError as exc:
            return exc.to_dict()
        except Exception as exc:
            return {"error": True, "code": "QUERY_ERROR", "message": repr(exc)}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: groundworkers
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Groundworkers MCP server — read-only agentive access to OMOP vocabularies, concept graphs, and embeddings.
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: mcp[cli]<2,>=1
|
|
8
|
+
Requires-Dist: pydantic<3,>=2
|
|
9
|
+
Requires-Dist: pyyaml<7,>=6
|
|
10
|
+
Requires-Dist: SQLAlchemy<3,>=2
|
|
11
|
+
Requires-Dist: psycopg[binary]<4,>=3.1
|
|
12
|
+
Requires-Dist: omop-graph>=1.1.0
|
|
13
|
+
Requires-Dist: omop-emb>=1.0.0
|
|
14
|
+
Provides-Extra: embedding-pgvector
|
|
15
|
+
Requires-Dist: omop-emb[pgvector]>=1.0.0; extra == "embedding-pgvector"
|
|
16
|
+
Provides-Extra: embedding-faiss
|
|
17
|
+
Requires-Dist: omop-emb[faiss-cpu]>=1.0.0; extra == "embedding-faiss"
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest<9,>=8; extra == "dev"
|
|
20
|
+
|
|
21
|
+
# groundworkers
|
|
22
|
+
|
|
23
|
+
**groundworkers** is an atomic, read-only MCP (Model Context Protocol) tool library for
|
|
24
|
+
navigating the OMOP vocabularies. It exposes OMOP vocabulary lookups, embedding similarity search,
|
|
25
|
+
cohort concept references, and system status as typed MCP tools that any MCP client can call —
|
|
26
|
+
including [groundcrew](https://github.com/AustralianCancerDataNetwork/groundcrew),
|
|
27
|
+
Claude Code, and autonomous agents.
|
|
28
|
+
|
|
29
|
+
Read-only. No patient-level data. No write operations.
|
|
30
|
+
|
|
31
|
+
## What it exposes
|
|
32
|
+
|
|
33
|
+
| Group | Tools |
|
|
34
|
+
|---|---|
|
|
35
|
+
| **Concept** | `concept_get`, `concept_by_code`, `concept_ancestors`, `concept_descendants`, `concept_relationships`, `concept_equivalency_path`, `concept_path`, `concept_map_to_standard`, `concept_neighbors` |
|
|
36
|
+
| **Resolver** | `concept_ground` (with `parent_ids`, scoring fields, and `grounding_explanation`) |
|
|
37
|
+
| **Search** | `concept_search_exact`, `concept_search_fulltext`, `concept_navigate_to_standard` |
|
|
38
|
+
| **Embedding** | `embedding_index_status`, `embedding_neighbours`, `embedding_search`, `embedding_encode` |
|
|
39
|
+
| **Cohort** | `cohort_find_concept_references` |
|
|
40
|
+
| **System** | `system_status`, `system_vocabulary_catalogue` |
|
|
41
|
+
|
|
42
|
+
Tools are registered conditionally — if an adapter is not configured, its tools are
|
|
43
|
+
simply not registered. `system_status` and `system_vocabulary_catalogue` are always
|
|
44
|
+
registered so clients can always query adapter availability.
|
|
45
|
+
|
|
46
|
+
## Quick start
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
uv venv
|
|
50
|
+
uv sync --extra dev --extra embedding-pgvector
|
|
51
|
+
uv run groundworkers --config config/groundworkers.example.yaml --describe
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Start the server:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
uv run groundworkers --config config/groundworkers.example.yaml
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Example config
|
|
61
|
+
|
|
62
|
+
```yaml
|
|
63
|
+
omop_graph:
|
|
64
|
+
db_url: "postgresql+psycopg://user:pass@localhost:5432/omop"
|
|
65
|
+
vocab_schema: omop_vocab
|
|
66
|
+
|
|
67
|
+
omop_emb:
|
|
68
|
+
enabled: true
|
|
69
|
+
backend_type: pgvector
|
|
70
|
+
db_url: "postgresql+psycopg://user:pass@localhost:5432/omop"
|
|
71
|
+
default_model_name: qwen3-embedding:0.6b
|
|
72
|
+
api_base: "http://localhost:11434/v1"
|
|
73
|
+
api_key: "ollama"
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## Install matrix
|
|
77
|
+
|
|
78
|
+
| Use case | Extras |
|
|
79
|
+
|---|---|
|
|
80
|
+
| Core server only | none |
|
|
81
|
+
| Concept tools | `concept-tools` |
|
|
82
|
+
| Cohort tools | `cohort-tools` |
|
|
83
|
+
| Embedding tools (sqlite-vec) | `embedding-tools` |
|
|
84
|
+
| Embedding tools (pgvector) | `embedding-pgvector` |
|
|
85
|
+
| Embedding tools (FAISS sidecar) | `embedding-faiss` |
|
|
86
|
+
| All tool families | `all-tools` |
|
|
87
|
+
| All + pgvector embeddings | `all-tools-pgvector` |
|
|
88
|
+
| All + FAISS embeddings | `all-tools-faiss` |
|
|
89
|
+
| Development | `dev` |
|
|
90
|
+
| Development + all tools | `dev-all` |
|
|
91
|
+
|
|
92
|
+
## Layout
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
src/groundworkers/
|
|
96
|
+
adapters/ — omop_graph, omop_vocab, omop_emb adapter classes (oa_cohorts planned)
|
|
97
|
+
base/ — GroundcrewServer, errors, results, SQL helpers
|
|
98
|
+
tools/ — MCP tool registrations by domain
|
|
99
|
+
config.py — Pydantic config models (AppConfig, OmopGraphConfig, etc.)
|
|
100
|
+
server.py — Server factory and CLI entry point
|
|
101
|
+
config/ — Example YAML configs
|
|
102
|
+
_design/ — Architecture notes and spec documents
|
|
103
|
+
tests/ — Unit and integration tests
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Adapter backends
|
|
107
|
+
|
|
108
|
+
- **omop-graph** — concept lookup, hierarchy traversal, full-text search
|
|
109
|
+
- **omop-emb** — embedding index (sqlite-vec, pgvector, or FAISS sidecar)
|
|
110
|
+
- **OpenAnalytics cohorts** — cohort concept reference queries (Phase N)
|
|
111
|
+
|
|
112
|
+
## Companion repos
|
|
113
|
+
|
|
114
|
+
- [groundcrew](https://github.com/AustralianCancerDataNetwork/groundcrew) — ACP orchestration layer that drives this tool substrate
|
|
115
|
+
- [omop-graph](https://australiancancerdatanetwork.github.io/omop-graph/) — OMOP virtual knowledge graph library
|
|
116
|
+
- [omop-emb](https://australiancancerdatanetwork.github.io/omop-emb/) — OMOP embedding index library
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
groundworkers/__init__.py,sha256=4t_crzhrLum--oyowUMxtjBTzUtWp7oRTF22ewEvJG4,49
|
|
2
|
+
groundworkers/config.py,sha256=WvymBnWqAy8AS787wGJKTBGiIkci0K2dqUzTNwgqoiI,4390
|
|
3
|
+
groundworkers/server.py,sha256=oM6EZIfSZcM40NP9Vk7GA1KZs2-ln27vkSRYtfSidk0,5101
|
|
4
|
+
groundworkers/adapters/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
5
|
+
groundworkers/adapters/omop_emb.py,sha256=Bw_DCuwjJpm9EYTGIvVCFs9VfpNkg7YDJqj7R4mjoxU,9673
|
|
6
|
+
groundworkers/adapters/omop_graph.py,sha256=sJqwPBuzRNt76kqEKX2pPCQSqyEIvfF0TU-3kPil2Yo,31204
|
|
7
|
+
groundworkers/adapters/omop_vocab.py,sha256=NuUlwWs-vRe2t74DBbN8S-rDHkTZcvjT9YotY_cL69k,23557
|
|
8
|
+
groundworkers/base/__init__.py,sha256=cjqp5jap9pDXxa9RNdLWk6zHePCfy7iyJNg4_ufwROI,455
|
|
9
|
+
groundworkers/base/errors.py,sha256=kXWbyf8Nf_9bVm-SJwQeYDs9UGmVIpqytszKPuye80E,437
|
|
10
|
+
groundworkers/base/results.py,sha256=72ZAQdz2BYptBj9sfsUN5P0vcRHsfrZovGNu-frZyuU,802
|
|
11
|
+
groundworkers/base/server.py,sha256=9flE-dpS8S2W-cNCf5X7YfCuS4vwDfMWhKBmOVUJweo,1730
|
|
12
|
+
groundworkers/base/sql.py,sha256=Ry9yWUHX2ylqLM_qGHWz9jnCujuUSS12rnhLtv4D3Mg,4789
|
|
13
|
+
groundworkers/tools/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
14
|
+
groundworkers/tools/concept_tools.py,sha256=tKM4eceDGFwftb2zZnSaSR0EM4XKPnEBk-lO2A0knOo,11440
|
|
15
|
+
groundworkers/tools/embedding_tools.py,sha256=f48YUpqXEcvY4oqM588W81mxUypVnz4b_YDdirHMFAw,3729
|
|
16
|
+
groundworkers/tools/resolver_tools.py,sha256=mFMLFbdlBnLvqir1ol7d5hJNeUrA5iy8t4yWsjtKnY0,4200
|
|
17
|
+
groundworkers/tools/search_tools.py,sha256=nIPO5FI69-nCb3wUa-jv1KTuGB-bGIgDoKCqXNBci9Q,6544
|
|
18
|
+
groundworkers/tools/system_tools.py,sha256=8gFitN69SCG_U45oumdqlcG74x86JYYTqopEbkpixYk,2720
|
|
19
|
+
groundworkers-0.1.0.dist-info/METADATA,sha256=DXp8SF8VQN6ittghIO1TfTaqV7ohiCdYcI497cM4qYA,4418
|
|
20
|
+
groundworkers-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
21
|
+
groundworkers-0.1.0.dist-info/entry_points.txt,sha256=MYrp7tx1lT05mfSL3Z7qIAVXEdht5m0Hf3FTkdZMebM,60
|
|
22
|
+
groundworkers-0.1.0.dist-info/top_level.txt,sha256=ZtSWb8IJggAZKgS2CLyQnho63WeiAYWPk5jSGizhrLc,14
|
|
23
|
+
groundworkers-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
groundworkers
|