groundworkers 0.1.0__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- groundworkers-0.2.0/PKG-INFO +208 -0
- groundworkers-0.2.0/README.md +178 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/pyproject.toml +12 -2
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/adapters/omop_emb.py +19 -11
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/adapters/omop_graph.py +69 -10
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/adapters/omop_vocab.py +272 -0
- groundworkers-0.1.0/src/groundworkers/server.py → groundworkers-0.2.0/src/groundworkers/app.py +50 -55
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/config.py +14 -0
- groundworkers-0.2.0/src/groundworkers/server.py +62 -0
- groundworkers-0.2.0/src/groundworkers/services/__init__.py +3 -0
- groundworkers-0.2.0/src/groundworkers/services/mapping.py +600 -0
- groundworkers-0.2.0/src/groundworkers/tools/mapping_tools.py +225 -0
- groundworkers-0.2.0/src/groundworkers.egg-info/PKG-INFO +208 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers.egg-info/SOURCES.txt +4 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers.egg-info/requires.txt +11 -1
- {groundworkers-0.1.0 → groundworkers-0.2.0}/tests/test_server_registry.py +12 -0
- groundworkers-0.1.0/PKG-INFO +0 -116
- groundworkers-0.1.0/README.md +0 -96
- groundworkers-0.1.0/src/groundworkers.egg-info/PKG-INFO +0 -116
- {groundworkers-0.1.0 → groundworkers-0.2.0}/setup.cfg +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/__init__.py +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/adapters/__init__.py +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/base/__init__.py +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/base/errors.py +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/base/results.py +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/base/server.py +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/base/sql.py +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/tools/__init__.py +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/tools/concept_tools.py +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/tools/embedding_tools.py +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/tools/resolver_tools.py +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/tools/search_tools.py +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers/tools/system_tools.py +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers.egg-info/dependency_links.txt +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers.egg-info/entry_points.txt +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/src/groundworkers.egg-info/top_level.txt +0 -0
- {groundworkers-0.1.0 → groundworkers-0.2.0}/tests/test_sql_resource.py +0 -0
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: groundworkers
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Groundworkers MCP server — read-only agentive access to OMOP vocabularies, concept graphs, and embeddings.
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: mcp[cli]<2,>=1
|
|
8
|
+
Requires-Dist: pydantic<3,>=2
|
|
9
|
+
Requires-Dist: pyyaml<7,>=6
|
|
10
|
+
Requires-Dist: SQLAlchemy<3,>=2
|
|
11
|
+
Requires-Dist: psycopg[binary]<4,>=3.1
|
|
12
|
+
Requires-Dist: omop-graph>=1.1.0
|
|
13
|
+
Requires-Dist: omop-emb>=1.0.0
|
|
14
|
+
Provides-Extra: embedding-pgvector
|
|
15
|
+
Requires-Dist: omop-emb[pgvector]>=1.0.0; extra == "embedding-pgvector"
|
|
16
|
+
Provides-Extra: embedding-faiss
|
|
17
|
+
Requires-Dist: omop-emb[faiss-cpu]>=1.0.0; extra == "embedding-faiss"
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: ipython>=8.0; extra == "dev"
|
|
20
|
+
Requires-Dist: tornado>=6.5.5; extra == "dev"
|
|
21
|
+
Requires-Dist: pytest>=9.0.3; extra == "dev"
|
|
22
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
23
|
+
Requires-Dist: mypy>=1.8; extra == "dev"
|
|
24
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
25
|
+
Requires-Dist: mkdocs-material>=9.7.1; extra == "dev"
|
|
26
|
+
Requires-Dist: mkdocstrings-python>=2.0.1; extra == "dev"
|
|
27
|
+
Requires-Dist: mkdocs>=1.6.1; extra == "dev"
|
|
28
|
+
Requires-Dist: requests>=2.33.0; extra == "dev"
|
|
29
|
+
Requires-Dist: mkdocs-mermaid2-plugin; extra == "dev"
|
|
30
|
+
|
|
31
|
+
# groundworkers
|
|
32
|
+
|
|
33
|
+
`groundworkers` is a read-only OMOP vocabulary integration package. You can use it
|
|
34
|
+
in two ways:
|
|
35
|
+
|
|
36
|
+
- as an **MCP server** for tool consumers such as `groundcrew`, Claude Code, and
|
|
37
|
+
other MCP clients
|
|
38
|
+
- as a **Python library** for applications that want to call mapping and retrieval
|
|
39
|
+
logic directly
|
|
40
|
+
|
|
41
|
+
No patient-level writes. No session state. No transport-specific business logic.
|
|
42
|
+
|
|
43
|
+
## When to use it
|
|
44
|
+
|
|
45
|
+
Use `groundworkers` when you want:
|
|
46
|
+
|
|
47
|
+
- OMOP concept lookup and hierarchy navigation
|
|
48
|
+
- exact, full-text, and embedding-based concept retrieval
|
|
49
|
+
- mapping-oriented evidence bundles and context assembly
|
|
50
|
+
- one package that works both over MCP and in-process from Python
|
|
51
|
+
|
|
52
|
+
## How it is organized
|
|
53
|
+
|
|
54
|
+
```mermaid
|
|
55
|
+
flowchart LR
|
|
56
|
+
Client1[Python app] --> App[build_application]
|
|
57
|
+
Client2[MCP client] --> Server[groundworkers server]
|
|
58
|
+
App --> Services[services/]
|
|
59
|
+
Server --> Tools[tools/]
|
|
60
|
+
Tools --> Services
|
|
61
|
+
Services --> Adapters[adapters/]
|
|
62
|
+
Adapters --> OG[omop-graph]
|
|
63
|
+
Adapters --> OE[omop-emb]
|
|
64
|
+
Adapters --> DB[(OMOP DB)]
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
- `adapters/` handle dependency-specific details
|
|
68
|
+
- `services/` handle reusable workflow logic
|
|
69
|
+
- `tools/` expose MCP-facing wrappers
|
|
70
|
+
- `app.py` and `server.py` wire those pieces together
|
|
71
|
+
|
|
72
|
+
## What it exposes
|
|
73
|
+
|
|
74
|
+
| Group | Surface | Notes |
|
|
75
|
+
|---|---|---|
|
|
76
|
+
| Concept | `concept_get`, `concept_by_code`, `concept_ancestors`, `concept_descendants`, `concept_relationships`, `concept_equivalency_path`, `concept_path`, `concept_map_to_standard`, `concept_neighbors` | Backed by `OmopGraphAdapter` |
|
|
77
|
+
| Resolver | `concept_ground` | Best-answer grounding pipeline |
|
|
78
|
+
| Search | `concept_search_exact`, `concept_search_fulltext`, `concept_navigate_to_standard` | Low-level lexical primitives |
|
|
79
|
+
| Mapping | `concept_search_normalized`, `concept_candidate_bundle`, `concept_parent_backoff`, `concept_mapping_context`, `concept_map_to_value`, `concept_resolve_mapping_expression`, `mapping_evaluate_candidates` | High-level mapping workflows |
|
|
80
|
+
| Embedding | `embedding_index_status`, `embedding_neighbours`, `embedding_search`, `embedding_encode` | Backed by `OmopEmbAdapter` |
|
|
81
|
+
| System | `system_status`, `system_vocabulary_catalogue` | Always registered |
|
|
82
|
+
|
|
83
|
+
## Quick start
|
|
84
|
+
|
|
85
|
+
### MCP server
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
uv venv
|
|
89
|
+
uv sync --extra dev --extra embedding-pgvector
|
|
90
|
+
uv run groundworkers --config config/groundworkers.example.yaml --describe
|
|
91
|
+
uv run groundworkers --config config/groundworkers.example.yaml
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### Direct Python use
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from groundworkers.app import build_application
|
|
98
|
+
from groundworkers.config import AppConfig
|
|
99
|
+
|
|
100
|
+
config = AppConfig.model_validate(
|
|
101
|
+
{
|
|
102
|
+
"omop_graph": {
|
|
103
|
+
"db_url": "postgresql+psycopg://user:pass@localhost:5432/omop",
|
|
104
|
+
"vocab_schema": "omop_vocab",
|
|
105
|
+
},
|
|
106
|
+
"omop_emb": {
|
|
107
|
+
"enabled": True,
|
|
108
|
+
"backend_type": "pgvector",
|
|
109
|
+
"db_url": "postgresql+psycopg://user:pass@localhost:5432/omop",
|
|
110
|
+
"default_model_name": "qwen3-embedding:0.6b",
|
|
111
|
+
"api_base": "http://localhost:11434/v1",
|
|
112
|
+
"api_key": "ollama",
|
|
113
|
+
},
|
|
114
|
+
}
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
app = build_application(config)
|
|
118
|
+
mapping = app.services.mapping
|
|
119
|
+
assert mapping is not None
|
|
120
|
+
|
|
121
|
+
bundle = mapping.concept_candidate_bundle(
|
|
122
|
+
"type 2 diabetes",
|
|
123
|
+
domain="Condition",
|
|
124
|
+
include_normalized=True,
|
|
125
|
+
include_fulltext=True,
|
|
126
|
+
include_embedding=True,
|
|
127
|
+
)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## Example config
|
|
131
|
+
|
|
132
|
+
```yaml
|
|
133
|
+
omop_graph:
|
|
134
|
+
db_url: "postgresql+psycopg://user:pass@localhost:5432/omop"
|
|
135
|
+
vocab_schema: omop_vocab
|
|
136
|
+
|
|
137
|
+
omop_emb:
|
|
138
|
+
enabled: true
|
|
139
|
+
backend_type: pgvector
|
|
140
|
+
db_url: "postgresql+psycopg://user:pass@localhost:5432/omop"
|
|
141
|
+
default_model_name: qwen3-embedding:0.6b
|
|
142
|
+
api_base: "http://localhost:11434/v1"
|
|
143
|
+
api_key: "ollama"
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## End-to-end examples
|
|
147
|
+
|
|
148
|
+
### MCP consumer flow
|
|
149
|
+
|
|
150
|
+
```mermaid
|
|
151
|
+
sequenceDiagram
|
|
152
|
+
participant C as MCP consumer
|
|
153
|
+
participant GW as groundworkers
|
|
154
|
+
participant M as MappingService
|
|
155
|
+
participant D as OMOP dependencies
|
|
156
|
+
|
|
157
|
+
C->>GW: call tool concept_candidate_bundle
|
|
158
|
+
GW->>M: invoke tool wrapper
|
|
159
|
+
M->>D: gather lexical, graph, and embedding evidence
|
|
160
|
+
D-->>M: candidate evidence
|
|
161
|
+
M-->>GW: assembled bundle
|
|
162
|
+
GW-->>C: MCP-safe JSON result
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
Representative tool payload:
|
|
166
|
+
|
|
167
|
+
```json
|
|
168
|
+
{
|
|
169
|
+
"tool": "concept_candidate_bundle",
|
|
170
|
+
"arguments": {
|
|
171
|
+
"query": "type 2 diabetes",
|
|
172
|
+
"domain": "Condition",
|
|
173
|
+
"include_normalized": true,
|
|
174
|
+
"include_fulltext": true,
|
|
175
|
+
"include_embedding": true,
|
|
176
|
+
"include_standard_mappings": true
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### Direct Python flow
|
|
182
|
+
|
|
183
|
+
```mermaid
|
|
184
|
+
sequenceDiagram
|
|
185
|
+
participant App as Python application
|
|
186
|
+
participant S as MappingService
|
|
187
|
+
participant A as Adapters
|
|
188
|
+
participant D as OMOP dependencies
|
|
189
|
+
|
|
190
|
+
App->>S: concept_mapping_context(...)
|
|
191
|
+
S->>A: coordinate graph / vocab / emb calls
|
|
192
|
+
A->>D: execute dependency queries
|
|
193
|
+
D-->>A: raw results
|
|
194
|
+
A-->>S: adapter-shaped results
|
|
195
|
+
S-->>App: domain result
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## If you are using it as a library
|
|
199
|
+
|
|
200
|
+
Start with `build_application(config)` and `app.services.mapping` for higher-level
|
|
201
|
+
mapping workflows. Drop down to `app.adapters.*` when you want lower-level,
|
|
202
|
+
dependency-shaped operations.
|
|
203
|
+
|
|
204
|
+
## Companion repos
|
|
205
|
+
|
|
206
|
+
- [groundcrew](https://github.com/AustralianCancerDataNetwork/groundcrew) for MCP-based orchestration
|
|
207
|
+
- [omop-graph](https://australiancancerdatanetwork.github.io/omop-graph/) for OMOP concept and hierarchy queries
|
|
208
|
+
- [omop-emb](https://australiancancerdatanetwork.github.io/omop-emb/) for embedding index and semantic retrieval
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# groundworkers
|
|
2
|
+
|
|
3
|
+
`groundworkers` is a read-only OMOP vocabulary integration package. You can use it
|
|
4
|
+
in two ways:
|
|
5
|
+
|
|
6
|
+
- as an **MCP server** for tool consumers such as `groundcrew`, Claude Code, and
|
|
7
|
+
other MCP clients
|
|
8
|
+
- as a **Python library** for applications that want to call mapping and retrieval
|
|
9
|
+
logic directly
|
|
10
|
+
|
|
11
|
+
No patient-level writes. No session state. No transport-specific business logic.
|
|
12
|
+
|
|
13
|
+
## When to use it
|
|
14
|
+
|
|
15
|
+
Use `groundworkers` when you want:
|
|
16
|
+
|
|
17
|
+
- OMOP concept lookup and hierarchy navigation
|
|
18
|
+
- exact, full-text, and embedding-based concept retrieval
|
|
19
|
+
- mapping-oriented evidence bundles and context assembly
|
|
20
|
+
- one package that works both over MCP and in-process from Python
|
|
21
|
+
|
|
22
|
+
## How it is organized
|
|
23
|
+
|
|
24
|
+
```mermaid
|
|
25
|
+
flowchart LR
|
|
26
|
+
Client1[Python app] --> App[build_application]
|
|
27
|
+
Client2[MCP client] --> Server[groundworkers server]
|
|
28
|
+
App --> Services[services/]
|
|
29
|
+
Server --> Tools[tools/]
|
|
30
|
+
Tools --> Services
|
|
31
|
+
Services --> Adapters[adapters/]
|
|
32
|
+
Adapters --> OG[omop-graph]
|
|
33
|
+
Adapters --> OE[omop-emb]
|
|
34
|
+
Adapters --> DB[(OMOP DB)]
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
- `adapters/` handle dependency-specific details
|
|
38
|
+
- `services/` handle reusable workflow logic
|
|
39
|
+
- `tools/` expose MCP-facing wrappers
|
|
40
|
+
- `app.py` and `server.py` wire those pieces together
|
|
41
|
+
|
|
42
|
+
## What it exposes
|
|
43
|
+
|
|
44
|
+
| Group | Surface | Notes |
|
|
45
|
+
|---|---|---|
|
|
46
|
+
| Concept | `concept_get`, `concept_by_code`, `concept_ancestors`, `concept_descendants`, `concept_relationships`, `concept_equivalency_path`, `concept_path`, `concept_map_to_standard`, `concept_neighbors` | Backed by `OmopGraphAdapter` |
|
|
47
|
+
| Resolver | `concept_ground` | Best-answer grounding pipeline |
|
|
48
|
+
| Search | `concept_search_exact`, `concept_search_fulltext`, `concept_navigate_to_standard` | Low-level lexical primitives |
|
|
49
|
+
| Mapping | `concept_search_normalized`, `concept_candidate_bundle`, `concept_parent_backoff`, `concept_mapping_context`, `concept_map_to_value`, `concept_resolve_mapping_expression`, `mapping_evaluate_candidates` | High-level mapping workflows |
|
|
50
|
+
| Embedding | `embedding_index_status`, `embedding_neighbours`, `embedding_search`, `embedding_encode` | Backed by `OmopEmbAdapter` |
|
|
51
|
+
| System | `system_status`, `system_vocabulary_catalogue` | Always registered |
|
|
52
|
+
|
|
53
|
+
## Quick start
|
|
54
|
+
|
|
55
|
+
### MCP server
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
uv venv
|
|
59
|
+
uv sync --extra dev --extra embedding-pgvector
|
|
60
|
+
uv run groundworkers --config config/groundworkers.example.yaml --describe
|
|
61
|
+
uv run groundworkers --config config/groundworkers.example.yaml
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### Direct Python use
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from groundworkers.app import build_application
|
|
68
|
+
from groundworkers.config import AppConfig
|
|
69
|
+
|
|
70
|
+
config = AppConfig.model_validate(
|
|
71
|
+
{
|
|
72
|
+
"omop_graph": {
|
|
73
|
+
"db_url": "postgresql+psycopg://user:pass@localhost:5432/omop",
|
|
74
|
+
"vocab_schema": "omop_vocab",
|
|
75
|
+
},
|
|
76
|
+
"omop_emb": {
|
|
77
|
+
"enabled": True,
|
|
78
|
+
"backend_type": "pgvector",
|
|
79
|
+
"db_url": "postgresql+psycopg://user:pass@localhost:5432/omop",
|
|
80
|
+
"default_model_name": "qwen3-embedding:0.6b",
|
|
81
|
+
"api_base": "http://localhost:11434/v1",
|
|
82
|
+
"api_key": "ollama",
|
|
83
|
+
},
|
|
84
|
+
}
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
app = build_application(config)
|
|
88
|
+
mapping = app.services.mapping
|
|
89
|
+
assert mapping is not None
|
|
90
|
+
|
|
91
|
+
bundle = mapping.concept_candidate_bundle(
|
|
92
|
+
"type 2 diabetes",
|
|
93
|
+
domain="Condition",
|
|
94
|
+
include_normalized=True,
|
|
95
|
+
include_fulltext=True,
|
|
96
|
+
include_embedding=True,
|
|
97
|
+
)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Example config
|
|
101
|
+
|
|
102
|
+
```yaml
|
|
103
|
+
omop_graph:
|
|
104
|
+
db_url: "postgresql+psycopg://user:pass@localhost:5432/omop"
|
|
105
|
+
vocab_schema: omop_vocab
|
|
106
|
+
|
|
107
|
+
omop_emb:
|
|
108
|
+
enabled: true
|
|
109
|
+
backend_type: pgvector
|
|
110
|
+
db_url: "postgresql+psycopg://user:pass@localhost:5432/omop"
|
|
111
|
+
default_model_name: qwen3-embedding:0.6b
|
|
112
|
+
api_base: "http://localhost:11434/v1"
|
|
113
|
+
api_key: "ollama"
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## End-to-end examples
|
|
117
|
+
|
|
118
|
+
### MCP consumer flow
|
|
119
|
+
|
|
120
|
+
```mermaid
|
|
121
|
+
sequenceDiagram
|
|
122
|
+
participant C as MCP consumer
|
|
123
|
+
participant GW as groundworkers
|
|
124
|
+
participant M as MappingService
|
|
125
|
+
participant D as OMOP dependencies
|
|
126
|
+
|
|
127
|
+
C->>GW: call tool concept_candidate_bundle
|
|
128
|
+
GW->>M: invoke tool wrapper
|
|
129
|
+
M->>D: gather lexical, graph, and embedding evidence
|
|
130
|
+
D-->>M: candidate evidence
|
|
131
|
+
M-->>GW: assembled bundle
|
|
132
|
+
GW-->>C: MCP-safe JSON result
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
Representative tool payload:
|
|
136
|
+
|
|
137
|
+
```json
|
|
138
|
+
{
|
|
139
|
+
"tool": "concept_candidate_bundle",
|
|
140
|
+
"arguments": {
|
|
141
|
+
"query": "type 2 diabetes",
|
|
142
|
+
"domain": "Condition",
|
|
143
|
+
"include_normalized": true,
|
|
144
|
+
"include_fulltext": true,
|
|
145
|
+
"include_embedding": true,
|
|
146
|
+
"include_standard_mappings": true
|
|
147
|
+
}
|
|
148
|
+
}
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### Direct Python flow
|
|
152
|
+
|
|
153
|
+
```mermaid
|
|
154
|
+
sequenceDiagram
|
|
155
|
+
participant App as Python application
|
|
156
|
+
participant S as MappingService
|
|
157
|
+
participant A as Adapters
|
|
158
|
+
participant D as OMOP dependencies
|
|
159
|
+
|
|
160
|
+
App->>S: concept_mapping_context(...)
|
|
161
|
+
S->>A: coordinate graph / vocab / emb calls
|
|
162
|
+
A->>D: execute dependency queries
|
|
163
|
+
D-->>A: raw results
|
|
164
|
+
A-->>S: adapter-shaped results
|
|
165
|
+
S-->>App: domain result
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
## If you are using it as a library
|
|
169
|
+
|
|
170
|
+
Start with `build_application(config)` and `app.services.mapping` for higher-level
|
|
171
|
+
mapping workflows. Drop down to `app.adapters.*` when you want lower-level,
|
|
172
|
+
dependency-shaped operations.
|
|
173
|
+
|
|
174
|
+
## Companion repos
|
|
175
|
+
|
|
176
|
+
- [groundcrew](https://github.com/AustralianCancerDataNetwork/groundcrew) for MCP-based orchestration
|
|
177
|
+
- [omop-graph](https://australiancancerdatanetwork.github.io/omop-graph/) for OMOP concept and hierarchy queries
|
|
178
|
+
- [omop-emb](https://australiancancerdatanetwork.github.io/omop-emb/) for embedding index and semantic retrieval
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "groundworkers"
|
|
3
|
-
version = "0.1.0"
|
|
3
|
+
version = "0.2.0"
|
|
4
4
|
description = "Groundworkers MCP server — read-only agentive access to OMOP vocabularies, concept graphs, and embeddings."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
@@ -22,7 +22,17 @@ embedding-faiss = [
|
|
|
22
22
|
"omop-emb[faiss-cpu]>=1.0.0",
|
|
23
23
|
]
|
|
24
24
|
dev = [
|
|
25
|
-
"
|
|
25
|
+
"ipython>=8.0",
|
|
26
|
+
"tornado>=6.5.5",
|
|
27
|
+
"pytest>=9.0.3",
|
|
28
|
+
"pytest-cov>=4.0",
|
|
29
|
+
"mypy>=1.8",
|
|
30
|
+
"ruff>=0.4",
|
|
31
|
+
"mkdocs-material>=9.7.1",
|
|
32
|
+
"mkdocstrings-python>=2.0.1",
|
|
33
|
+
"mkdocs>=1.6.1",
|
|
34
|
+
"requests>=2.33.0",
|
|
35
|
+
"mkdocs-mermaid2-plugin"
|
|
26
36
|
]
|
|
27
37
|
|
|
28
38
|
[project.scripts]
|
|
@@ -4,7 +4,15 @@ from collections.abc import Callable
|
|
|
4
4
|
from typing import Any
|
|
5
5
|
|
|
6
6
|
import numpy as np
|
|
7
|
-
from
|
|
7
|
+
from sqlalchemy.engine import Engine
|
|
8
|
+
|
|
9
|
+
from omop_emb import (
|
|
10
|
+
EmbeddingBackend,
|
|
11
|
+
EmbeddingClient,
|
|
12
|
+
EmbeddingConceptFilter,
|
|
13
|
+
EmbeddingModelRecord,
|
|
14
|
+
EmbeddingReaderInterface,
|
|
15
|
+
)
|
|
8
16
|
from omop_emb.config import MetricType
|
|
9
17
|
from omop_emb.embeddings.embedding_client import EmbeddingRole
|
|
10
18
|
from omop_emb.interface import list_registered_models
|
|
@@ -16,11 +24,11 @@ class OmopEmbAdapter:
|
|
|
16
24
|
def __init__(
|
|
17
25
|
self,
|
|
18
26
|
*,
|
|
19
|
-
backend_factory: Callable[[],
|
|
27
|
+
backend_factory: Callable[[], EmbeddingBackend],
|
|
20
28
|
backend_type: str | None,
|
|
21
29
|
default_model_name: str | None = None,
|
|
22
|
-
client_factory: Callable[[str],
|
|
23
|
-
cdm_engine:
|
|
30
|
+
client_factory: Callable[[str], EmbeddingClient] | None = None,
|
|
31
|
+
cdm_engine: Engine | None = None,
|
|
24
32
|
faiss_cache_dir: str | None = None,
|
|
25
33
|
) -> None:
|
|
26
34
|
self._backend_factory = backend_factory
|
|
@@ -29,8 +37,8 @@ class OmopEmbAdapter:
|
|
|
29
37
|
self._client_factory = client_factory
|
|
30
38
|
self._cdm_engine = cdm_engine
|
|
31
39
|
self._faiss_cache_dir = faiss_cache_dir
|
|
32
|
-
self._backend:
|
|
33
|
-
self._clients: dict[str,
|
|
40
|
+
self._backend: EmbeddingBackend | None = None
|
|
41
|
+
self._clients: dict[str, EmbeddingClient] = {}
|
|
34
42
|
|
|
35
43
|
def is_available(self) -> bool:
|
|
36
44
|
return self.index_status()["available"]
|
|
@@ -164,7 +172,7 @@ class OmopEmbAdapter:
|
|
|
164
172
|
"vector": row.tolist(),
|
|
165
173
|
}
|
|
166
174
|
|
|
167
|
-
def _get_backend(self) ->
|
|
175
|
+
def _get_backend(self) -> EmbeddingBackend:
|
|
168
176
|
if self._backend is None:
|
|
169
177
|
try:
|
|
170
178
|
self._backend = self._backend_factory()
|
|
@@ -172,7 +180,7 @@ class OmopEmbAdapter:
|
|
|
172
180
|
raise GroundworkersError("BACKEND_UNAVAIL", f"Embedding backend is unavailable: {exc}") from exc
|
|
173
181
|
return self._backend
|
|
174
182
|
|
|
175
|
-
def _resolve_model_record(self, model_name: str | None) ->
|
|
183
|
+
def _resolve_model_record(self, model_name: str | None) -> EmbeddingModelRecord:
|
|
176
184
|
backend = self._get_backend()
|
|
177
185
|
requested_name = model_name or self._default_model_name
|
|
178
186
|
records = list_registered_models(backend=backend, model_name=requested_name)
|
|
@@ -190,7 +198,7 @@ class OmopEmbAdapter:
|
|
|
190
198
|
"No default embedding model is configured and multiple registered models are available",
|
|
191
199
|
)
|
|
192
200
|
|
|
193
|
-
def _build_reader(self, record:
|
|
201
|
+
def _build_reader(self, record: EmbeddingModelRecord) -> EmbeddingReaderInterface:
|
|
194
202
|
return EmbeddingReaderInterface(
|
|
195
203
|
model=record.model_name,
|
|
196
204
|
backend=self._get_backend(),
|
|
@@ -200,7 +208,7 @@ class OmopEmbAdapter:
|
|
|
200
208
|
faiss_cache_dir=self._faiss_cache_dir,
|
|
201
209
|
)
|
|
202
210
|
|
|
203
|
-
def _get_client(self, model_name: str) ->
|
|
211
|
+
def _get_client(self, model_name: str) -> EmbeddingClient:
|
|
204
212
|
if self._client_factory is None:
|
|
205
213
|
raise GroundworkersError("BACKEND_UNAVAIL", "embedding client is not configured")
|
|
206
214
|
client = self._clients.get(model_name)
|
|
@@ -231,7 +239,7 @@ class OmopEmbAdapter:
|
|
|
231
239
|
limit=limit,
|
|
232
240
|
)
|
|
233
241
|
|
|
234
|
-
def _backend_type_from_backend(self, backend:
|
|
242
|
+
def _backend_type_from_backend(self, backend: EmbeddingBackend) -> str | None:
|
|
235
243
|
backend_type = getattr(backend, "backend_type", None)
|
|
236
244
|
return self._enum_value(backend_type)
|
|
237
245
|
|
|
@@ -7,7 +7,7 @@ from typing import Any
|
|
|
7
7
|
|
|
8
8
|
from omop_graph.extensions.omop_alchemy import PredicateKind
|
|
9
9
|
from omop_graph.graph.constraints import SearchConstraintConcept
|
|
10
|
-
from omop_graph.graph.kg import KnowledgeGraph
|
|
10
|
+
from omop_graph.graph.kg import KnowledgeGraph, KnowledgeGraphEmbeddingConfiguration
|
|
11
11
|
from omop_graph.graph.paths import find_shortest_paths_batch
|
|
12
12
|
from omop_graph.graph.traverse import traverse
|
|
13
13
|
from omop_graph.reasoning.grounding import GroundingConstraints, ground_term
|
|
@@ -32,6 +32,8 @@ from sqlalchemy import func, select, text
|
|
|
32
32
|
from sqlalchemy.engine import Engine
|
|
33
33
|
from sqlalchemy.exc import NoResultFound
|
|
34
34
|
|
|
35
|
+
from omop_emb import EmbeddingClient
|
|
36
|
+
|
|
35
37
|
from groundworkers.base.errors import GroundworkersError
|
|
36
38
|
|
|
37
39
|
# TODO: some of this adapter logic really should be pushed back into
|
|
@@ -45,12 +47,28 @@ class OmopGraphAdapter:
|
|
|
45
47
|
*,
|
|
46
48
|
vocab_schema: str = "omop_vocab",
|
|
47
49
|
emb_model_name: str | None = None,
|
|
50
|
+
embedding_client: EmbeddingClient | None = None,
|
|
51
|
+
min_fulltext_overlap: float = 0.0,
|
|
48
52
|
) -> None:
|
|
49
53
|
self.engine = engine
|
|
50
54
|
self.vocab_schema = vocab_schema
|
|
51
55
|
self.emb_model_name = emb_model_name
|
|
56
|
+
self._embedding_client: EmbeddingClient | None = embedding_client
|
|
57
|
+
self.min_fulltext_overlap = min_fulltext_overlap
|
|
52
58
|
self._kg: KnowledgeGraph | None = None
|
|
53
59
|
|
|
60
|
+
def set_embedding_client(self, client: EmbeddingClient, model_name: str | None = None) -> None:
|
|
61
|
+
"""Inject an EmbeddingClient so concept_ground can encode query strings on-the-fly.
|
|
62
|
+
|
|
63
|
+
Call this after construction (e.g. once the omop_emb adapter has resolved
|
|
64
|
+
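the default model from the registry).  The client is handed to the KnowledgeGraph
|
|
65
|
+
through its embedding configuration when the graph is built, and ground_term is
|
|
66
|
+
called with query_embedding=None.  NOTE(review): a graph built before this call appears to keep its previous configuration — confirm.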
|
|
67
|
+
"""
|
|
68
|
+
self._embedding_client = client
|
|
69
|
+
if model_name is not None:
|
|
70
|
+
self.emb_model_name = model_name
|
|
71
|
+
|
|
54
72
|
def is_available(self) -> bool:
|
|
55
73
|
try:
|
|
56
74
|
self._get_kg()
|
|
@@ -157,22 +175,38 @@ class OmopGraphAdapter:
|
|
|
157
175
|
(ExactLabelResolver(), ExactSynonymResolver()),
|
|
158
176
|
(FullTextResolver(), FullTextSynonymResolver()),
|
|
159
177
|
]
|
|
160
|
-
if self.emb_model_name:
|
|
178
|
+
if self.emb_model_name or self._embedding_client is not None:
|
|
161
179
|
tiers.append((EmbeddingResolver(),))
|
|
162
180
|
tiers.append((PartialLabelResolver(), PartialSynonymResolver()))
|
|
163
181
|
|
|
164
182
|
results: list[Any] = []
|
|
165
183
|
for tier in tiers:
|
|
184
|
+
is_fts_tier = any(
|
|
185
|
+
isinstance(r, (FullTextResolver, FullTextSynonymResolver)) for r in tier
|
|
186
|
+
)
|
|
166
187
|
pipeline = ResolverPipeline(resolvers=tier)
|
|
167
188
|
try:
|
|
168
|
-
|
|
189
|
+
raw = ground_term(
|
|
169
190
|
pipeline, kg, query,
|
|
170
|
-
query_embedding=None,
|
|
191
|
+
query_embedding=None, # KG computes this via its emb_config
|
|
171
192
|
constraints=constraints,
|
|
172
193
|
max_candidates=limit,
|
|
173
194
|
)
|
|
174
195
|
except Exception as exc:
|
|
175
196
|
raise self._wrap_graph_error(exc, default_code="QUERY_ERROR")
|
|
197
|
+
# Apply minimum token-overlap filter to FTS results: if fewer than
|
|
198
|
+
# min_fulltext_overlap of the query tokens appear in the matched
|
|
199
|
+
# concept name, drop the hit and fall through to a better tier.
|
|
200
|
+
if raw and is_fts_tier and self.min_fulltext_overlap > 0.0:
|
|
201
|
+
query_tokens = set(query.lower().split())
|
|
202
|
+
filtered = [
|
|
203
|
+
r for r in raw
|
|
204
|
+
if self._fts_overlap(query_tokens, r.matched_concept_label or "")
|
|
205
|
+
>= self.min_fulltext_overlap
|
|
206
|
+
]
|
|
207
|
+
results = filtered
|
|
208
|
+
else:
|
|
209
|
+
results = list(raw)
|
|
176
210
|
if results:
|
|
177
211
|
break
|
|
178
212
|
|
|
@@ -535,6 +569,20 @@ class OmopGraphAdapter:
|
|
|
535
569
|
"valid": edge.invalid_reason is None, # type: ignore[attr-defined]
|
|
536
570
|
}
|
|
537
571
|
|
|
572
|
+
@staticmethod
|
|
573
|
+
def _fts_overlap(query_tokens: set[str], concept_label: str) -> float:
|
|
574
|
+
"""Return the proportion of query tokens that appear in *concept_label*.
|
|
575
|
+
|
|
576
|
+
Both sides are lowercased and split on whitespace. A value of 1.0
|
|
577
|
+
means every query token was found; 0.0 means none were found.
|
|
578
|
+
Used to filter noisy fulltext results before falling through to the
|
|
579
|
+
embedding tier.
|
|
580
|
+
"""
|
|
581
|
+
if not query_tokens:
|
|
582
|
+
return 1.0
|
|
583
|
+
label_tokens = set(concept_label.lower().split())
|
|
584
|
+
return len(query_tokens & label_tokens) / len(query_tokens)
|
|
585
|
+
|
|
538
586
|
@staticmethod
|
|
539
587
|
def _label_match_kind_name(match_kind: object) -> str:
|
|
540
588
|
_MAP = {0: "EXACT", 1: "FULLTEXT", 2: "PARTIAL", 3: "EMBEDDING_NEAREST"}
|
|
@@ -588,7 +636,18 @@ class OmopGraphAdapter:
|
|
|
588
636
|
raise GroundworkersError("DB_UNAVAILABLE", f"Cannot connect to database: {exc}") from exc
|
|
589
637
|
|
|
590
638
|
try:
|
|
591
|
-
|
|
639
|
+
emb_config: KnowledgeGraphEmbeddingConfiguration | None = None
|
|
640
|
+
if self._embedding_client is not None:
|
|
641
|
+
try:
|
|
642
|
+
from omop_emb.config import MetricType
|
|
643
|
+
emb_config = KnowledgeGraphEmbeddingConfiguration(
|
|
644
|
+
metric_type=MetricType.COSINE,
|
|
645
|
+
model_name=self.emb_model_name,
|
|
646
|
+
client=self._embedding_client,
|
|
647
|
+
)
|
|
648
|
+
except Exception:
|
|
649
|
+
emb_config = None # Non-fatal: grounding falls back to non-embedding tiers
|
|
650
|
+
self._kg = KnowledgeGraph(cdm_engine=self.engine, emb_config=emb_config)
|
|
592
651
|
except Exception as exc:
|
|
593
652
|
raise self._wrap_graph_error(exc, default_code="BACKEND_UNAVAIL")
|
|
594
653
|
return self._kg
|
|
@@ -597,11 +656,11 @@ class OmopGraphAdapter:
|
|
|
597
656
|
# These are consistent across all Athena vocabulary releases (concept_ids may differ
|
|
598
657
|
# between instances, but concept_codes are stable).
|
|
599
658
|
_DOMAIN_ROOT_CODES: dict[str, tuple[str, str]] = {
|
|
600
|
-
"
|
|
601
|
-
"
|
|
602
|
-
"
|
|
603
|
-
"
|
|
604
|
-
"
|
|
659
|
+
"Condition": ("SNOMED", "404684003"), # Clinical finding
|
|
660
|
+
"Procedure": ("SNOMED", "71388002"), # Procedure
|
|
661
|
+
"Drug": ("SNOMED", "373873005"), # Pharmaceutical / biologic product
|
|
662
|
+
"Measurement": ("SNOMED", "363787002"), # Observable entity
|
|
663
|
+
"Device": ("SNOMED", "260787004"), # Physical object
|
|
605
664
|
}
|
|
606
665
|
|
|
607
666
|
def _get_domain_root_ids(self, domain: str | None) -> tuple[int, ...]:
|