groundworkers 0.2.0__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- groundworkers-0.3.2/PKG-INFO +168 -0
- groundworkers-0.3.2/README.md +122 -0
- {groundworkers-0.2.0 → groundworkers-0.3.2}/pyproject.toml +29 -5
- groundworkers-0.3.2/src/groundworkers/adapters/cdm.py +43 -0
- groundworkers-0.3.2/src/groundworkers/adapters/llm.py +177 -0
- {groundworkers-0.2.0 → groundworkers-0.3.2}/src/groundworkers/adapters/omop_emb.py +52 -7
- groundworkers-0.3.2/src/groundworkers/adapters/omop_graph.py +508 -0
- groundworkers-0.3.2/src/groundworkers/app.py +211 -0
- {groundworkers-0.2.0 → groundworkers-0.3.2}/src/groundworkers/base/__init__.py +0 -3
- {groundworkers-0.2.0 → groundworkers-0.3.2}/src/groundworkers/base/errors.py +3 -0
- groundworkers-0.3.2/src/groundworkers/base/server.py +125 -0
- groundworkers-0.3.2/src/groundworkers/bootstrap.py +120 -0
- groundworkers-0.3.2/src/groundworkers/config.py +276 -0
- groundworkers-0.3.2/src/groundworkers/server.py +133 -0
- groundworkers-0.3.2/src/groundworkers/services/__init__.py +9 -0
- groundworkers-0.3.2/src/groundworkers/services/domain.py +116 -0
- groundworkers-0.3.2/src/groundworkers/services/graph.py +414 -0
- groundworkers-0.3.2/src/groundworkers/services/grounding.py +118 -0
- groundworkers-0.3.2/src/groundworkers/services/knowledge/__init__.py +15 -0
- groundworkers-0.3.2/src/groundworkers/services/knowledge/catalogue.py +172 -0
- groundworkers-0.3.2/src/groundworkers/services/knowledge/models.py +100 -0
- {groundworkers-0.2.0 → groundworkers-0.3.2}/src/groundworkers/services/mapping.py +74 -37
- groundworkers-0.3.2/src/groundworkers/services/source_planning/__init__.py +79 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/assisted.py +202 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/canonical_headers.py +153 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/classifier.py +413 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/decomposer.py +67 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/decomposers/__init__.py +1 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/decomposers/csv_.py +61 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/decomposers/ddl_.py +217 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/decomposers/docx_.py +149 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/decomposers/json_.py +172 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/decomposers/pdf_.py +113 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/decomposers/xlsx_.py +112 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/decomposers/xml_.py +181 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/detector.py +85 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/models.py +308 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/normalisation.py +264 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/provenance.py +29 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/router.py +194 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/serialisation.py +59 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/service.py +394 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/source_profiles/__init__.py +11 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/source_profiles/base.py +51 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/source_profiles/redcap.py +41 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/source_profiles/registry.py +40 -0
- groundworkers-0.3.2/src/groundworkers/services/source_planning/warnings.py +29 -0
- groundworkers-0.3.2/src/groundworkers/services/text/__init__.py +29 -0
- groundworkers-0.3.2/src/groundworkers/services/text/models.py +54 -0
- groundworkers-0.3.2/src/groundworkers/services/text/prompts.py +80 -0
- groundworkers-0.3.2/src/groundworkers/services/text/service.py +112 -0
- groundworkers-0.2.0/src/groundworkers/adapters/omop_vocab.py → groundworkers-0.3.2/src/groundworkers/services/vocab.py +131 -218
- {groundworkers-0.2.0 → groundworkers-0.3.2}/src/groundworkers/tools/concept_tools.py +11 -11
- groundworkers-0.3.2/src/groundworkers/tools/domain_tools.py +45 -0
- {groundworkers-0.2.0 → groundworkers-0.3.2}/src/groundworkers/tools/embedding_tools.py +18 -0
- groundworkers-0.3.2/src/groundworkers/tools/knowledge_tools.py +187 -0
- {groundworkers-0.2.0 → groundworkers-0.3.2}/src/groundworkers/tools/mapping_tools.py +48 -22
- {groundworkers-0.2.0 → groundworkers-0.3.2}/src/groundworkers/tools/resolver_tools.py +50 -6
- {groundworkers-0.2.0 → groundworkers-0.3.2}/src/groundworkers/tools/search_tools.py +21 -46
- groundworkers-0.3.2/src/groundworkers/tools/source_planning_tools.py +141 -0
- groundworkers-0.3.2/src/groundworkers/tools/system_tools.py +164 -0
- groundworkers-0.3.2/src/groundworkers/tools/text_tools.py +209 -0
- groundworkers-0.3.2/src/groundworkers/transports/__init__.py +1 -0
- groundworkers-0.3.2/src/groundworkers/transports/rest/__init__.py +5 -0
- groundworkers-0.3.2/src/groundworkers/transports/rest/api.py +135 -0
- groundworkers-0.3.2/src/groundworkers/transports/rest/models.py +95 -0
- groundworkers-0.3.2/src/groundworkers.egg-info/PKG-INFO +168 -0
- groundworkers-0.3.2/src/groundworkers.egg-info/SOURCES.txt +78 -0
- groundworkers-0.3.2/src/groundworkers.egg-info/entry_points.txt +5 -0
- groundworkers-0.3.2/src/groundworkers.egg-info/requires.txt +47 -0
- groundworkers-0.3.2/tests/test_bootstrap_config.py +151 -0
- groundworkers-0.3.2/tests/test_rest_api.py +210 -0
- groundworkers-0.3.2/tests/test_server_registry.py +419 -0
- groundworkers-0.2.0/PKG-INFO +0 -208
- groundworkers-0.2.0/README.md +0 -178
- groundworkers-0.2.0/src/groundworkers/adapters/omop_graph.py +0 -780
- groundworkers-0.2.0/src/groundworkers/app.py +0 -122
- groundworkers-0.2.0/src/groundworkers/base/server.py +0 -52
- groundworkers-0.2.0/src/groundworkers/base/sql.py +0 -109
- groundworkers-0.2.0/src/groundworkers/config.py +0 -153
- groundworkers-0.2.0/src/groundworkers/server.py +0 -62
- groundworkers-0.2.0/src/groundworkers/services/__init__.py +0 -3
- groundworkers-0.2.0/src/groundworkers/tools/system_tools.py +0 -67
- groundworkers-0.2.0/src/groundworkers.egg-info/PKG-INFO +0 -208
- groundworkers-0.2.0/src/groundworkers.egg-info/SOURCES.txt +0 -32
- groundworkers-0.2.0/src/groundworkers.egg-info/entry_points.txt +0 -2
- groundworkers-0.2.0/src/groundworkers.egg-info/requires.txt +0 -26
- groundworkers-0.2.0/tests/test_server_registry.py +0 -97
- groundworkers-0.2.0/tests/test_sql_resource.py +0 -61
- {groundworkers-0.2.0 → groundworkers-0.3.2}/setup.cfg +0 -0
- {groundworkers-0.2.0 → groundworkers-0.3.2}/src/groundworkers/__init__.py +0 -0
- {groundworkers-0.2.0 → groundworkers-0.3.2}/src/groundworkers/adapters/__init__.py +0 -0
- {groundworkers-0.2.0 → groundworkers-0.3.2}/src/groundworkers/base/results.py +0 -0
- {groundworkers-0.2.0 → groundworkers-0.3.2}/src/groundworkers/tools/__init__.py +0 -0
- {groundworkers-0.2.0 → groundworkers-0.3.2}/src/groundworkers.egg-info/dependency_links.txt +0 -0
- {groundworkers-0.2.0 → groundworkers-0.3.2}/src/groundworkers.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: groundworkers
|
|
3
|
+
Version: 0.3.2
|
|
4
|
+
Summary: Groundworkers MCP server — read-only agentive access to OMOP vocabularies, concept graphs, and embeddings.
|
|
5
|
+
Requires-Python: >=3.12
|
|
6
|
+
Description-Content-Type: text/markdown
|
|
7
|
+
Requires-Dist: fastapi<1.0,>=0.110
|
|
8
|
+
Requires-Dist: mcp[cli]<2,>=1
|
|
9
|
+
Requires-Dist: pydantic<3,>=2
|
|
10
|
+
Requires-Dist: pyyaml<7,>=6
|
|
11
|
+
Requires-Dist: SQLAlchemy<3,>=2
|
|
12
|
+
Requires-Dist: psycopg[binary]<4,>=3.1
|
|
13
|
+
Requires-Dist: oa-configurator>=0.1.2
|
|
14
|
+
Requires-Dist: omop-graph>=1.3.0
|
|
15
|
+
Requires-Dist: omop-emb>=1.1.1
|
|
16
|
+
Requires-Dist: uvicorn[standard]<1.0,>=0.29
|
|
17
|
+
Provides-Extra: llm
|
|
18
|
+
Requires-Dist: openai>=1.0; extra == "llm"
|
|
19
|
+
Provides-Extra: xlsx
|
|
20
|
+
Requires-Dist: openpyxl<4,>=3.1; extra == "xlsx"
|
|
21
|
+
Provides-Extra: pdf
|
|
22
|
+
Requires-Dist: pdfplumber>=0.10; extra == "pdf"
|
|
23
|
+
Provides-Extra: docx
|
|
24
|
+
Requires-Dist: python-docx>=1.0; extra == "docx"
|
|
25
|
+
Provides-Extra: all-source
|
|
26
|
+
Requires-Dist: openpyxl<4,>=3.1; extra == "all-source"
|
|
27
|
+
Requires-Dist: pdfplumber>=0.10; extra == "all-source"
|
|
28
|
+
Requires-Dist: python-docx>=1.0; extra == "all-source"
|
|
29
|
+
Provides-Extra: embedding-pgvector
|
|
30
|
+
Requires-Dist: omop-emb[pgvector]>=1.1.1; extra == "embedding-pgvector"
|
|
31
|
+
Provides-Extra: embedding-faiss
|
|
32
|
+
Requires-Dist: omop-emb[faiss-cpu]>=1.1.1; extra == "embedding-faiss"
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: httpx<1,>=0.27; extra == "dev"
|
|
35
|
+
Requires-Dist: ipython>=8.0; extra == "dev"
|
|
36
|
+
Requires-Dist: tornado>=6.5.5; extra == "dev"
|
|
37
|
+
Requires-Dist: pytest>=9.0.3; extra == "dev"
|
|
38
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
39
|
+
Requires-Dist: mypy>=1.8; extra == "dev"
|
|
40
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
41
|
+
Requires-Dist: mkdocs-material>=9.7.1; extra == "dev"
|
|
42
|
+
Requires-Dist: mkdocstrings-python>=2.0.1; extra == "dev"
|
|
43
|
+
Requires-Dist: mkdocs>=1.6.1; extra == "dev"
|
|
44
|
+
Requires-Dist: requests>=2.33.0; extra == "dev"
|
|
45
|
+
Requires-Dist: mkdocs-mermaid2-plugin; extra == "dev"
|
|
46
|
+
|
|
47
|
+
# groundworkers
|
|
48
|
+
|
|
49
|
+
`groundworkers` is the reusable capability layer for OMOP-grounded lookup,
|
|
50
|
+
mapping, source planning, and knowledge-pack discovery.
|
|
51
|
+
|
|
52
|
+
You can use it in three ways:
|
|
53
|
+
|
|
54
|
+
- as an **MCP service** for agentic clients and tool discovery
|
|
55
|
+
- as a **REST service** for fixed workflow applications
|
|
56
|
+
- as a **direct Python library** for in-process orchestration
|
|
57
|
+
|
|
58
|
+
No patient-level writes. No session state. No transport-specific business logic.
|
|
59
|
+
|
|
60
|
+
## What it provides
|
|
61
|
+
|
|
62
|
+
- OMOP concept lookup and hierarchy navigation
|
|
63
|
+
- exact, normalized, full-text, and embedding-backed retrieval
|
|
64
|
+
- mapping-oriented candidate bundles and context assembly
|
|
65
|
+
- stateless source-planning workflows
|
|
66
|
+
- LLM-backed text normalization and domain classification
|
|
67
|
+
|
|
68
|
+
## Runtime model
|
|
69
|
+
|
|
70
|
+
```mermaid
|
|
71
|
+
flowchart TD
|
|
72
|
+
STACK[shared stack config] --> BOOT[build_app_config]
|
|
73
|
+
BOOT --> CFG[AppConfig]
|
|
74
|
+
CFG --> APP[build_application]
|
|
75
|
+
APP --> GW[GroundworkersApp]
|
|
76
|
+
GW --> SVC[services]
|
|
77
|
+
GW --> ADP[adapters]
|
|
78
|
+
MCP[MCP client] --> TOOLS[MCP tools]
|
|
79
|
+
REST[REST client] --> API[REST transport]
|
|
80
|
+
PY[Python caller] --> SVC
|
|
81
|
+
TOOLS --> SVC
|
|
82
|
+
TOOLS -. adapter-backed primitives .-> ADP
|
|
83
|
+
API --> SVC
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
`build_application(...)` is the composition root. It builds one reusable
|
|
87
|
+
runtime container with transport-agnostic services plus dependency-facing
|
|
88
|
+
adapters. Most caller-facing workflows go through services; some MCP tools are
|
|
89
|
+
intentionally adapter-backed when the capability is closer to a backend
|
|
90
|
+
primitive than a domain service.
|
|
91
|
+
|
|
92
|
+
## Quick start
|
|
93
|
+
|
|
94
|
+
### Install
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
pip install groundworkers
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Optional extras:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
pip install "groundworkers[llm,embedding-pgvector]"
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
### Configure the shared stack
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
omop-config configure omop_alchemy
|
|
110
|
+
omop-config configure omop_graph
|
|
111
|
+
omop-config configure groundworkers
|
|
112
|
+
# optional if you want embedding-backed capabilities
|
|
113
|
+
omop-config configure omop_emb
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Start MCP
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
groundworkers --describe
|
|
120
|
+
groundworkers --transport streamable-http --host 0.0.0.0 --port 8000
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Start REST
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
groundworkers --transport rest --host 0.0.0.0 --port 8080
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Use from Python
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
from groundworkers.app import build_application
|
|
133
|
+
from groundworkers.bootstrap import build_app_config
|
|
134
|
+
|
|
135
|
+
config = build_app_config()
|
|
136
|
+
app = build_application(config)
|
|
137
|
+
|
|
138
|
+
mapping = app.services.mapping
|
|
139
|
+
bundle = mapping.concept_candidate_bundle(
|
|
140
|
+
"type 2 diabetes",
|
|
141
|
+
domain="Condition",
|
|
142
|
+
include_normalized=True,
|
|
143
|
+
include_fulltext=True,
|
|
144
|
+
include_embedding=True,
|
|
145
|
+
)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Main surfaces
|
|
149
|
+
|
|
150
|
+
| Surface | Best for |
|
|
151
|
+
|---|---|
|
|
152
|
+
| MCP tools | Tool discovery, agent interoperability, shared capability services |
|
|
153
|
+
| REST routes | Typed HTTP workflows such as candidate bundles and assisted source planning |
|
|
154
|
+
| `app.services.*` | In-process Python applications and batch workflows |
|
|
155
|
+
| `app.adapters.*` | Backend wrappers used when you intentionally need dependency-shaped primitives |
|
|
156
|
+
|
|
157
|
+
## Learn more
|
|
158
|
+
|
|
159
|
+
- Docs home: `docs/index.md`
|
|
160
|
+
- Configuration: `docs/usage/configuration.md`
|
|
161
|
+
- Integrations: `docs/usage/integrations.md`
|
|
162
|
+
- Architecture: `docs/architecture.md`
|
|
163
|
+
|
|
164
|
+
## Companion repos
|
|
165
|
+
|
|
166
|
+
- [groundcrew](https://github.com/AustralianCancerDataNetwork/groundcrew)
|
|
167
|
+
- [omop-graph](https://australiancancerdatanetwork.github.io/omop-graph/)
|
|
168
|
+
- [omop-emb](https://australiancancerdatanetwork.github.io/omop-emb/)
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# groundworkers
|
|
2
|
+
|
|
3
|
+
`groundworkers` is the reusable capability layer for OMOP-grounded lookup,
|
|
4
|
+
mapping, source planning, and knowledge-pack discovery.
|
|
5
|
+
|
|
6
|
+
You can use it in three ways:
|
|
7
|
+
|
|
8
|
+
- as an **MCP service** for agentic clients and tool discovery
|
|
9
|
+
- as a **REST service** for fixed workflow applications
|
|
10
|
+
- as a **direct Python library** for in-process orchestration
|
|
11
|
+
|
|
12
|
+
No patient-level writes. No session state. No transport-specific business logic.
|
|
13
|
+
|
|
14
|
+
## What it provides
|
|
15
|
+
|
|
16
|
+
- OMOP concept lookup and hierarchy navigation
|
|
17
|
+
- exact, normalized, full-text, and embedding-backed retrieval
|
|
18
|
+
- mapping-oriented candidate bundles and context assembly
|
|
19
|
+
- stateless source-planning workflows
|
|
20
|
+
- LLM-backed text normalization and domain classification
|
|
21
|
+
|
|
22
|
+
## Runtime model
|
|
23
|
+
|
|
24
|
+
```mermaid
|
|
25
|
+
flowchart TD
|
|
26
|
+
STACK[shared stack config] --> BOOT[build_app_config]
|
|
27
|
+
BOOT --> CFG[AppConfig]
|
|
28
|
+
CFG --> APP[build_application]
|
|
29
|
+
APP --> GW[GroundworkersApp]
|
|
30
|
+
GW --> SVC[services]
|
|
31
|
+
GW --> ADP[adapters]
|
|
32
|
+
MCP[MCP client] --> TOOLS[MCP tools]
|
|
33
|
+
REST[REST client] --> API[REST transport]
|
|
34
|
+
PY[Python caller] --> SVC
|
|
35
|
+
TOOLS --> SVC
|
|
36
|
+
TOOLS -. adapter-backed primitives .-> ADP
|
|
37
|
+
API --> SVC
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
`build_application(...)` is the composition root. It builds one reusable
|
|
41
|
+
runtime container with transport-agnostic services plus dependency-facing
|
|
42
|
+
adapters. Most caller-facing workflows go through services; some MCP tools are
|
|
43
|
+
intentionally adapter-backed when the capability is closer to a backend
|
|
44
|
+
primitive than a domain service.
|
|
45
|
+
|
|
46
|
+
## Quick start
|
|
47
|
+
|
|
48
|
+
### Install
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install groundworkers
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Optional extras:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
pip install "groundworkers[llm,embedding-pgvector]"
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Configure the shared stack
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
omop-config configure omop_alchemy
|
|
64
|
+
omop-config configure omop_graph
|
|
65
|
+
omop-config configure groundworkers
|
|
66
|
+
# optional if you want embedding-backed capabilities
|
|
67
|
+
omop-config configure omop_emb
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
### Start MCP
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
groundworkers --describe
|
|
74
|
+
groundworkers --transport streamable-http --host 0.0.0.0 --port 8000
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### Start REST
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
groundworkers --transport rest --host 0.0.0.0 --port 8080
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Use from Python
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from groundworkers.app import build_application
|
|
87
|
+
from groundworkers.bootstrap import build_app_config
|
|
88
|
+
|
|
89
|
+
config = build_app_config()
|
|
90
|
+
app = build_application(config)
|
|
91
|
+
|
|
92
|
+
mapping = app.services.mapping
|
|
93
|
+
bundle = mapping.concept_candidate_bundle(
|
|
94
|
+
"type 2 diabetes",
|
|
95
|
+
domain="Condition",
|
|
96
|
+
include_normalized=True,
|
|
97
|
+
include_fulltext=True,
|
|
98
|
+
include_embedding=True,
|
|
99
|
+
)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Main surfaces
|
|
103
|
+
|
|
104
|
+
| Surface | Best for |
|
|
105
|
+
|---|---|
|
|
106
|
+
| MCP tools | Tool discovery, agent interoperability, shared capability services |
|
|
107
|
+
| REST routes | Typed HTTP workflows such as candidate bundles and assisted source planning |
|
|
108
|
+
| `app.services.*` | In-process Python applications and batch workflows |
|
|
109
|
+
| `app.adapters.*` | Backend wrappers used when you intentionally need dependency-shaped primitives |
|
|
110
|
+
|
|
111
|
+
## Learn more
|
|
112
|
+
|
|
113
|
+
- Docs home: `docs/index.md`
|
|
114
|
+
- Configuration: `docs/usage/configuration.md`
|
|
115
|
+
- Integrations: `docs/usage/integrations.md`
|
|
116
|
+
- Architecture: `docs/architecture.md`
|
|
117
|
+
|
|
118
|
+
## Companion repos
|
|
119
|
+
|
|
120
|
+
- [groundcrew](https://github.com/AustralianCancerDataNetwork/groundcrew)
|
|
121
|
+
- [omop-graph](https://australiancancerdatanetwork.github.io/omop-graph/)
|
|
122
|
+
- [omop-emb](https://australiancancerdatanetwork.github.io/omop-emb/)
|
|
@@ -1,27 +1,48 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "groundworkers"
|
|
3
|
-
version = "0.2.0"
|
|
3
|
+
version = "0.3.2"
|
|
4
4
|
description = "Groundworkers MCP server — read-only agentive access to OMOP vocabularies, concept graphs, and embeddings."
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.12"
|
|
7
7
|
dependencies = [
|
|
8
|
+
"fastapi>=0.110,<1.0",
|
|
8
9
|
"mcp[cli]>=1,<2",
|
|
9
10
|
"pydantic>=2,<3",
|
|
10
11
|
"pyyaml>=6,<7",
|
|
11
12
|
"SQLAlchemy>=2,<3",
|
|
12
13
|
"psycopg[binary]>=3.1,<4",
|
|
13
|
-
"
|
|
14
|
-
"omop-
|
|
14
|
+
"oa-configurator>=0.1.2",
|
|
15
|
+
"omop-graph>=1.3.0",
|
|
16
|
+
"omop-emb>=1.1.1",
|
|
17
|
+
"uvicorn[standard]>=0.29,<1.0",
|
|
15
18
|
]
|
|
16
19
|
|
|
17
20
|
[project.optional-dependencies]
|
|
21
|
+
llm = [
|
|
22
|
+
"openai>=1.0",
|
|
23
|
+
]
|
|
24
|
+
xlsx = [
|
|
25
|
+
"openpyxl>=3.1,<4",
|
|
26
|
+
]
|
|
27
|
+
pdf = [
|
|
28
|
+
"pdfplumber>=0.10",
|
|
29
|
+
]
|
|
30
|
+
docx = [
|
|
31
|
+
"python-docx>=1.0",
|
|
32
|
+
]
|
|
33
|
+
all_source = [
|
|
34
|
+
"openpyxl>=3.1,<4",
|
|
35
|
+
"pdfplumber>=0.10",
|
|
36
|
+
"python-docx>=1.0",
|
|
37
|
+
]
|
|
18
38
|
embedding-pgvector = [
|
|
19
|
-
"omop-emb[pgvector]>=1.
|
|
39
|
+
"omop-emb[pgvector]>=1.1.1",
|
|
20
40
|
]
|
|
21
41
|
embedding-faiss = [
|
|
22
|
-
"omop-emb[faiss-cpu]>=1.
|
|
42
|
+
"omop-emb[faiss-cpu]>=1.1.1",
|
|
23
43
|
]
|
|
24
44
|
dev = [
|
|
45
|
+
"httpx>=0.27,<1",
|
|
25
46
|
"ipython>=8.0",
|
|
26
47
|
"tornado>=6.5.5",
|
|
27
48
|
"pytest>=9.0.3",
|
|
@@ -38,6 +59,9 @@ dev = [
|
|
|
38
59
|
[project.scripts]
|
|
39
60
|
groundworkers = "groundworkers.server:main"
|
|
40
61
|
|
|
62
|
+
[project.entry-points."omop.config"]
|
|
63
|
+
groundworkers = "groundworkers.config:GroundworkersConfig"
|
|
64
|
+
|
|
41
65
|
[build-system]
|
|
42
66
|
requires = ["setuptools>=68", "wheel"]
|
|
43
67
|
build-backend = "setuptools.build_meta"
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from sqlalchemy import text
|
|
4
|
+
from sqlalchemy.engine import Engine
|
|
5
|
+
from sqlalchemy.orm import sessionmaker
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CDMAdapter:
    """Connection holder for an OMOP CDM (Common Data Model) database.

    Wraps one SQLAlchemy engine together with a session factory bound to it,
    so that the services which query the CDM directly (VocabService,
    OmopGraphAdapter) can share a single connection pool.

    Hand ``engine`` to adapters that manage their own sessions; call
    ``session()`` when a service needs a session it can use as a context
    manager.
    """

    def __init__(self, engine: Engine) -> None:
        """Store *engine* and build a session factory bound to it."""
        self._engine = engine
        self._session_factory = sessionmaker(engine)

    @property
    def engine(self) -> Engine:
        """The SQLAlchemy engine this adapter was constructed with."""
        return self._engine

    def session(self):
        """Create a new session from the factory (usable in a ``with`` block)."""
        factory = self._session_factory
        return factory()

    def is_available(self) -> bool:
        """Probe the database with ``SELECT 1``; report False on any failure."""
        try:
            with self._engine.connect() as connection:
                connection.execute(text("SELECT 1"))
        except Exception:
            # Deliberate best-effort probe: every failure means "unavailable".
            return False
        return True

    def close(self) -> None:
        """Dispose of the engine, releasing its connection pool."""
        self._engine.dispose()
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from groundworkers.base.errors import GroundworkersError
|
|
8
|
+
|
|
9
|
+
# Timeout for the cheap reachability probe in ``LLMAdapter.status``.
_STATUS_TIMEOUT_SECONDS = 2.0
# Timeout passed to every chat-completion request.
_COMPLETION_TIMEOUT_SECONDS = 180.0


class LLMAdapter:
    """Adapter for OpenAI-compatible LLM chat completion APIs.

    Works with any provider that implements the OpenAI chat completions API:
    local deployments (Ollama, vLLM, LM Studio) and remote services (OpenAI,
    Azure OpenAI, and compatible cloud APIs). Configure ``api_base`` to point
    at the correct endpoint.

    Two completion modes are available:

    - **Text completion** (``complete_text``): returns a raw text response.
    - **Structured completion** (``complete_structured``): requests a JSON
      response matching a caller-supplied schema. Preferred for MCP-facing
      tools where downstream agents need to parse the output reliably.

    The client is created lazily by ``client_factory`` on first use and then
    cached until ``close()`` is called.
    """

    def __init__(
        self,
        *,
        provider: str,
        default_model_name: str | None = None,
        client_factory: Callable[[], Any],
    ) -> None:
        """Store configuration; no client is built until it is first needed.

        Args:
            provider: Label echoed back in ``status()`` and completion results.
            default_model_name: Model used when a call passes no ``model_name``.
            client_factory: Zero-argument callable returning the API client.
        """
        self._provider = provider
        self._default_model_name = default_model_name
        self._client_factory = client_factory
        self._client: Any = None

    def is_available(self) -> bool:
        """Return True if the LLM API is reachable."""
        return self.status()["available"]

    def close(self) -> None:
        """Release the cached client."""
        self._client = None

    def status(self) -> dict[str, Any]:
        """Return availability and configuration details. Never raises.

        Probes the API with a short timeout. On failure returns
        ``{"available": False, ..., "detail": "<reason>"}``.
        """
        try:
            client = self._get_client()
            client.models.list(timeout=_STATUS_TIMEOUT_SECONDS)
            return {
                "available": True,
                "provider": self._provider,
                "default_model": self._default_model_name,
                "structured_output_supported": True,
                "detail": None,
            }
        except Exception as exc:
            # Status is a best-effort probe: report the failure, never raise.
            return {
                "available": False,
                "provider": self._provider,
                "default_model": self._default_model_name,
                "structured_output_supported": None,
                "detail": repr(exc),
            }

    def complete_text(
        self,
        prompt: str,
        *,
        system_prompt: str | None = None,
        model_name: str | None = None,
        temperature: float = 0.0,
    ) -> dict[str, Any]:
        """Complete a prompt and return the response text.

        Returns a dict with keys ``text``, ``model`` and ``provider``.

        Raises ``INVALID_INPUT`` if no model is resolvable.
        Raises ``BACKEND_UNAVAIL`` if the API call fails.
        Raises ``QUERY_ERROR`` if the response carries no completion choice.
        """
        # Validate caller input before touching the backend, so a bad request
        # is reported as INVALID_INPUT without building a client.
        resolved_model = self._resolve_model(model_name)
        messages = _build_messages(prompt, system_prompt)
        client = self._get_client()
        try:
            response = client.chat.completions.create(
                model=resolved_model,
                messages=messages,
                temperature=temperature,
                timeout=_COMPLETION_TIMEOUT_SECONDS,
            )
        except Exception as exc:
            raise GroundworkersError("BACKEND_UNAVAIL", f"LLM call failed: {exc}") from exc
        return {
            "text": _first_choice_content(response),
            "model": response.model,
            "provider": self._provider,
        }

    def complete_structured(
        self,
        prompt: str,
        response_schema: dict[str, Any],
        *,
        system_prompt: str | None = None,
        model_name: str | None = None,
        temperature: float = 0.0,
    ) -> dict[str, Any]:
        """Complete a prompt and return a parsed JSON dict guided by response_schema.

        The schema is injected into the system prompt and JSON mode is requested
        from the API. This is compatible with Ollama, vLLM, and OpenAI endpoints.

        The response is parsed but not validated against the schema — callers are
        responsible for validating the returned dict (e.g. with Pydantic).

        Raises ``INVALID_INPUT`` if no model is resolvable or if response_schema
        is not JSON-serializable.
        Raises ``BACKEND_UNAVAIL`` if the API call fails.
        Raises ``QUERY_ERROR`` if the response carries no completion choice,
        is not valid JSON, or is valid JSON that is not an object.
        """
        # Validate caller input (model, schema) before touching the backend.
        resolved_model = self._resolve_model(model_name)
        try:
            schema_json = json.dumps(response_schema, indent=2)
        except (TypeError, ValueError) as exc:
            raise GroundworkersError(
                "INVALID_INPUT", f"response_schema is not JSON-serializable: {exc}"
            ) from exc
        schema_directive = f"Respond with a JSON object matching this schema:\n{schema_json}"
        augmented_system = f"{system_prompt}\n\n{schema_directive}" if system_prompt else schema_directive
        messages = _build_messages(prompt, augmented_system)
        client = self._get_client()
        try:
            response = client.chat.completions.create(
                model=resolved_model,
                messages=messages,
                temperature=temperature,
                response_format={"type": "json_object"},
                timeout=_COMPLETION_TIMEOUT_SECONDS,
            )
        except Exception as exc:
            raise GroundworkersError("BACKEND_UNAVAIL", f"LLM call failed: {exc}") from exc
        # A missing/None content is treated as empty text, which fails JSON parsing.
        content = _first_choice_content(response) or ""
        try:
            parsed = json.loads(content)
        except json.JSONDecodeError as exc:
            raise GroundworkersError("QUERY_ERROR", f"LLM response was not valid JSON: {exc}") from exc
        # The contract is a dict; a bare list/string/number is valid JSON but
        # would break callers that index the result by key.
        if not isinstance(parsed, dict):
            raise GroundworkersError(
                "QUERY_ERROR",
                f"LLM response was not a JSON object (got {type(parsed).__name__})",
            )
        return parsed

    def _get_client(self) -> Any:
        """Return the cached client, creating it via the factory on first use.

        Raises ``BACKEND_UNAVAIL`` if the factory raises.
        """
        if self._client is None:
            try:
                self._client = self._client_factory()
            except Exception as exc:
                raise GroundworkersError("BACKEND_UNAVAIL", f"LLM client could not be initialised: {exc}") from exc
        return self._client

    def _resolve_model(self, model_name: str | None) -> str:
        """Pick the explicit model name, falling back to the configured default.

        Raises ``INVALID_INPUT`` when neither yields a non-empty name.
        """
        resolved = model_name or self._default_model_name
        # ``not resolved`` (rather than ``is None``) also rejects an empty
        # default, which would otherwise be sent to the API as the model.
        if not resolved:
            raise GroundworkersError(
                "INVALID_INPUT",
                "No model specified and no default model is configured",
            )
        return resolved


def _first_choice_content(response: Any) -> str | None:
    """Return the message content of the first completion choice.

    Raises ``QUERY_ERROR`` if the response has no usable first choice, instead
    of letting a raw IndexError/AttributeError escape to the caller.
    """
    try:
        return response.choices[0].message.content
    except (AttributeError, IndexError, TypeError) as exc:
        raise GroundworkersError(
            "QUERY_ERROR", f"LLM response contained no completion choices: {exc!r}"
        ) from exc


def _build_messages(prompt: str, system_prompt: str | None) -> list[dict[str, str]]:
    """Build the chat message list: optional system message, then the user prompt."""
    messages: list[dict[str, str]] = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.append({"role": "user", "content": prompt})
    return messages
|