fhir-mcp-shared 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,54 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ *.egg
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+ .eggs/
11
+
12
+ # uv
13
+ .venv/
14
+ .uv/
15
+ # uv.lock is intentionally committed — pins all transitive deps for CI reproducibility
16
+
17
+ # Environments
18
+ .env
19
+ .env.local
20
+ .env.*.local
21
+
22
+ # IDE
23
+ .vscode/settings.json
24
+ .idea/
25
+ *.iml
26
+
27
+ # Testing
28
+ .pytest_cache/
29
+ .coverage
30
+ coverage.xml
31
+ htmlcov/
32
+ .tox/
33
+
34
+ # Mypy
35
+ .mypy_cache/
36
+ .dmypy.json
37
+
38
+ # Ruff
39
+ .ruff_cache/
40
+
41
+ # MkDocs
42
+ site/
43
+
44
+ # Docker
45
+ *.tar
46
+
47
+ # OS
48
+ .DS_Store
49
+ Thumbs.db
50
+
51
+ # Secrets (belt-and-suspenders; gitleaks is the real guard)
52
+ *.pem
53
+ *.key
54
+ *_secret*
@@ -0,0 +1,41 @@
1
+ Metadata-Version: 2.4
2
+ Name: fhir-mcp-shared
3
+ Version: 0.1.0
4
+ Summary: Shared utilities for the fhir-mcp-suite monorepo (logging, LangFuse, base models, eval)
5
+ Project-URL: Homepage, https://github.com/pcmedsinge/fhir-mcp-suite
6
+ Project-URL: Repository, https://github.com/pcmedsinge/fhir-mcp-suite
7
+ Project-URL: Bug Tracker, https://github.com/pcmedsinge/fhir-mcp-suite/issues
8
+ Project-URL: Documentation, https://pcmedsinge.github.io/fhir-mcp-suite/
9
+ Project-URL: Changelog, https://github.com/pcmedsinge/fhir-mcp-suite/releases
10
+ Author-email: Parag Medsinge <pcmedsinge@gmail.com>
11
+ License: Apache-2.0
12
+ Keywords: eval,fhir,healthcare,langfuse,logging,mcp,pydantic
13
+ Classifier: Development Status :: 5 - Production/Stable
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Healthcare Industry
16
+ Classifier: License :: OSI Approved :: Apache Software License
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.12
24
+ Requires-Dist: langfuse>=3.0
25
+ Requires-Dist: pydantic-settings>=2.6
26
+ Requires-Dist: pydantic>=2.9
27
+ Requires-Dist: structlog>=24.4
28
+ Description-Content-Type: text/markdown
29
+
30
+ # fhir-mcp-shared
31
+
32
+ Internal shared utilities for the [fhir-mcp-suite](https://github.com/pcmedsinge/fhir-mcp-suite) monorepo.
33
+
34
+ Not published to PyPI. Used as a uv workspace dependency by `mcp-fhir`, `mcp-terminology`, and `mcp-clinical-reasoner`.
35
+
36
+ ## Contents
37
+
38
+ - `logging.py` — structlog configuration (JSON + console renderers)
39
+ - `langfuse.py` — LangFuse v3 wrapper with graceful no-op degradation
40
+ - `models/` — shared Pydantic models (`FhirResource`, `ValidationReport`, etc.)
41
+ - `eval/` — golden-query eval harness (`EvalRunner`, `GoldenCase`, `EvalResult`)
@@ -0,0 +1,12 @@
1
+ # fhir-mcp-shared
2
+
3
+ Internal shared utilities for the [fhir-mcp-suite](https://github.com/pcmedsinge/fhir-mcp-suite) monorepo.
4
+
5
+ Not published to PyPI. Used as a uv workspace dependency by `mcp-fhir`, `mcp-terminology`, and `mcp-clinical-reasoner`.
6
+
7
+ ## Contents
8
+
9
+ - `logging.py` — structlog configuration (JSON + console renderers)
10
+ - `langfuse.py` — LangFuse v3 wrapper with graceful no-op degradation
11
+ - `models/` — shared Pydantic models (`FhirResource`, `ValidationReport`, etc.)
12
+ - `eval/` — golden-query eval harness (`EvalRunner`, `GoldenCase`, `EvalResult`)
@@ -0,0 +1,48 @@
1
+ [project]
2
+ name = "fhir-mcp-shared"
3
+ version = "0.1.0"
4
+ description = "Shared utilities for the fhir-mcp-suite monorepo (logging, LangFuse, base models, eval)"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ license = { text = "Apache-2.0" }
8
+ authors = [{ name = "Parag Medsinge", email = "pcmedsinge@gmail.com" }]
9
+ keywords = ["mcp", "fhir", "healthcare", "langfuse", "logging", "pydantic", "eval"]
10
+ classifiers = [
11
+ "Development Status :: 5 - Production/Stable",
12
+ "Intended Audience :: Developers",
13
+ "Intended Audience :: Healthcare Industry",
14
+ "License :: OSI Approved :: Apache Software License",
15
+ "Operating System :: OS Independent",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.12",
18
+ "Topic :: Scientific/Engineering :: Medical Science Apps.",
19
+ "Topic :: Software Development :: Libraries :: Python Modules",
20
+ "Typing :: Typed",
21
+ ]
22
+ dependencies = [
23
+ "pydantic>=2.9",
24
+ "pydantic-settings>=2.6",
25
+ "structlog>=24.4",
26
+ "langfuse>=3.0",
27
+ ]
28
+
29
+ [project.urls]
30
+ Homepage = "https://github.com/pcmedsinge/fhir-mcp-suite"
31
+ Repository = "https://github.com/pcmedsinge/fhir-mcp-suite"
32
+ "Bug Tracker" = "https://github.com/pcmedsinge/fhir-mcp-suite/issues"
33
+ Documentation = "https://pcmedsinge.github.io/fhir-mcp-suite/"
34
+ Changelog = "https://github.com/pcmedsinge/fhir-mcp-suite/releases"
35
+
36
+ [build-system]
37
+ requires = ["hatchling"]
38
+ build-backend = "hatchling.build"
39
+
40
+ [tool.hatch.build.targets.wheel]
41
+ packages = ["src/fhir_mcp_shared"]
42
+
43
+ [dependency-groups]
44
+ dev = [
45
+ "pytest>=8.3",
46
+ "pytest-asyncio>=0.24",
47
+ "pytest-cov>=5.0",
48
+ ]
@@ -0,0 +1,13 @@
1
+ """fhir-mcp-shared — internal utilities for the fhir-mcp-suite monorepo.
2
+
3
+ Public surface (intended for import by package servers):
4
+
5
+ from fhir_mcp_shared.logging import configure_logging
6
+ from fhir_mcp_shared.langfuse import trace, span, generation, get_client
7
+ from fhir_mcp_shared.models import ValidationIssue, ValidationReport, FhirResource
8
+ from fhir_mcp_shared.eval import EvalRunner, GoldenCase, EvalResult
9
+ """
10
+
11
+ from fhir_mcp_shared.logging import configure_logging
12
+
13
+ __all__ = ["configure_logging"]
@@ -0,0 +1,120 @@
1
+ """Eval harness skeleton — golden-query runner for all three MCP servers.
2
+
3
+ Usage::
4
+
5
+ from fhir_mcp_shared.eval import EvalRunner, GoldenCase, EvalResult
6
+
7
+ runner = EvalRunner(cases=load_golden("evals/mcp-fhir/golden_queries.json"))
8
+ results = await runner.run(invoke_fn=my_tool_fn)
9
+ runner.assert_threshold(results, min_pass_rate=0.90)
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import json
15
+ from collections.abc import Awaitable, Callable
16
+ from pathlib import Path
17
+ from typing import Any
18
+
19
+ import structlog
20
+ from pydantic import BaseModel, Field
21
+
22
+ log = structlog.get_logger(__name__)
23
+
24
+
25
class GoldenCase(BaseModel):
    """One golden-query fixture: the tool to call, its input, and what to expect."""

    # Identifier copied into ``EvalResult.case_id`` when the case is run.
    id: str
    description: str
    tool: str = Field(description="MCP tool name to invoke")
    input: dict[str, Any] = Field(description="Tool input arguments")
    expected: dict[str, Any] = Field(description="Expected fields in the response")
    # Optional labels; ``EvalRunner.run(tags=...)`` keeps cases sharing a label.
    tags: list[str] = Field(default_factory=list)
34
+
35
+
36
class EvalResult(BaseModel):
    """Result record produced for one executed golden case."""

    case_id: str
    passed: bool
    # Fraction of expected fields that matched, constrained to [0.0, 1.0].
    score: float = Field(
        ge=0.0,
        le=1.0,
        description="0.0 = fail, 1.0 = full pass",
    )
    # Raw tool response; left empty when the invocation raised.
    actual: dict[str, Any] = Field(default_factory=dict)
    notes: str = ""
44
+
45
+
46
class EvalRunner:
    """Runs a golden-query suite against an async tool-invoke function."""

    def __init__(self, cases: list[GoldenCase]) -> None:
        """Store the golden cases to execute.

        Args:
            cases: Cases run in order by :meth:`run`.
        """
        self.cases = cases

    @classmethod
    def from_file(cls, path: str | Path) -> EvalRunner:
        """Load golden cases from a JSON file.

        The file must contain a JSON array of objects, each validated as a
        ``GoldenCase``.

        Args:
            path: Location of the JSON file.

        Returns:
            A runner holding the parsed cases.
        """
        # JSON is UTF-8 by specification; do not depend on the locale default.
        data = json.loads(Path(path).read_text(encoding="utf-8"))
        return cls(cases=[GoldenCase.model_validate(c) for c in data])

    async def run(
        self,
        invoke_fn: Callable[[str, dict[str, Any]], Awaitable[dict[str, Any]]],
        tags: list[str] | None = None,
    ) -> list[EvalResult]:
        """Run all cases (optionally filtered by tag) and return results.

        Cases are awaited one at a time, in order. An exception raised by
        ``invoke_fn`` (or while checking its response) is recorded as a failed
        result with score 0.0 instead of aborting the run.

        Args:
            invoke_fn: An async callable ``(tool_name, input) -> dict``.
            tags: If provided and non-empty, only run cases whose tags overlap.

        Returns:
            One ``EvalResult`` per executed case, in execution order.
        """
        # Build the filter set once; None/empty means "run everything".
        wanted = set(tags) if tags else None
        results: list[EvalResult] = []
        for case in self.cases:
            if wanted is not None and wanted.isdisjoint(case.tags):
                continue
            log.info("eval_case_start", case_id=case.id, tool=case.tool)
            try:
                actual = await invoke_fn(case.tool, case.input)
                passed, score, notes = self._check(case.expected, actual)
            except Exception as exc:
                # Deliberate best-effort: one broken case must not stop the suite.
                log.warning("eval_case_error", case_id=case.id, error=str(exc))
                passed, score, notes = False, 0.0, f"exception: {exc}"
                actual = {}
            results.append(
                EvalResult(case_id=case.id, passed=passed, score=score, actual=actual, notes=notes)
            )
            log.info("eval_case_done", case_id=case.id, passed=passed, score=score)
        return results

    def _check(self, expected: dict[str, Any], actual: dict[str, Any]) -> tuple[bool, float, str]:
        """Check that all expected keys/values appear in actual (subset match).

        A key that is absent from ``actual`` never matches, even when the
        expected value is ``None``; only an explicit ``None`` in the response
        satisfies an expected ``None``.

        Args:
            expected: Key/value pairs that must be present in ``actual``.
            actual: The tool response; extra keys are ignored.

        Returns:
            ``(passed, score, notes)`` where ``score`` is the fraction of
            expected keys that matched and ``notes`` lists every mismatch.
        """
        if not expected:
            return True, 1.0, "no assertions defined"

        # Sentinel so "key not in response" is distinguishable from a None value.
        missing = object()
        hits = 0
        misses: list[str] = []
        for key, expected_val in expected.items():
            actual_val = actual.get(key, missing)
            if actual_val is missing:
                misses.append(f"{key}: expected {expected_val!r}, got <missing>")
            elif actual_val == expected_val:
                hits += 1
            else:
                misses.append(f"{key}: expected {expected_val!r}, got {actual_val!r}")

        score = hits / len(expected)
        passed = not misses
        notes = "; ".join(misses) if misses else "all assertions passed"
        return passed, score, notes

    @staticmethod
    def assert_threshold(results: list[EvalResult], min_pass_rate: float = 0.85) -> None:
        """Raise AssertionError if pass rate is below threshold (used in CI).

        Args:
            results: Results returned by :meth:`run`.
            min_pass_rate: Minimum acceptable fraction of passing cases.

        Raises:
            AssertionError: If ``results`` is empty or the pass rate is below
                ``min_pass_rate``; the message lists each failing case.
        """
        if not results:
            raise AssertionError("No eval results to check")
        pass_rate = sum(r.passed for r in results) / len(results)
        if pass_rate < min_pass_rate:
            failures = [r for r in results if not r.passed]
            details = "\n".join(f"  {r.case_id}: {r.notes}" for r in failures)
            raise AssertionError(
                f"Eval pass rate {pass_rate:.1%} < threshold {min_pass_rate:.1%}\n{details}"
            )
@@ -0,0 +1,200 @@
1
+ """LangFuse v3 wrapper — gracefully degrades when credentials are absent.
2
+
3
+ Usage in any MCP server::
4
+
5
+ from fhir_mcp_shared.langfuse import span, generation, trace
6
+
7
+ # Per-request trace (wraps one MCP tool call)
8
+ with trace("fhir_read", session_id="session-abc", user_id="user-1") as t:
9
+ with span("http_get", parent=t, resource_type="Patient") as s:
10
+ result = await do_read()
11
+ if s:
12
+ s.update(output={"bytes": len(result)})
13
+
14
+ # Simpler span without explicit trace:
15
+ with span("fhir_search", resource_type="Patient") as s:
16
+ ...
17
+
18
+ # LLM generation (for mcp-clinical-reasoner):
19
+ with generation("llm_call", model="gpt-4o-mini", input=prompt) as gen:
20
+ resp = await openai_client.chat.completions.create(...)
21
+ if gen:
22
+ gen.update(output=resp.choices[0].message.content,
23
+ usage_details={"input": resp.usage.prompt_tokens,
24
+ "output": resp.usage.completion_tokens})
25
+
26
+ If ``LANGFUSE_PUBLIC_KEY`` / ``LANGFUSE_SECRET_KEY`` are not set, all
27
+ helpers are no-ops and the server runs without observability.
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import os
33
+ import uuid
34
+ from collections.abc import Generator
35
+ from contextlib import contextmanager, suppress
36
+ from typing import Any
37
+
38
+ import structlog
39
+
40
+ log = structlog.get_logger(__name__)
41
+
42
+ _client: Any | None = None
43
+ _initialized: bool = False
44
+
45
+
46
def get_client() -> Any | None:
    """Return the process-wide Langfuse client, creating it on first use.

    Only the first call does any work; its outcome, including ``None`` after
    a failure, is what every later call returns.

    Returns:
        The Langfuse client, or ``None`` when credentials are missing, the
        ``langfuse`` package cannot be imported, or construction failed.
    """
    global _client, _initialized
    if _initialized:
        return _client
    # Flag before doing anything else so that every exit below is final.
    _initialized = True

    pub = os.getenv("LANGFUSE_PUBLIC_KEY", "")
    sec = os.getenv("LANGFUSE_SECRET_KEY", "")
    if not (pub and sec):
        log.debug("langfuse_disabled", reason="LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY not set")
        return None

    try:
        from langfuse import Langfuse  # type: ignore
    except ImportError as exc:
        log.warning("langfuse_import_failed", error=str(exc))
        return None

    endpoint = os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com")
    try:
        _client = Langfuse(public_key=pub, secret_key=sec, host=endpoint)
        log.info("langfuse_initialized", host=endpoint)
    except Exception as exc:
        # Observability is optional: log and keep running without it.
        log.warning("langfuse_init_failed", error=str(exc))
    return _client
73
+
74
+
75
@contextmanager
def trace(
    name: str,
    *,
    session_id: str | None = None,
    user_id: str | None = None,
    tags: list[str] | None = None,
    **metadata: Any,
) -> Generator[Any, None, None]:
    """Context manager that creates a top-level LangFuse trace.

    A trace represents one logical request (e.g. one MCP ``call_tool``
    invocation). Nest ``span()`` calls inside with ``parent=t``.

    Exceptions raised inside the ``with`` body propagate to the caller
    unchanged; only failures while *creating* the trace are swallowed and
    logged. The client is flushed on exit, and flush errors are ignored.

    Args:
        name: Trace name.
        session_id: Optional session identifier attached to the trace.
        user_id: Optional user identifier attached to the trace.
        tags: Optional tags; ``None`` is sent as an empty list.
        **metadata: Extra key/value pairs attached as trace metadata.

    Yields:
        The trace object, or ``None`` if LangFuse is disabled or the trace
        could not be created.

    Example::

        with trace("fhir_read", session_id=session_id) as t:
            with span("http_get", parent=t, resource_type="Patient"):
                ...
    """
    client = get_client()
    if client is None:
        yield None
        return

    trace_id = str(uuid.uuid4())
    try:
        # NOTE(review): ``client.trace(...)`` looks like the pre-v3 SDK call
        # style — confirm it exists on the pinned ``langfuse>=3.0`` client;
        # if it does not, this always degrades to yielding None.
        tr = client.trace(
            id=trace_id,
            name=name,
            session_id=session_id,
            user_id=user_id,
            tags=tags or [],
            metadata=metadata,
        )
    except Exception as exc:
        log.warning("langfuse_trace_error", name=name, error=str(exc))
        yield None
        return

    # The yield lives outside the creation ``try`` on purpose: a generator
    # used with @contextmanager must yield exactly once, so an exception from
    # the caller's block must not be caught here and followed by another yield.
    try:
        yield tr
    finally:
        with suppress(Exception):
            client.flush()
120
+
121
+
122
@contextmanager
def span(
    name: str,
    *,
    parent: Any = None,
    **kwargs: Any,
) -> Generator[Any, None, None]:
    """Context manager that wraps a logical step in a LangFuse span.

    Exceptions raised inside the ``with`` body propagate to the caller
    unchanged. Failures while creating or ending the span are logged and
    swallowed so that observability problems never break the wrapped work.

    Args:
        name: Span name (e.g. ``"fhir_read"``).
        parent: A trace or span object to nest under (optional).
        **kwargs: Metadata attached to the span; ``None`` values are dropped.

    Yields:
        The span object (or ``None`` if LangFuse is disabled or the span could
        not be created) so callers can attach output metadata::

            with span("validate", profile=profile_url) as s:
                result = validate(resource)
                if s:
                    s.update(output=result.model_dump())
    """
    client = get_client()
    if client is None:
        yield None
        return

    kwargs_clean = {k: v for k, v in kwargs.items() if v is not None}
    try:
        if parent is not None:
            # NOTE(review): ``parent.span(...)`` assumes the parent exposes a
            # ``span`` factory — confirm against the pinned langfuse SDK.
            s = parent.span(name=name, metadata=kwargs_clean)
        else:
            s = client.start_span(name=name, metadata=kwargs_clean)
    except Exception as exc:
        log.warning("langfuse_span_error", name=name, error=str(exc))
        yield None
        return

    # Yield outside the creation ``try``: a @contextmanager generator must
    # yield exactly once, so caller exceptions must not reach a handler that
    # yields again.
    try:
        yield s
    finally:
        try:
            s.end()
        except Exception as exc:
            log.warning("langfuse_span_error", name=name, error=str(exc))
162
+
163
+
164
@contextmanager
def generation(
    name: str,
    model: str = "",
    *,
    parent: Any = None,
    **kwargs: Any,
) -> Generator[Any, None, None]:
    """Context manager for an LLM generation span.

    Callers should call ``gen.update(output=..., usage_details=...)`` inside
    the block for cost/token tracking.

    Exceptions raised inside the ``with`` body propagate to the caller
    unchanged. Failures while creating or ending the generation are logged
    and swallowed.

    Args:
        name: Generation name.
        model: Model identifier (e.g. ``"gpt-4o-mini"``).
        parent: Parent trace or span (optional).
        **kwargs: Extra metadata; ``None`` values are dropped.

    Yields:
        The generation object, or ``None`` if LangFuse is disabled or the
        generation could not be created.
    """
    client = get_client()
    if client is None:
        yield None
        return

    kwargs_clean = {k: v for k, v in kwargs.items() if v is not None}
    try:
        if parent is not None:
            # NOTE(review): ``parent.generation(...)`` assumes the parent
            # exposes a ``generation`` factory — confirm against the pinned SDK.
            g = parent.generation(name=name, model=model, metadata=kwargs_clean)
        else:
            g = client.start_generation(name=name, model=model, metadata=kwargs_clean)
    except Exception as exc:
        log.warning("langfuse_generation_error", name=name, error=str(exc))
        yield None
        return

    # Yield outside the creation ``try``: a @contextmanager generator must
    # yield exactly once, so caller exceptions must not reach a handler that
    # yields again.
    try:
        yield g
    finally:
        try:
            g.end()
        except Exception as exc:
            log.warning("langfuse_generation_error", name=name, error=str(exc))
@@ -0,0 +1,68 @@
1
+ """Structured logging setup using structlog.
2
+
3
+ Call ``configure_logging()`` once at process startup (e.g. in server.py).
4
+ After that, every module can use::
5
+
6
+ import structlog
7
+ log = structlog.get_logger(__name__)
8
+ log.info("event", key="value")
9
+
10
+ In development (LOG_FORMAT=console) output is human-readable with colours.
11
+ In production (LOG_FORMAT=json, the default) every line is a JSON object
12
+ suitable for Loki / Azure Monitor ingestion.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import logging
18
+ import sys
19
+
20
+ import structlog
21
+
22
+
23
def configure_logging(level: str = "INFO", fmt: str = "json") -> None:
    """Configure structlog and stdlib logging.

    Installs a single stderr handler on the root logger (replacing any
    existing handlers) whose formatter renders both structlog events and
    plain stdlib log records through the same processor chain.

    Args:
        level: Logging level name, e.g. "DEBUG", "INFO", "WARNING"
            (case-insensitive). Unknown names fall back to INFO.
        fmt: "console" for the human-readable dev renderer; any other value
            (default "json") selects JSON output.
    """
    # Look the name up in logging's own level table rather than via getattr on
    # the module, which could return unrelated attributes (e.g. BASIC_FORMAT).
    log_level = logging.getLevelNamesMapping().get(level.upper(), logging.INFO)

    shared_processors: list[structlog.types.Processor] = [
        structlog.contextvars.merge_contextvars,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
    ]

    render_chain: list[structlog.types.Processor]
    if fmt == "console":
        # ConsoleRenderer formats ``exc_info`` itself.
        render_chain = [structlog.dev.ConsoleRenderer()]
    else:
        # JSONRenderer does not format tracebacks; turn ``exc_info`` into an
        # "exception" string first so logged exceptions are not lost.
        render_chain = [
            structlog.processors.format_exc_info,
            structlog.processors.JSONRenderer(),
        ]

    structlog.configure(
        processors=[
            *shared_processors,
            # Hand the event dict to the stdlib formatter configured below.
            structlog.stdlib.ProcessorFormatter.wrap_for_formatter,
        ],
        logger_factory=structlog.stdlib.LoggerFactory(),
        wrapper_class=structlog.stdlib.BoundLogger,
        cache_logger_on_first_use=True,
    )

    formatter = structlog.stdlib.ProcessorFormatter(
        processors=[
            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
            *render_chain,
        ],
        # Records from non-structlog loggers get the same enrichment.
        foreign_pre_chain=shared_processors,
    )

    handler = logging.StreamHandler(sys.stderr)
    handler.setFormatter(formatter)

    root_logger = logging.getLogger()
    root_logger.handlers = [handler]
    root_logger.setLevel(log_level)
@@ -0,0 +1,16 @@
1
+ """Base Pydantic models shared across all three MCP servers."""
2
+
3
+ from fhir_mcp_shared.models.fhir import FhirResource, FhirSearchParams
4
+ from fhir_mcp_shared.models.validation import (
5
+ ValidationIssue,
6
+ ValidationReport,
7
+ ValidationSeverity,
8
+ )
9
+
10
+ __all__ = [
11
+ "FhirResource",
12
+ "FhirSearchParams",
13
+ "ValidationIssue",
14
+ "ValidationReport",
15
+ "ValidationSeverity",
16
+ ]
@@ -0,0 +1,33 @@
1
+ """FHIR resource and search models."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from pydantic import BaseModel, Field, HttpUrl
8
+
9
+
10
class FhirResource(BaseModel):
    """A single FHIR resource as returned by the server.

    Only ``resourceType``, ``id`` and ``meta`` are modelled explicitly; the
    model is configured to accept unknown keys so the full FHIR spec does not
    have to be re-modelled.
    """

    # populate_by_name: accept ``resource_type`` as well as the ``resourceType``
    # alias. extra="allow": keep keys that are not declared below.
    model_config = {"populate_by_name": True, "extra": "allow"}

    resource_type: str = Field(alias="resourceType")
    id: str | None = None
    meta: dict[str, Any] | None = None
    # Intended as the holder for all remaining fields, stored as-is.
    # NOTE(review): with extra="allow" pydantic appears to keep undeclared keys
    # on the model itself (``model_extra``), not in this field — confirm intent.
    extra: dict[str, Any] = Field(default_factory=dict)
20
+
21
+
22
class FhirSearchParams(BaseModel):
    """Input model describing one FHIR search request."""

    resource_type: str = Field(
        description="FHIR resource type, e.g. 'Patient', 'Observation'",
    )
    # String-to-string map; defaults to no search parameters.
    params: dict[str, str] = Field(
        description="FHIR search parameters, e.g. {'family': 'Smith', '_count': '10'}",
        default_factory=dict,
    )
    # Optional per-request override, validated as an HTTP(S) URL.
    base_url: HttpUrl | None = Field(
        description="Override the default FHIR server base URL for this request.",
        default=None,
    )
@@ -0,0 +1,41 @@
1
+ """Validation report models — HAPI validator output, normalised.
2
+
3
+ Ported from P1 (fhir-mapping-agent) with minor adaptations for P3.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from enum import StrEnum
9
+
10
+ from pydantic import BaseModel, Field
11
+
12
+
13
+ class ValidationSeverity(StrEnum):
14
+ FATAL = "fatal"
15
+ ERROR = "error"
16
+ WARNING = "warning"
17
+ INFORMATION = "information"
18
+
19
+
20
class ValidationIssue(BaseModel):
    """One normalised issue from a validation run."""

    severity: ValidationSeverity
    code: str = Field(
        description="HAPI/FHIR issue code, e.g. 'required', 'code-invalid'",
    )
    # Optional; left as None when no location is supplied.
    location: str | None = Field(
        description="FHIRPath of the offending element",
        default=None,
    )
    message: str
25
+
26
+
27
class ValidationReport(BaseModel):
    """Outcome of validating one resource against a profile.

    ``error_count`` and ``warning_count`` are always recomputed from
    ``issues`` after construction, so values passed in for them are
    overwritten.
    """

    profile: str
    resource_type: str
    is_conformant: bool
    issues: list[ValidationIssue] = Field(default_factory=list)
    error_count: int = 0
    warning_count: int = 0

    def model_post_init(self, _ctx: object, /) -> None:
        """Derive the error and warning tallies from ``issues``."""
        # ERROR and FATAL both count as errors; INFORMATION is not tallied.
        error_levels = (ValidationSeverity.ERROR, ValidationSeverity.FATAL)
        errors = 0
        warnings = 0
        for issue in self.issues:
            if issue.severity in error_levels:
                errors += 1
            elif issue.severity == ValidationSeverity.WARNING:
                warnings += 1
        self.error_count = errors
        self.warning_count = warnings
@@ -0,0 +1,52 @@
1
+ """Placeholder tests for shared library."""
2
+
3
+ from fhir_mcp_shared.eval import EvalRunner, GoldenCase
4
+
5
+
6
def test_eval_runner_all_pass() -> None:
    """A case whose expected fields are a subset of the response passes."""
    import asyncio

    async def fake_invoke(_tool: str, _input: dict) -> dict:  # type: ignore[type-arg]
        # Response carries an extra "id" key; subset matching must ignore it.
        return {"resourceType": "Patient", "id": "1"}

    golden = GoldenCase(
        id="t1",
        description="basic pass",
        tool="fhir_read",
        input={"resource_type": "Patient", "resource_id": "1"},
        expected={"resourceType": "Patient"},
    )
    suite = EvalRunner(cases=[golden])

    outcomes = asyncio.run(suite.run(invoke_fn=fake_invoke))

    assert len(outcomes) == 1
    assert outcomes[0].passed
    EvalRunner.assert_threshold(outcomes, min_pass_rate=1.0)
27
+
28
+
29
def test_eval_runner_fail_threshold() -> None:
    """A failing case drops the pass rate below the threshold and raises."""
    import asyncio

    import pytest

    async def empty_invoke(_tool: str, _input: dict) -> dict:  # type: ignore[type-arg]
        # Empty response: the expected "resourceType" can never match.
        return {}

    golden = GoldenCase(
        id="t1",
        description="fail case",
        tool="fhir_read",
        input={},
        expected={"resourceType": "Patient"},
    )
    suite = EvalRunner(cases=[golden])

    outcomes = asyncio.run(suite.run(invoke_fn=empty_invoke))
    assert not outcomes[0].passed

    with pytest.raises(AssertionError, match="pass rate"):
        EvalRunner.assert_threshold(outcomes, min_pass_rate=0.85)