fhir-reada 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fhir_reada-0.1.0/LICENSE +21 -0
- fhir_reada-0.1.0/PKG-INFO +9 -0
- fhir_reada-0.1.0/README.md +75 -0
- fhir_reada-0.1.0/fhir_reada/__init__.py +1 -0
- fhir_reada-0.1.0/fhir_reada/cli.py +43 -0
- fhir_reada-0.1.0/fhir_reada/cohort_builder.py +28 -0
- fhir_reada-0.1.0/fhir_reada/deid_utils.py +17 -0
- fhir_reada-0.1.0/fhir_reada/exporter.py +12 -0
- fhir_reada-0.1.0/fhir_reada/fhir_connector.py +34 -0
- fhir_reada-0.1.0/fhir_reada.egg-info/PKG-INFO +9 -0
- fhir_reada-0.1.0/fhir_reada.egg-info/SOURCES.txt +15 -0
- fhir_reada-0.1.0/fhir_reada.egg-info/dependency_links.txt +1 -0
- fhir_reada-0.1.0/fhir_reada.egg-info/entry_points.txt +2 -0
- fhir_reada-0.1.0/fhir_reada.egg-info/requires.txt +2 -0
- fhir_reada-0.1.0/fhir_reada.egg-info/top_level.txt +1 -0
- fhir_reada-0.1.0/pyproject.toml +13 -0
- fhir_reada-0.1.0/setup.cfg +4 -0
fhir_reada-0.1.0/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Tobenna Oduah
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -0,0 +1,9 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: fhir-reada
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: A FHIR cohort extractor and de-identification tool for clinical research.
|
5
|
+
Author-email: Tobenna Oduah <tjlite81@gmail.com>
|
6
|
+
License: MIT
|
7
|
+
License-File: LICENSE
|
8
|
+
Requires-Dist: requests
|
9
|
+
Requires-Dist: python-dateutil
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# FHIR-READA
|
2
|
+
|
3
|
+
**FHIR Research Extractor And De-identified Analyzer**
|
4
|
+
|
5
|
+
A Python CLI tool for extracting, de-identifying, and exporting patient data from FHIR servers — built for research teams, clinicians, and medical institutions.
|
6
|
+
|
7
|
+
---
|
8
|
+
|
9
|
+
## 🔍 Features
|
10
|
+
|
11
|
+
- Filter patients by age range and diagnosis
|
12
|
+
- Extract from any FHIR-compliant server
|
13
|
+
- Fetch Conditions and Observations
|
14
|
+
- Automatically de-identify patient IDs
|
15
|
+
- Export results to CSV
|
16
|
+
- Easy-to-use CLI with flexible filters
|
17
|
+
|
18
|
+
---
|
19
|
+
|
20
|
+
## 💻 Installation
|
21
|
+
|
22
|
+
**From PyPI**
|
23
|
+
|
24
|
+
```bash
|
25
|
+
pip install fhir-reada
|
26
|
+
```
|
27
|
+
|
28
|
+
**From Source (for development or testing)**
|
29
|
+
|
30
|
+
```bash
|
31
|
+
git clone https://github.com/TOduah/fhir-reada.git
|
32
|
+
cd fhir-reada
|
33
|
+
pip install -e .
|
34
|
+
```
|
35
|
+
|
36
|
+
---
|
37
|
+
|
38
|
+
## 🚀 Usage (CLI)
|
39
|
+
|
40
|
+
```bash
|
41
|
+
fhir-reada --url https://hapi.fhir.org/baseR4 --min-age 25 --max-age 40 --diagnosis diabetes --out output.csv
|
42
|
+
```
|
43
|
+
|
44
|
+
---
|
45
|
+
|
46
|
+
## ⚙️ CLI Options
|
47
|
+
|
48
|
+
| Option | Description |
|
49
|
+
|----------------|------------------------------------------------------|
|
50
|
+
| `--url` | FHIR base URL (e.g. `https://hapi.fhir.org/baseR4`) |
|
51
|
+
| `--min-age` | Minimum age for cohort (e.g. `25`) |
|
52
|
+
| `--max-age` | Maximum age for cohort (e.g. `40`) |
|
53
|
+
| `--diagnosis` | Diagnosis keyword (optional, e.g. `diabetes`) |
|
54
|
+
| `--out` | Output filename for CSV export |
|
55
|
+
|
56
|
+
---
|
57
|
+
|
58
|
+
## 🏥 Who Is This For?
|
59
|
+
|
60
|
+
- Research institutions running clinical studies
|
61
|
+
- Hospital data science or IT departments
|
62
|
+
- Medical schools teaching FHIR and analytics
|
63
|
+
- Independent developers working with public or private FHIR APIs
|
64
|
+
|
65
|
+
---
|
66
|
+
|
67
|
+
## 📄 License
|
68
|
+
|
69
|
+
This project is licensed under the [MIT License](LICENSE).
|
70
|
+
|
71
|
+
---
|
72
|
+
|
73
|
+
## 💬 Author
|
74
|
+
|
75
|
+
Built by [Tobenna Oduah](https://github.com/toduah).
|
@@ -0,0 +1 @@
|
|
1
|
+
# Package version string; matches the `version` value declared in pyproject.toml.
__version__ = "0.1.0"
|
@@ -0,0 +1,43 @@
|
|
1
|
+
import argparse
|
2
|
+
from fhir_reada.fhir_connector import FHIRClient
|
3
|
+
from fhir_reada.cohort_builder import build_cohort
|
4
|
+
from fhir_reada.deid_utils import deidentify_patient_bundle
|
5
|
+
from fhir_reada.exporter import export_to_csv
|
6
|
+
|
7
|
+
def run_cli(url, min_age, max_age, diagnosis, output_file):
    """Fetch a cohort, filter it by diagnosis, de-identify it and export it.

    Args:
        url: Base URL of the FHIR server to query.
        min_age: Lower bound of the age filter, in years.
        max_age: Upper bound of the age filter, in years.
        diagnosis: Optional keyword. When truthy, a patient is kept only if
            at least one of their condition labels contains it
            (case-insensitive substring match).
        output_file: Path of the CSV file to write.
    """
    client = FHIRClient(url)
    age_range = (min_age, max_age)

    print(f"Fetching patients aged {min_age}–{max_age} from {url}...")
    patients = build_cohort(client, age_range)

    # Lower-case the keyword once instead of once per condition label.
    keyword = diagnosis.lower() if diagnosis else None

    bundles = []
    for patient in patients:
        pid = patient["id"]
        conditions = client.get_conditions(pid)

        if keyword and not any(keyword in label.lower() for label in conditions):
            continue

        # Observations are fetched only for patients that passed the filter,
        # which saves one request per rejected patient.
        observations = client.get_observations(pid)
        bundles.append({
            "patient": patient,
            "conditions": conditions,
            "observations": observations,
        })

    print("De-identifying and exporting to CSV...")
    deidentified = [deidentify_patient_bundle(b) for b in bundles]
    export_to_csv(deidentified, filename=output_file)

    # export_to_csv writes no file (and says so) when there are no rows, so
    # only report success when rows were actually handed over.
    if deidentified:
        print(f"Exported to {output_file}")
|
33
|
+
|
34
|
+
def main(argv=None):
    """Parse command-line arguments and run the cohort extraction.

    Args:
        argv: Optional list of argument strings. ``None`` (the default, and
            what the console-script entry point uses) makes argparse read
            ``sys.argv[1:]``.

    Exits with argparse's usage error (status 2) when an age is negative or
    when ``--min-age`` is greater than ``--max-age``; such a range cannot
    match any patient.
    """
    parser = argparse.ArgumentParser(description="FHIR Cohort Extractor")
    parser.add_argument("--url", required=True, help="FHIR base URL (e.g., https://hapi.fhir.org/baseR4)")
    parser.add_argument("--min-age", type=int, default=3,
                        help="Minimum age in years (default: %(default)s)")
    parser.add_argument("--max-age", type=int, default=90,
                        help="Maximum age in years (default: %(default)s)")
    parser.add_argument("--diagnosis", help="Filter for specific condition keyword (e.g., diabetes)")
    parser.add_argument("--out", required=True, help="Output CSV file")

    args = parser.parse_args(argv)

    # Reject ranges that can never match before any network traffic happens.
    if args.min_age < 0 or args.max_age < 0:
        parser.error("--min-age and --max-age must be non-negative")
    if args.min_age > args.max_age:
        parser.error("--min-age must not be greater than --max-age")

    run_cli(args.url, args.min_age, args.max_age, args.diagnosis, args.out)
|
@@ -0,0 +1,28 @@
|
|
1
|
+
from datetime import date, timedelta
|
2
|
+
|
3
|
+
def _years_before(day, years):
    """Return the calendar date ``years`` whole years before ``day``.

    29 February is mapped to 28 February when the target year is not a
    leap year.
    """
    try:
        return day.replace(year=day.year - years)
    except ValueError:
        if (day.month, day.day) != (2, 29):
            # Not the leap-day case (e.g. the year fell out of range).
            raise
        return day.replace(year=day.year - years, day=28)


def calculate_birthdate_range(age_range):
    """Return the inclusive birthdate window for an inclusive age range.

    Args:
        age_range: ``(min_age, max_age)`` in whole years; values are
            truncated to ``int``.

    Returns:
        ``(oldest_birthdate, youngest_birthdate)`` as ISO-8601 date strings.
        Anyone born in this window is between ``min_age`` and ``max_age``
        years old today, both ends included.
    """
    min_age, max_age = int(age_range[0]), int(age_range[1])
    today = date.today()

    # A person is still `max_age` until the day before they turn
    # `max_age + 1`, so the earliest matching birthdate is one day after
    # "today, max_age + 1 years ago".
    oldest_birthdate = _years_before(today, max_age + 1) + timedelta(days=1)
    # The youngest matching person turns `min_age` today.
    youngest_birthdate = _years_before(today, min_age)
    return (oldest_birthdate.isoformat(), youngest_birthdate.isoformat())
|
8
|
+
|
9
|
+
def extract_patient_info(entry):
    """Flatten a FHIR Bundle entry holding a Patient into a small dict.

    Args:
        entry: Bundle entry; its ``"resource"`` value is read as the
            Patient resource.

    Returns:
        Dict with ``id``, ``name``, ``gender`` and ``birthDate``. ``name`` is
        the given names followed by the family name of the first ``name``
        element, space-separated, or ``""`` when no name is recorded. The
        other values are ``None`` when absent from the resource.

    Raises:
        KeyError: If ``entry`` has no ``"resource"`` key.
    """
    resource = entry["resource"]

    # `name` may be missing, null or an empty list; all three mean "no name"
    # and must not raise.
    names = resource.get("name") or [{}]
    first_name = names[0]

    parts = list(first_name.get("given") or [])
    family = first_name.get("family")
    if family:
        parts.append(family)

    return {
        "id": resource.get("id"),
        # Joining only the parts that exist avoids leading/trailing spaces.
        "name": " ".join(parts),
        "gender": resource.get("gender"),
        "birthDate": resource.get("birthDate"),
    }
|
18
|
+
|
19
|
+
def build_cohort(client, age_range):
    """Return flattened patient records for everyone in ``age_range``.

    The age range is converted to a birthdate window, the window is passed to
    ``client.search_patients``, and every returned entry is run through
    ``extract_patient_info``.
    """
    window = calculate_birthdate_range(age_range)
    return [extract_patient_info(item) for item in client.search_patients(window)]
|
@@ -0,0 +1,17 @@
|
|
1
|
+
from hashlib import sha256
|
2
|
+
|
3
|
+
def deidentify_patient_bundle(bundle, *, salt=""):
    """Turn a patient bundle into a flat, pseudonymised export row.

    Args:
        bundle: Dict with ``"patient"`` (a dict), ``"conditions"`` and
            ``"observations"`` (iterables of labels).
        salt: Optional secret prepended to the patient id before hashing.
            With the default empty salt the pseudo-id is a plain truncated
            SHA-256 of the id, which anyone who can guess the id can
            recompute; pass a private salt to prevent that.

    Returns:
        Dict with ``id`` (first 10 hex digits of the SHA-256 digest),
        ``gender``, ``birthDate``, and ``conditions`` / ``observations``
        joined with ``"; "`` (or the string ``"None"`` when empty).

    Raises:
        KeyError: If ``bundle`` lacks one of the three expected keys.
    """
    patient = bundle["patient"]
    conditions = bundle["conditions"]
    observations = bundle["observations"]

    # The id key can be present with a null value, in which case
    # `.get("id", "")` would hand back None and `.encode()` would fail.
    patient_id = str(patient.get("id") or "")
    pseudo_id = sha256((salt + patient_id).encode()).hexdigest()[:10]

    return {
        "id": pseudo_id,
        "gender": patient.get("gender"),
        "birthDate": patient.get("birthDate"),
        # str() keeps the join from failing on a non-string label.
        "conditions": "; ".join(str(c) for c in conditions) if conditions else "None",
        "observations": "; ".join(str(o) for o in observations) if observations else "None",
    }
|
@@ -0,0 +1,12 @@
|
|
1
|
+
import csv
|
2
|
+
|
3
|
+
def export_to_csv(patients, filename="cohort.csv"):
    """Write ``patients`` (a list of dicts) to ``filename`` as UTF-8 CSV.

    The header row is taken from the keys of the first record. When
    ``patients`` is empty a notice is printed and no file is created.
    """
    if patients:
        columns = patients[0].keys()
        with open(filename, "w", newline="", encoding="utf-8") as handle:
            table = csv.DictWriter(handle, fieldnames=columns)
            table.writeheader()
            table.writerows(patients)
        return

    print("No data to export.")
|
@@ -0,0 +1,34 @@
|
|
1
|
+
import requests
|
2
|
+
|
3
|
+
class FHIRClient:
    """Minimal read-only client for the search API of a FHIR server.

    Args:
        base_url: Server base URL; a trailing slash is removed.
        timeout: Seconds passed as ``timeout`` to every HTTP request so a
            stalled server cannot hang the caller indefinitely.
        max_pages: Optional cap on how many Bundle pages one search follows.
            ``None`` follows every ``next`` link the server returns.
    """

    def __init__(self, base_url, timeout=30, max_pages=None):
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout
        self.max_pages = max_pages

    @staticmethod
    def _next_link(bundle):
        """Return the URL of the Bundle's ``next`` link, or ``None``."""
        for link in bundle.get("link") or []:
            if link.get("relation") == "next":
                return link.get("url")
        return None

    @staticmethod
    def _concept_text(concept):
        """Return a label for a CodeableConcept-shaped dict.

        Prefers ``text``, then the first non-empty ``coding[].display``,
        and falls back to ``"Unknown"``.
        """
        concept = concept or {}
        text = concept.get("text")
        if text:
            return text
        for coding in concept.get("coding") or []:
            display = coding.get("display")
            if display:
                return display
        return "Unknown"

    @staticmethod
    def _format_observation(obs):
        """Render one Observation resource as ``"<label>: <value> <unit>"``."""
        label = FHIRClient._concept_text(obs.get("code"))
        quantity = obs.get("valueQuantity") or {}
        value = quantity.get("value", "N/A")
        unit = quantity.get("unit", "")
        return f"{label}: {value} {unit}".strip()

    def _search(self, resource_type, params=None):
        """Run a search and return the entries of every result page.

        Raises:
            requests.HTTPError: If any page request returns an error status.
        """
        url = f"{self.base_url}/{resource_type}"
        entries = []
        visited = set()
        pages = 0
        while url and url not in visited:
            # `visited` guards against a server whose next link loops back.
            visited.add(url)
            response = requests.get(url, params=params, timeout=self.timeout)
            response.raise_for_status()
            bundle = response.json()
            entries.extend(bundle.get("entry", []))

            pages += 1
            if self.max_pages is not None and pages >= self.max_pages:
                break

            url = self._next_link(bundle)
            # The next link already carries the full query string.
            params = None
        return entries

    def search_patients(self, birthdate_range=None):
        """Return Patient Bundle entries, optionally limited by birthdate.

        Args:
            birthdate_range: Optional ``(earliest, latest)`` pair of date
                strings, sent as ``birthdate=ge<earliest>`` and
                ``birthdate=le<latest>``.
        """
        params = {}
        if birthdate_range:
            params["birthdate"] = [f"ge{birthdate_range[0]}", f"le{birthdate_range[1]}"]
        return self._search("Patient", params)

    def get_conditions(self, patient_id):
        """Return one label per Condition recorded for ``patient_id``."""
        # Passing the id through `params` lets requests URL-encode it.
        entries = self._search("Condition", {"patient": patient_id})
        return [self._concept_text(entry["resource"].get("code")) for entry in entries]

    def get_observations(self, patient_id):
        """Return one ``"<label>: <value> <unit>"`` string per Observation."""
        entries = self._search("Observation", {"patient": patient_id})
        return [self._format_observation(entry["resource"]) for entry in entries]
|
@@ -0,0 +1,9 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: fhir-reada
|
3
|
+
Version: 0.1.0
|
4
|
+
Summary: A FHIR cohort extractor and de-identification tool for clinical research.
|
5
|
+
Author-email: Tobenna Oduah <tjlite81@gmail.com>
|
6
|
+
License: MIT
|
7
|
+
License-File: LICENSE
|
8
|
+
Requires-Dist: requests
|
9
|
+
Requires-Dist: python-dateutil
|
@@ -0,0 +1,15 @@
|
|
1
|
+
LICENSE
|
2
|
+
README.md
|
3
|
+
pyproject.toml
|
4
|
+
fhir_reada/__init__.py
|
5
|
+
fhir_reada/cli.py
|
6
|
+
fhir_reada/cohort_builder.py
|
7
|
+
fhir_reada/deid_utils.py
|
8
|
+
fhir_reada/exporter.py
|
9
|
+
fhir_reada/fhir_connector.py
|
10
|
+
fhir_reada.egg-info/PKG-INFO
|
11
|
+
fhir_reada.egg-info/SOURCES.txt
|
12
|
+
fhir_reada.egg-info/dependency_links.txt
|
13
|
+
fhir_reada.egg-info/entry_points.txt
|
14
|
+
fhir_reada.egg-info/requires.txt
|
15
|
+
fhir_reada.egg-info/top_level.txt
|
@@ -0,0 +1 @@
|
|
1
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
fhir_reada
|
@@ -0,0 +1,13 @@
|
|
1
|
+
[project]
|
2
|
+
name = "fhir-reada"
|
3
|
+
version = "0.1.0"
|
4
|
+
description = "A FHIR cohort extractor and de-identification tool for clinical research."
|
5
|
+
authors = [{name = "Tobenna Oduah", email = "tjlite81@gmail.com"}]
|
6
|
+
license = {text = "MIT"}
|
7
|
+
dependencies = [
|
8
|
+
"requests",
|
9
|
+
"python-dateutil"
|
10
|
+
]
|
11
|
+
|
12
|
+
[project.scripts]
|
13
|
+
fhir-reada = "fhir_reada.cli:main"
|