fhir-sheets 2.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fhir-sheets might be problematic. Click here for more details.
- fhir_sheets/__init__.py +0 -0
- fhir_sheets/__init__.pyi +0 -0
- fhir_sheets/cli/__init__.py +0 -0
- fhir_sheets/cli/__init__.pyi +0 -0
- fhir_sheets/cli/main.py +69 -0
- fhir_sheets/cli/main.pyi +6 -0
- fhir_sheets/core/__init__.py +0 -0
- fhir_sheets/core/__init__.pyi +0 -0
- fhir_sheets/core/config/FhirSheetsConfiguration.py +12 -0
- fhir_sheets/core/conversion.py +390 -0
- fhir_sheets/core/conversion.pyi +22 -0
- fhir_sheets/core/fhir_formatting.py +279 -0
- fhir_sheets/core/fhir_formatting.pyi +13 -0
- fhir_sheets/core/model/cohort_data_entity.py +45 -0
- fhir_sheets/core/model/common.py +12 -0
- fhir_sheets/core/model/resource_definition_entity.py +30 -0
- fhir_sheets/core/model/resource_link_entity.py +32 -0
- fhir_sheets/core/read_input.py +102 -0
- fhir_sheets/core/read_input.pyi +8 -0
- fhir_sheets/core/special_values.py +361 -0
- fhir_sheets/core/special_values.pyi +52 -0
- fhir_sheets/core/util.py +1 -0
- fhir_sheets/core/util.pyi +0 -0
- fhir_sheets-2.1.6.dist-info/METADATA +77 -0
- fhir_sheets-2.1.6.dist-info/RECORD +27 -0
- fhir_sheets-2.1.6.dist-info/WHEEL +4 -0
- fhir_sheets-2.1.6.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import datetime
|
|
3
|
+
|
|
4
|
+
# Dictionary of validation regexes for FHIR primitive datatypes.
# Patterns follow the FHIR R4 primitive-type definitions.
type_regexes = {
    'code': r'[^\s]+( [^\s]+)*',
    # Bug fix: the exponent group previously ended '{1,9}}' — the stray '}'
    # made it require a literal '}' after the exponent digits.
    'decimal': r'-?(0|[1-9][0-9]{0,17})(\.[0-9]{1,17})?([eE][+-]?[0-9]{1,9})?',
    'id': r'[A-Za-z0-9\-\.]{1,64}',
    'integer': r'[0]|[-+]?[1-9][0-9]*',
    'oid': r'urn:oid:[0-2](\.(0|[1-9][0-9]*))+',
    'positiveInt': r'[1-9][0-9]*',
    'unsignedInt': r'[0]|([1-9][0-9]*)',
    'uuid': r'urn:uuid:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}'
}
|
|
15
|
+
# Assign final_struct[key] to value; with formatting given the valueType
def assign_value(final_struct, key, value, valueType):
    """Assign final_struct[key] = value, coerced/validated per the FHIR valueType.

    Mutates and returns final_struct. Empty/None values and a missing
    valueType leave the structure untouched.  Formatting errors are caught,
    printed, and leave final_struct unmodified for that key.
    """
    # Removing white space
    if isinstance(value, str):
        value = value.strip()
    # Checking for null or empty string values. If so; we do not construct the value.
    # NOTE: this also skips falsy non-strings (0, False) — preserved original contract.
    if not value:
        return final_struct
    # If the valueType is not provided, do not construct the value.
    if valueType is None:
        return final_struct
    # Normalize once; every comparison below must be against a lowercase literal.
    vt = valueType.lower()
    try:
        if vt == 'address':
            address_value = parse_flexible_address(value)
            if address_value:
                final_struct[key] = address_value
        elif vt == 'base64binary':
            final_struct[key] = value
        elif vt == 'boolean':
            # Bug fix: bool(value) treated every non-empty string — including
            # 'false' — as True. Strings are now compared case-insensitively.
            final_struct[key] = value.lower() != 'false' if isinstance(value, str) else bool(value)
        elif vt == 'codeableconcept':
            final_struct[key] = caret_delimited_string_to_codeableconcept(value)
        elif vt == 'code':
            match = re.search(type_regexes['code'], value)
            final_struct[key] = match.group(0) if match else ''
        elif vt == 'coding':
            final_struct[key] = caret_delimited_string_to_coding(value)
        elif vt == 'date':
            if isinstance(value, datetime.datetime):
                final_struct[key] = value.date()
            elif isinstance(value, str):
                # NOTE(review): this stores a tz-aware *datetime* for string input
                # but a *date* for datetime input — preserved as-is; confirm intent.
                final_struct[key] = parse_iso8601_date(value).replace(tzinfo=datetime.timezone.utc)
        elif vt == 'datetime':
            if isinstance(value, datetime.datetime):
                final_struct[key] = value.replace(tzinfo=datetime.timezone.utc)
            else:
                final_struct[key] = parse_iso8601_datetime(value).replace(tzinfo=datetime.timezone.utc)
        elif vt == 'decimal':
            final_struct[key] = value
        elif vt == 'id':
            # Bug fix (here and below): re.search arguments were swapped —
            # the value was used as the pattern and the pattern as the text.
            match = re.search(type_regexes['id'], value)
            final_struct[key] = match.group(0) if match else ''
        elif vt == 'instant':
            if isinstance(value, datetime.datetime):
                final_struct[key] = value.replace(tzinfo=datetime.timezone.utc)
            else:
                # Bug fix: removed duplicated "final_struct[key] =" assignment.
                final_struct[key] = parse_iso8601_instant(value).replace(tzinfo=datetime.timezone.utc)
        elif vt == 'integer':
            match = re.search(type_regexes['integer'], value)
            final_struct[key] = int(match.group(0)) if match else 0
        elif vt == 'oid':
            match = re.search(type_regexes['oid'], value)
            final_struct[key] = match.group(0) if match else ''
        elif vt == 'positiveint':
            # Bug fix: the original compared valueType.lower() to 'positiveInt',
            # which can never be equal, so this branch was unreachable.
            match = re.search(type_regexes['positiveInt'], value)
            final_struct[key] = int(match.group(0)) if match else 0
        elif vt == 'quantity':
            final_struct[key] = string_to_quantity(value)
        elif vt == 'string':
            final_struct[key] = value
        elif vt == 'string[]':
            # Accumulate repeated assignments for the same key into a list.
            if key not in final_struct:
                final_struct[key] = [value]
            else:
                final_struct[key].append(value)
        elif vt == 'time':
            if isinstance(value, datetime.time):
                final_struct[key] = value
            else:
                final_struct[key] = parse_iso8601_time(value)
        elif vt == 'unsignedint':
            # Bug fix: same unreachable-branch problem as 'positiveint'.
            match = re.search(type_regexes['unsignedInt'], value)
            final_struct[key] = int(match.group(0)) if match else 0
        elif vt == 'uri':
            final_struct[key] = value
        elif vt == 'url':
            final_struct[key] = value
        elif vt == 'uuid':
            match = re.search(type_regexes['uuid'], value)
            final_struct[key] = match.group(0) if match else ''
        # NOTE: a second, unreachable 'coding' branch (shadowed by the one
        # above) was removed.
        else:
            print(f"ERROR: - Rending Value - {key} - {value} - {valueType} - Saw a valueType of '{valueType}' unsupported in current formatting")
    except ValueError as e:
        # Formatting/parsing problems are reported but never fatal.
        print(e)
    return final_struct
|
|
105
|
+
|
|
106
|
+
def parse_iso8601_date(input_string):
    """Extract a YYYY-MM-DD date from input_string.

    Returns a naive datetime at midnight for the first date found anywhere
    in the string; raises ValueError when no date is present.
    """
    date_match = re.search(r'(\d{4}-\d{2}-\d{2})', input_string)
    if date_match is None:
        raise ValueError(f"Input string '{input_string}' is not in the valid ISO 8601 date format")
    return datetime.datetime.strptime(date_match.group(1), '%Y-%m-%d')
|
|
115
|
+
|
|
116
|
+
def parse_iso8601_datetime(input_string):
    """Parse an ISO 8601 date or datetime string.

    Returns a UTC-aware datetime when a time component is present, or a naive
    midnight datetime for date-only input. Raises ValueError otherwise.

    Bug fixes vs. the original:
    - 'Z'-suffixed datetimes (e.g. '2020-01-02T03:04:05Z') previously raised,
      because the matched text kept the 'Z' that strptime cannot consume.
    - The identical Z / non-Z branches were collapsed into one path.
    """
    # Regular expression to match ISO 8601 format with optional timezone 'Z'
    pattern = r'(\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(Z)?)?)'
    match = re.search(pattern, input_string)
    if not match:
        raise ValueError(f"Input string '{input_string}' is not in the valid ISO 8601 format date or datetime format")
    token = match.group(1)
    # Strip the UTC designator before strptime; '%H:%M:%S' cannot consume 'Z'.
    if token.endswith('Z'):
        token = token[:-1]
    try:
        # Full datetime → always returned UTC-aware (original behavior for
        # both Z and non-Z inputs).
        return datetime.datetime.strptime(token, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=datetime.timezone.utc)
    except ValueError:
        # Time part missing — parse the date-only form; strptime already
        # yields midnight, and the result stays naive (original behavior).
        try:
            return datetime.datetime.strptime(token, '%Y-%m-%d')
        except ValueError:
            raise ValueError(f"Input string '{input_string}' is not in the valid ISO 8601 format date or datetime format")
|
|
147
|
+
|
|
148
|
+
def parse_iso8601_instant(input_string):
    """Parse an ISO 8601 instant (datetime with optional .ms and optional 'Z').

    Returns a UTC-aware datetime when the instant ends in 'Z', otherwise a
    naive datetime. Raises ValueError for non-matching input.

    Bug fix vs. the original: the matched text kept a trailing 'Z', which
    strptime cannot consume, so valid 'Z'-suffixed instants raised.
    """
    # Regular expression to match ISO 8601 instant format with optional milliseconds and 'Z'
    pattern = r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{1,3})?(Z)?)'
    match = re.search(pattern, input_string)
    if not match:
        raise ValueError(f"Input string '{input_string}' is not in the valid ISO 8601 instant format")
    token = match.group(1)
    is_utc = token.endswith('Z')
    if is_utc:
        # Drop the UTC designator before strptime.
        token = token[:-1]
    # Decide the format from the *matched* fractional-seconds group rather
    # than scanning the whole input string for '.'.
    fmt = '%Y-%m-%dT%H:%M:%S.%f' if match.group(2) else '%Y-%m-%dT%H:%M:%S'
    parsed = datetime.datetime.strptime(token, fmt)
    return parsed.replace(tzinfo=datetime.timezone.utc) if is_utc else parsed
|
|
171
|
+
|
|
172
|
+
def parse_iso8601_time(input_string):
    """Parse an HH:MM:SS[.fraction] time string into a datetime.time.

    Raises ValueError when no valid time is found in the string.
    """
    # HH:MM:SS with optional fractional seconds (up to 9 digits); leap
    # second '60' is accepted by the pattern.
    pattern = r'((?:[01][0-9]|2[0-3]):[0-5][0-9]:([0-5][0-9]|60)(\.[0-9]{1,9})?)'
    found = re.search(pattern, input_string)
    if found is None:
        raise ValueError(f"Input string '{input_string}' is not in the valid time format")
    hh, mm, ss = found.group(1).split(':')
    # float() absorbs any fractional part of the seconds field.
    whole_seconds = float(ss)
    return datetime.time(
        hour=int(hh),
        minute=int(mm),
        second=int(whole_seconds),
        microsecond=int((whole_seconds % 1) * 1_000_000),
    )
|
|
187
|
+
|
|
188
|
+
def parse_flexible_address(address):
    """Parse a caret-delimited address 'line^city^district^postal^state^country'.

    Returns a dict with only the non-empty components ('line' wrapped in a
    one-element list), or None when nothing matches / every field is empty.

    Bug fix vs. the original: when the 'line' component was empty it was
    filtered out of the result dict, and the subsequent result['line'] lookup
    raised KeyError; .get() is used instead.
    """
    # Postal code is often numeric US-style (though it may vary internationally)
    postal_code_pattern = r'(?P<postalCode>\d{5}(?:-\d{4})?|)'
    # State is typically a two-letter code (though this may vary internationally as well)
    state_pattern = r'(?P<state>[A-Za-z]{2}|)'
    # Country is whatever word/space text follows the final caret
    country_pattern = r'(?:\s*(?P<country>[\w\s]+|))?$'
    # Full pattern: six caret-separated components
    full_pattern = rf'^(?P<line>.*?)\^(?P<city>.*?)\^(?P<district>.*?)\^{postal_code_pattern}\^{state_pattern}\^{country_pattern}'

    match = re.search(full_pattern, address)
    if not match:
        return None  # Return None if the format doesn't match

    # Keep only components that actually matched something.
    result = {k: v for k, v in match.groupdict().items() if v not in ("", None)}
    if not result:
        return None
    # FHIR Address.line is an array; wrap the single parsed line.
    line_value = result.get('line')
    if line_value and isinstance(line_value, str):
        result['line'] = [line_value]
    return result
|
|
214
|
+
|
|
215
|
+
def caret_delimited_string_to_codeableconcept(caret_delimited_str):
    """Build a FHIR CodeableConcept dict from a delimited string.

    '~' separates multiple codings; within each coding '^' separates
    system^code^display. A 4th component on the final coding becomes the
    concept-level 'text'.
    """
    codeable_concept = {"coding": []}
    parts = []
    for segment in caret_delimited_str.split('~'):
        parts = segment.split('^')
        coding_entry = {}
        # Populate only the components actually present in this segment.
        if len(parts) > 0:
            coding_entry['system'] = parts[0] or ''
        if len(parts) > 1:
            coding_entry['code'] = parts[1] or ''
        if len(parts) > 2:
            coding_entry['display'] = parts[2] or ''
        codeable_concept['coding'].append(coding_entry)
    # A trailing 4th component (on the last segment) is the concept text.
    if len(parts) == 4:
        codeable_concept['text'] = parts[3]
    return codeable_concept
|
|
243
|
+
|
|
244
|
+
def caret_delimited_string_to_coding(caret_delimited_str):
    """Build a single FHIR Coding dict from a 'system^code^display' string.

    Only the components present in the input appear in the result.
    """
    components = caret_delimited_str.split('^')
    coding = {}
    # Fill system/code/display in order, for however many parts exist.
    if len(components) > 0:
        coding['system'] = components[0] or ''
    if len(components) > 1:
        coding['code'] = components[1] or ''
    if len(components) > 2:
        coding['display'] = components[2] or ''
    return coding
|
|
260
|
+
|
|
261
|
+
def string_to_quantity(quantity_str):
    """Convert a 'value^unit' string into a FHIR Quantity dict.

    The unit, when present, is recorded as both 'unit' and UCUM 'code'.
    Raises ValueError if the value part is not a number.
    """
    value_part, separator, unit_part = quantity_str.partition('^')
    quantity = {'value': float(value_part)}
    # Only a present '^' means a unit was supplied (even an empty one).
    if separator:
        quantity['unit'] = unit_part
        quantity['system'] = 'http://unitsofmeasure.org'
        quantity['code'] = unit_part
    return quantity
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from _typeshed import Incomplete
|
|
2
|
+
|
|
3
|
+
# Mapping of FHIR primitive-type names to validation regex strings.
type_regexes: Incomplete

# Assigns final_struct[key] = value, formatted/validated per valueType.
def assign_value(final_struct, key, value, valueType): ...
# Parses a YYYY-MM-DD date out of input_string; raises ValueError on failure.
def parse_iso8601_date(input_string): ...
# Parses an ISO 8601 date or datetime string; raises ValueError on failure.
def parse_iso8601_datetime(input_string): ...
# Parses an ISO 8601 instant (datetime, optional .ms / 'Z'); raises ValueError.
def parse_iso8601_instant(input_string): ...
# Parses an HH:MM:SS[.frac] time string; raises ValueError on failure.
def parse_iso8601_time(input_string): ...
# Parses a caret-delimited address; returns a dict of components or None.
def parse_flexible_address(address): ...
# Builds a CodeableConcept dict from 'sys^code^display[^text]' ('~'-separated codings).
def caret_delimited_string_to_codeableconcept(caret_delimited_str): ...
# Builds a single Coding dict from 'sys^code^display'.
def caret_delimited_string_to_coding(caret_delimited_str): ...
# Builds a Quantity dict from 'value^unit'.
def string_to_quantity(quantity_str): ...
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
from typing import Dict, Any, List, Optional, Tuple
|
|
2
|
+
|
|
3
|
+
from .common import get_value_from_keys
|
|
4
|
+
|
|
5
|
+
class HeaderEntry:
    """One column header from the PatientData sheet.

    Captures the entity to query, the data-element name, the target FHIR
    jsonPath, the value type, and any associated value sets.
    """

    def __init__(self, entityName, fieldName, jsonPath, valueType, valueSets):
        # Any attribute may be None/empty when its sheet cell was blank.
        self.entityName: Optional[str] = entityName
        self.fieldName: Optional[str] = fieldName
        self.jsonPath: Optional[str] = jsonPath
        self.valueType: Optional[str] = valueType
        self.valueSets: Optional[str] = valueSets

    @classmethod
    def from_dict(cls, data: Dict[str, Any]):
        """Construct from a dict, accepting camelCase or snake_case keys."""
        return cls(
            get_value_from_keys(data, ['entityName', 'entity_name'], ''),
            get_value_from_keys(data, ['fieldName', 'field_name'], ''),
            get_value_from_keys(data, ['jsonPath', 'json_path'], ''),
            get_value_from_keys(data, ['valueType', "value_type"], ''),
            get_value_from_keys(data, ['valueSets', 'value_sets'], ''),
        )

    def __repr__(self) -> str:
        return (f"\nHeaderEntry(entityName='{self.entityName}', \n\tfieldName='{self.fieldName}', \n\tjsonPath='{self.jsonPath}',\n\tvalueType='{self.valueType}', "
                f"\n\tvalueSets='{self.valueSets}')")
|
|
20
|
+
|
|
21
|
+
class PatientEntry:
    """A single patient's data: (entityName, fieldName) tuples mapped to cell values."""

    def __init__(self, entries: Dict[Tuple[str, str], str]):
        self.entries = entries

    @classmethod
    def from_dict(cls, entries: Dict[Tuple[str, str], str]):
        """Wrap an already-built entries mapping."""
        return cls(entries)

    def __repr__(self) -> str:
        return (f"PatientEntry(\n\t'{self.entries}')")
|
|
31
|
+
|
|
32
|
+
class CohortData:
    """Container pairing PatientData headers with per-patient entries."""

    def __init__(self, headers: List[HeaderEntry], patients: List[PatientEntry]):
        self.headers = headers
        self.patients = patients

    @classmethod
    def from_dict(cls, headers: List[Dict[str, Any]], patients: List[Dict[Tuple[str, str], str]]):
        """Build a CohortData from raw dicts, wrapping each in its entity class."""
        header_entities = [HeaderEntry.from_dict(header) for header in headers]
        patient_entities = [PatientEntry.from_dict(patient) for patient in patients]
        return cls(header_entities, patient_entities)

    def __repr__(self) -> str:
        return (f"CohortData(\n\t-----\n\theaders='{self.headers}',\n\t-----\n\tpatients='{self.patients}')")

    def get_num_patients(self):
        """Number of patient records in the cohort."""
        return len(self.patients)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
|
|
2
|
+
from typing import Any, Dict, List
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def get_value_from_keys(data: Dict[str, Any], keys: List[str], default: Any) -> Any:
    """Helper function to find the first existing key and return its value.

    Key matching is case-insensitive: both the data keys and the candidate
    keys are lowercased before comparison. Returns `default` when none of
    the candidate keys are present.

    Bug fix vs. the original: the docstring was a stray string statement
    placed *after* the first line of code, so it never became __doc__.
    """
    lower_data = {k.lower(): v for k, v in data.items()}
    for key in keys:
        lower_key = key.lower()
        if lower_key in lower_data:
            return lower_data[lower_key]
    return default
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from typing import Any, Dict, List, Optional
|
|
2
|
+
|
|
3
|
+
from .common import get_value_from_keys
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ResourceDefinition:
    """
    A class to represent a Resource Definition for FHIR initialization.
    """
    # Accepted spreadsheet/dict key spellings for each field.
    entityName_keys = ['Entity Name', 'name', 'entity_name']
    resourceType_keys = ['ResourceType', 'resource_type', 'type']
    profile_keys = ['Profile(s)', 'profiles', 'profile_list']

    def __init__(self, entityName: str, resourceType: str, profiles: List[str]):
        """Store the entity name, FHIR resource type, and profile URL list."""
        self.entityName = entityName
        self.resourceType = resourceType
        self.profiles = profiles

    @classmethod
    def from_dict(cls, data: Dict[str, Any]):
        """Construct from a dict, tolerating any of the accepted key spellings."""
        return cls(
            get_value_from_keys(data, cls.entityName_keys, ''),
            get_value_from_keys(data, cls.resourceType_keys, ''),
            get_value_from_keys(data, cls.profile_keys, []),
        )

    def __repr__(self) -> str:
        return f"ResourceDefinition(entityName='{self.entityName}', resourceType='{self.resourceType}', profiles={self.profiles})"
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from typing import Any, Dict
|
|
2
|
+
|
|
3
|
+
from .common import get_value_from_keys
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ResourceLink:
    """
    A class to represent a Fhir Reference between two resources.
    """
    # Accepted spreadsheet/dict key spellings for each field.
    originResource_keys = ['OriginResource', 'Origin Resource', 'origin_resource']
    referencePath_keys = ['ReferencePath', 'Reference Path', 'reference_path']
    destinationResource_keys = ['DestinationResource', 'Destination Resource', 'destination_resource']

    def __init__(self, originResource: str, referencePath: str, destinationResource: str):
        """Store the origin resource, the reference path on it, and the destination."""
        self.originResource = originResource
        self.referencePath = referencePath
        self.destinationResource = destinationResource

    @classmethod
    def from_dict(cls, data: Dict[str, Any]):
        """Construct from a dict, tolerating any of the accepted key spellings."""
        origin = get_value_from_keys(data, cls.originResource_keys, '')
        path = get_value_from_keys(data, cls.referencePath_keys, '')
        destination = get_value_from_keys(data, cls.destinationResource_keys, '')
        return cls(origin, path, destination)

    def __repr__(self) -> str:
        return (f"ResourceLink(originResource='{self.originResource}', "
                f"referencePath='{self.referencePath}', "
                f"destinationResource='{self.destinationResource}')")
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
import openpyxl
|
|
3
|
+
|
|
4
|
+
from .model.cohort_data_entity import CohortData, CohortData
|
|
5
|
+
|
|
6
|
+
from .model.resource_definition_entity import ResourceDefinition
|
|
7
|
+
from .model.resource_link_entity import ResourceLink
|
|
8
|
+
|
|
9
|
+
# Function to read the xlsx file and access specific sheets
def read_xlsx_and_process(file_path):
    """Load the workbook at file_path and extract its three known sheets.

    Returns (resource_definition_entities, resource_link_entities, cohort_data);
    missing sheets yield empty defaults.
    """
    workbook = openpyxl.load_workbook(file_path)
    definitions = []
    links = []
    cohort = CohortData.from_dict([], [])
    sheet_names = workbook.sheetnames
    if 'ResourceDefinitions' in sheet_names:
        definitions = process_sheet_resource_definitions(workbook['ResourceDefinitions'])
    if 'ResourceLinks' in sheet_names:
        links = process_sheet_resource_links(workbook['ResourceLinks'])
    if 'PatientData' in sheet_names:
        # Definitions are passed through so patient columns can be validated
        # against the declared entities.
        cohort = process_sheet_patient_data_revised(workbook['PatientData'], definitions)
    return definitions, links, cohort
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Function to process the specific sheet with 'Entity Name', 'ResourceType', and 'Profile(s)'
def process_sheet_resource_definitions(sheet) -> List[ResourceDefinition]:
    """Parse the ResourceDefinitions sheet into ResourceDefinition entities.

    Row 1 holds column names; data starts at row 3. Blank rows are skipped
    and the 'Profile(s)' cell is split on commas into a URL list.
    """
    raw_rows = []
    entities = []
    header_cells = next(sheet.iter_rows(min_row=1, max_row=1))
    column_names = [cell.value for cell in header_cells]

    for row in sheet.iter_rows(min_row=3, values_only=True):
        # Pair each cell with its column name, dropping unnamed columns.
        row_data = {name: cell for name, cell in zip(column_names, row) if name is not None}
        if all(cell is None or cell == "" for cell in row_data.values()):
            continue
        # Split 'Profile(s)' column into a list of URLs
        profiles_cell = row_data.get("Profile(s)")
        if profiles_cell:
            row_data["Profile(s)"] = [url.strip() for url in profiles_cell.split(",")]
        entities.append(ResourceDefinition.from_dict(row_data))
        raw_rows.append(row_data)
    print(f"Resource Definitions\n----------{raw_rows}")
    return entities
|
|
49
|
+
|
|
50
|
+
# Function to process the specific sheet with 'OriginResource', 'ReferencePath', and 'DestinationResource'
def process_sheet_resource_links(sheet) -> List[ResourceLink]:
    """Parse the ResourceLinks sheet into ResourceLink entities.

    Row 1 holds column names; data starts at row 3. Blank rows are skipped.
    """
    resource_links = []
    resource_link_entities = []
    headers = [cell.value for cell in next(sheet.iter_rows(min_row=1, max_row=1))]  # Get headers
    for row in sheet.iter_rows(min_row=3, values_only=True):
        row_data = dict(zip(headers, row))  # Create a dictionary for each row
        # Bug fix: the original iterated the dict itself (its keys — the
        # header names, which are never empty), so blank rows were never
        # skipped. Check the cell *values*, matching
        # process_sheet_resource_definitions.
        if all(cell is None or cell == "" for cell in row_data.values()):
            continue
        resource_links.append(row_data)
        resource_link_entities.append(ResourceLink.from_dict(row_data))
    print(f"Resource Links\n----------{resource_links}")
    return resource_link_entities
|
|
63
|
+
|
|
64
|
+
# Function to process the "PatientData" sheet for the Revised CohortData
def process_sheet_patient_data_revised(sheet, resource_definition_entities):
    """Parse the PatientData sheet column-by-column into a CohortData.

    Starting at the 3rd column, rows 1-6 of each column describe the header
    (row 1: entity to query, row 2: jsonPath, row 3: value type, row 4:
    value sets, row 6: data-element name) and rows 7+ hold one cell per
    patient. Columns with an unknown or missing entity emit warnings but are
    still included.
    """
    headers = []
    patients = []
    # Process the header rows of each column, then its patient data cells.
    for col in sheet.iter_cols(min_row=1, min_col=3, values_only=True):  # Start from 3rd column
        # Skip entirely empty columns.
        if all(entry is None for entry in col):
            continue
        entity_name = col[0]  # The entity name comes from row 1 (Entity To Query)
        field_name = col[5]   # The "Data Element" name comes from row 6 (index 5)
        if (entity_name is None or entity_name == "") and (field_name is not None and field_name != ""):
            print(f"WARNING: - Reading Patient Data Issue - {field_name} - 'Entity To Query' cell missing for column labelled '{field_name}', please provide entity name from the ResourceDefinitions tab.")

        # Warn when the column references an entity absent from ResourceDefinitions.
        if entity_name not in [entry.entityName for entry in resource_definition_entities]:
            print(f"WARNING: - Reading Patient Data Issue - {field_name} - 'Entity To Query' cell has entity named '{entity_name}', however, the ResourceDefinition tab has no matching resource. Please provide a corresponding entry in the ResourceDefinition tab.")

        # Create a header entry
        header_data = {
            "fieldName": field_name,
            "entityName": entity_name,
            "jsonPath": col[1],   # JsonPath from row 2
            "valueType": col[2],  # Value Type from row 3
            "valueSets": col[3]   # Value Set from row 4
        }
        headers.append(header_data)
        # Patient values start at row 7 (index 6).
        values = col[6:]
        # NOTE(review): dropping None cells compacts the column, which can
        # shift later patients' values up a row relative to other columns —
        # confirm sheets never have gaps mid-column.
        values = tuple(item for item in values if item is not None)
        # Expand the patient dictionary list if this column has more rows.
        if len(values) > len(patients):
            needed_count = len(values) - len(patients)
            # Bug fix: the original used [{}] * needed_count, which appends
            # needed_count references to the SAME dict — every patient added
            # in one batch would alias one record. Create distinct dicts.
            patients.extend({} for _ in range(needed_count))
        for patient_dict, value in zip(patients, values):
            patient_dict[(entity_name, field_name)] = value
    print(f"Headers\n----------{headers}")
    print(f"Patients\n----------{patients}")
    cohort_data = CohortData.from_dict(headers=headers, patients=patients)
    return cohort_data
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
from .model.cohort_data_entity import CohortData as CohortData
|
|
2
|
+
from .model.resource_definition_entity import ResourceDefinition as ResourceDefinition
|
|
3
|
+
from .model.resource_link_entity import ResourceLink as ResourceLink
|
|
4
|
+
|
|
5
|
+
# Loads an xlsx workbook and returns (resource_definitions, resource_links, cohort_data).
def read_xlsx_and_process(file_path): ...
# Parses the ResourceDefinitions sheet into ResourceDefinition entities.
def process_sheet_resource_definitions(sheet) -> list[ResourceDefinition]: ...
# Parses the ResourceLinks sheet into ResourceLink entities.
def process_sheet_resource_links(sheet) -> list[ResourceLink]: ...
# Parses the PatientData sheet (with definitions for validation) into a CohortData.
def process_sheet_patient_data_revised(sheet, resource_definition_entities): ...
|