fhir-sheets 2.1.6 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of fhir-sheets might be problematic.

@@ -0,0 +1,279 @@
+ import re
+ import datetime
+
+ # Dictionary of regexes for validating FHIR primitive types
+ type_regexes = {
+     'code': r'[^\s]+( [^\s]+)*',
+     'decimal': r'-?(0|[1-9][0-9]{0,17})(\.[0-9]{1,17})?([eE][+-]?[0-9]{1,9})?',
+     'id': r'[A-Za-z0-9\-\.]{1,64}',
+     'integer': r'[0]|[-+]?[1-9][0-9]*',
+     'oid': r'urn:oid:[0-2](\.(0|[1-9][0-9]*))+',
+     'positiveInt': r'[1-9][0-9]*',
+     'unsignedInt': r'[0]|([1-9][0-9]*)',
+     'uuid': r'urn:uuid:[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}'
+ }
+ # Assign final_struct[key] to value, formatted according to the given valueType
+ def assign_value(final_struct, key, value, valueType):
+     # Remove surrounding whitespace
+     if isinstance(value, str):
+         value = value.strip()
+     # Check for null or empty string values; if so, do not construct the value
+     if not value:
+         return final_struct
+     # If the valueType is not provided, do not construct the value.
+     if valueType is None:
+         return final_struct
+     # Switch case for valueType to construct a value
+     try:
+         if valueType.lower() == 'address':
+             address_value = parse_flexible_address(value)
+             if address_value:
+                 final_struct[key] = address_value
+         elif valueType.lower() == 'base64binary':
+             final_struct[key] = value
+         elif valueType.lower() == 'boolean':
+             # NOTE: bool() on any non-empty string (including 'false') is True
+             final_struct[key] = bool(value)
+         elif valueType.lower() == 'codeableconcept':
+             final_struct[key] = caret_delimited_string_to_codeableconcept(value)
+         elif valueType.lower() == 'code':
+             match = re.search(type_regexes['code'], value)
+             final_struct[key] = match.group(0) if match else ''
+         elif valueType.lower() == 'coding':
+             final_struct[key] = caret_delimited_string_to_coding(value)
+         elif valueType.lower() == 'date':
+             if isinstance(value, datetime.datetime):
+                 final_struct[key] = value.date()
+             elif isinstance(value, str):
+                 final_struct[key] = parse_iso8601_date(value).replace(tzinfo=datetime.timezone.utc)
+         elif valueType.lower() == 'datetime':
+             if isinstance(value, datetime.datetime):
+                 final_struct[key] = value.replace(tzinfo=datetime.timezone.utc)
+             else:
+                 final_struct[key] = parse_iso8601_datetime(value).replace(tzinfo=datetime.timezone.utc)
+         elif valueType.lower() == 'decimal':
+             final_struct[key] = value
+         elif valueType.lower() == 'id':
+             match = re.search(type_regexes['id'], value)
+             final_struct[key] = match.group(0) if match else ''
+         elif valueType.lower() == 'instant':
+             if isinstance(value, datetime.datetime):
+                 final_struct[key] = value.replace(tzinfo=datetime.timezone.utc)
+             else:
+                 final_struct[key] = parse_iso8601_instant(value).replace(tzinfo=datetime.timezone.utc)
+         elif valueType.lower() == 'integer':
+             match = re.search(type_regexes['integer'], value)
+             final_struct[key] = int(match.group(0)) if match else 0
+         elif valueType.lower() == 'oid':
+             match = re.search(type_regexes['oid'], value)
+             final_struct[key] = match.group(0) if match else ''
+         elif valueType.lower() == 'positiveint':
+             match = re.search(type_regexes['positiveInt'], value)
+             final_struct[key] = int(match.group(0)) if match else 0
+         elif valueType.lower() == 'quantity':
+             final_struct[key] = string_to_quantity(value)
+         elif valueType.lower() == 'string':
+             final_struct[key] = value
+         elif valueType.lower() == 'string[]':
+             if key not in final_struct:
+                 final_struct[key] = [value]
+             else:
+                 final_struct[key].append(value)
+         elif valueType.lower() == 'time':
+             if isinstance(value, datetime.time):
+                 final_struct[key] = value
+             else:
+                 final_struct[key] = parse_iso8601_time(value)
+         elif valueType.lower() == 'unsignedint':
+             match = re.search(type_regexes['unsignedInt'], value)
+             final_struct[key] = int(match.group(0)) if match else 0
+         elif valueType.lower() == 'uri':
+             final_struct[key] = value
+         elif valueType.lower() == 'url':
+             final_struct[key] = value
+         elif valueType.lower() == 'uuid':
+             match = re.search(type_regexes['uuid'], value)
+             final_struct[key] = match.group(0) if match else ''
+         elif valueType.lower() == 'coding':
+             # NOTE: duplicate of the 'coding' branch above; unreachable as written
+             if not isinstance(final_struct, list):
+                 final_struct = []
+             final_struct.append(value)
+         else:
+             print(f"ERROR: - Rendering Value - {key} - {value} - {valueType} - Saw a valueType of '{valueType}' unsupported in current formatting")
+     except ValueError as e:
+         print(e)
+     return final_struct
+
+ def parse_iso8601_date(input_string):
+     # Regular expression to match an ISO 8601 date (YYYY-MM-DD)
+     pattern = r'(\d{4}-\d{2}-\d{2})'
+     match = re.search(pattern, input_string)
+     # Check if the input string matches the pattern
+     if match:
+         return datetime.datetime.strptime(match.group(1), '%Y-%m-%d')
+     else:
+         raise ValueError(f"Input string '{input_string}' is not in the valid ISO 8601 date format")
+
+ def parse_iso8601_datetime(input_string):
+     # Regular expression to match ISO 8601 format with optional timezone 'Z'
+     pattern = r'(\d{4}-\d{2}-\d{2}(T\d{2}:\d{2}:\d{2}(Z)?)?)'
+     match = re.search(pattern, input_string)
+     # Check if the input string matches the pattern
+     if match:
+         # Convert to datetime object
+         if input_string.endswith('Z'):
+             # If it has 'Z', convert to UTC (the captured group keeps the trailing 'Z')
+             try:
+                 return datetime.datetime.strptime(match.group(1), '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=datetime.timezone.utc)
+             except ValueError:  # If it fails (because the time part is missing), parse the date-only format and set time to midnight
+                 try:
+                     parsed_date = datetime.datetime.strptime(match.group(1), '%Y-%m-%d')
+                     parsed_datetime = parsed_date.replace(hour=0, minute=0, second=0)
+                     return parsed_datetime
+                 except ValueError:  # Neither format worked, so raise a single error
+                     raise ValueError(f"Input string '{input_string}' is not in a valid ISO 8601 date or datetime format")
+         else:
+             # Otherwise, parse the same way; the result is still normalized to UTC
+             try:
+                 return datetime.datetime.strptime(match.group(1), '%Y-%m-%dT%H:%M:%S').replace(tzinfo=datetime.timezone.utc)
+             except ValueError:  # If it fails (because the time part is missing), parse the date-only format and set time to midnight
+                 try:
+                     parsed_date = datetime.datetime.strptime(match.group(1), '%Y-%m-%d')
+                     parsed_datetime = parsed_date.replace(hour=0, minute=0, second=0)
+                     return parsed_datetime
+                 except ValueError:  # Neither format worked, so raise a single error
+                     raise ValueError(f"Input string '{input_string}' is not in a valid ISO 8601 date or datetime format")
+     else:
+         raise ValueError(f"Input string '{input_string}' is not in a valid ISO 8601 date or datetime format")
+
+ def parse_iso8601_instant(input_string):
+     # Regular expression to match ISO 8601 instant format with optional milliseconds and 'Z'
+     pattern = r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d{1,3})?(Z)?)'
+     match = re.search(pattern, input_string)
+     # Check if the input string matches the pattern
+     if match:
+         # If it ends with 'Z', it's UTC (the captured group keeps the trailing 'Z')
+         if input_string.endswith('Z'):
+             if '.' in input_string:
+                 # With milliseconds
+                 return datetime.datetime.strptime(match.group(1), '%Y-%m-%dT%H:%M:%S.%fZ').replace(tzinfo=datetime.timezone.utc)
+             else:
+                 # Without milliseconds
+                 return datetime.datetime.strptime(match.group(1), '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=datetime.timezone.utc)
+         else:
+             if '.' in input_string:
+                 # With milliseconds
+                 return datetime.datetime.strptime(match.group(1), '%Y-%m-%dT%H:%M:%S.%f')
+             else:
+                 # Without milliseconds
+                 return datetime.datetime.strptime(match.group(1), '%Y-%m-%dT%H:%M:%S')
+     else:
+         raise ValueError(f"Input string '{input_string}' is not in the valid ISO 8601 instant format")
+
+ def parse_iso8601_time(input_string):
+     # Regular expression to match the time format HH:MM:SS or HH:MM:SS.ssssss
+     pattern = r'((?:[01][0-9]|2[0-3]):[0-5][0-9]:([0-5][0-9]|60)(\.[0-9]{1,9})?)'
+     match = re.search(pattern, input_string)
+     # Check if the input string matches the pattern
+     if match:
+         # Parse the time
+         time_parts = match.group(1).split(':')
+         hours = int(time_parts[0])
+         minutes = int(time_parts[1])
+         seconds = float(time_parts[2])  # This can handle the fractional part
+
+         return datetime.time(hour=hours, minute=minutes, second=int(seconds), microsecond=int((seconds % 1) * 1_000_000))
+     else:
+         raise ValueError(f"Input string '{input_string}' is not in the valid time format")
+
+ def parse_flexible_address(address):
+     # Attempt to capture postal code, which is often at the end and typically numeric (though it may vary internationally)
+     postal_code_pattern = r'(?P<postalCode>\d{5}(?:-\d{4})?|)'
+
+     # State is typically a two-letter code (though this may vary internationally as well)
+     state_pattern = r'(?P<state>[A-Za-z]{2}|)'
+
+     # This captures a country after a comma (or space-separated) if it's present
+     country_pattern = r'(?:\s*(?P<country>[\w\s]+|))?$'
+
+     # Compile the full pattern to match the caret-delimited line, city, district, postal code, state, and country
+     full_pattern = rf'^(?P<line>.*?)\^(?P<city>.*?)\^(?P<district>.*?)\^{postal_code_pattern}\^{state_pattern}\^{country_pattern}'
+
+     match = re.search(full_pattern, address)
+
+     if match:
+         # Extract the components found in the regex, dropping empty matches
+         result = {k: v for k, v in match.groupdict().items() if v not in ("", None)}
+         if not result:
+             return None
+         # Assign the line as an array of 1 (use .get() since an empty line is filtered out above)
+         if isinstance(result.get('line'), str):
+             result['line'] = [result['line']]
+         return result
+     else:
+         return None  # Return None if the format doesn't match
+
+ def caret_delimited_string_to_codeableconcept(caret_delimited_str):
+     # Split the string by '~' to separate multiple codings
+     codings = caret_delimited_str.split('~')
+
+     # Initialize the CodeableConcept dictionary
+     codeable_concept = {"coding": []}
+     parts = []
+     # Loop over each coding section
+     for coding_str in codings:
+         # Split each part by '^' to get system, code, and display (optionally text at the end)
+         parts = coding_str.split('^')
+
+         # Create a coding dictionary from the components
+         coding_dict = {}
+         if len(parts) > 0:
+             coding_dict['system'] = parts[0] if parts[0] else ''
+         if len(parts) > 1:
+             coding_dict['code'] = parts[1] if parts[1] else ''
+         if len(parts) > 2:
+             coding_dict['display'] = parts[2] if parts[2] else ''
+
+         # Add coding to the 'coding' list in CodeableConcept
+         codeable_concept['coding'].append(coding_dict)
+
+     # Check if the last coding carried a 'text' component (applied to the entire CodeableConcept)
+     if len(parts) == 4:
+         codeable_concept['text'] = parts[3]
+     return codeable_concept
+
+ def caret_delimited_string_to_coding(caret_delimited_str):
+     # Initialize the Coding dictionary
+     coding = {}
+
+     # Split the string by '^' to get system, code, and display
+     parts = caret_delimited_str.split('^')
+
+     # Create the coding dictionary from the components
+     if len(parts) > 0:
+         coding['system'] = parts[0] if parts[0] else ''
+     if len(parts) > 1:
+         coding['code'] = parts[1] if parts[1] else ''
+     if len(parts) > 2:
+         coding['display'] = parts[2] if parts[2] else ''
+     return coding
+
+ def string_to_quantity(quantity_str):
+     # Split the string into value and unit by '^'
+     parts = quantity_str.split('^', maxsplit=1)
+
+     # Initialize the Quantity dictionary
+     quantity = {}
+
+     # First part is the value (convert to float)
+     if len(parts) > 0:
+         quantity['value'] = float(parts[0])
+
+     # Second part is the unit (if present)
+     if len(parts) > 1:
+         quantity['unit'] = parts[1]
+         quantity['system'] = 'http://unitsofmeasure.org'
+         quantity['code'] = parts[1]
+
+     return quantity
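
For orientation, here is a minimal usage sketch of the conversion helpers above; the resource dict and the field values are illustrative only and not taken from the package:

resource = {}
# 'code' values are whitespace-stripped and validated against type_regexes['code']
assign_value(resource, 'status', 'final ', 'code')  # -> {'status': 'final'}
# caret-delimited system^code^display becomes a CodeableConcept
assign_value(resource, 'code', 'http://loinc.org^8867-4^Heart rate', 'CodeableConcept')
# value^unit becomes a UCUM-coded Quantity
assign_value(resource, 'valueQuantity', '72^/min', 'Quantity')
# resource['valueQuantity'] == {'value': 72.0, 'unit': '/min', 'system': 'http://unitsofmeasure.org', 'code': '/min'}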
@@ -0,0 +1,13 @@
+ from _typeshed import Incomplete
+
+ type_regexes: Incomplete
+
+ def assign_value(final_struct, key, value, valueType): ...
+ def parse_iso8601_date(input_string): ...
+ def parse_iso8601_datetime(input_string): ...
+ def parse_iso8601_instant(input_string): ...
+ def parse_iso8601_time(input_string): ...
+ def parse_flexible_address(address): ...
+ def caret_delimited_string_to_codeableconcept(caret_delimited_str): ...
+ def caret_delimited_string_to_coding(caret_delimited_str): ...
+ def string_to_quantity(quantity_str): ...
@@ -0,0 +1,45 @@
+ from typing import Dict, Any, List, Optional, Tuple
+
+ from .common import get_value_from_keys
+
+ class HeaderEntry:
+     def __init__(self, entityName, fieldName, jsonPath, valueType, valueSets):
+         self.entityName: Optional[str] = entityName
+         self.fieldName: Optional[str] = fieldName
+         self.jsonPath: Optional[str] = jsonPath
+         self.valueType: Optional[str] = valueType
+         self.valueSets: Optional[str] = valueSets
+
+     @classmethod
+     def from_dict(cls, data: Dict[str, Any]):
+         return cls(get_value_from_keys(data, ['entityName', 'entity_name'], ''),
+                    get_value_from_keys(data, ['fieldName', 'field_name'], ''),
+                    get_value_from_keys(data, ['jsonPath', 'json_path'], ''),
+                    get_value_from_keys(data, ['valueType', 'value_type'], ''),
+                    get_value_from_keys(data, ['valueSets', 'value_sets'], ''))
+
+     def __repr__(self) -> str:
+         return (f"\nHeaderEntry(entityName='{self.entityName}', \n\tfieldName='{self.fieldName}', \n\tjsonPath='{self.jsonPath}',\n\tvalueType='{self.valueType}', "
+                 f"\n\tvalueSets='{self.valueSets}')")
+
+ class PatientEntry:
+     def __init__(self, entries: Dict[Tuple[str, str], str]):
+         self.entries = entries
+
+     @classmethod
+     def from_dict(cls, entries: Dict[Tuple[str, str], str]):
+         return cls(entries)
+
+     def __repr__(self) -> str:
+         return f"PatientEntry(\n\t'{self.entries}')"
+
+ class CohortData:
+     def __init__(self, headers: List[HeaderEntry], patients: List[PatientEntry]):
+         self.headers = headers
+         self.patients = patients
+
+     @classmethod
+     def from_dict(cls, headers: List[Dict[str, Any]], patients: List[Dict[Tuple[str, str], str]]):
+         return cls([HeaderEntry.from_dict(header) for header in headers],
+                    [PatientEntry.from_dict(patient) for patient in patients])
+
+     def __repr__(self) -> str:
+         return f"CohortData(\n\t-----\n\theaders='{self.headers}',\n\t-----\n\tpatients='{self.patients}')"
+
+     def get_num_patients(self):
+         return len(self.patients)
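
For orientation, a small sketch of the shapes these entities expect; the header fields and patient values below are made up for illustration:

headers = [{'entityName': 'Patient1', 'fieldName': 'Last Name',
            'jsonPath': 'name[0].family', 'valueType': 'string', 'valueSets': ''}]
patients = [{('Patient1', 'Last Name'): 'Smith'}]  # PatientEntry keys are (entityName, fieldName) tuples
cohort = CohortData.from_dict(headers=headers, patients=patients)
print(cohort.get_num_patients())  # 1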
@@ -0,0 +1,12 @@
+
+ from typing import Any, Dict, List
+
+
+ def get_value_from_keys(data: Dict[str, Any], keys: List[str], default: Any) -> Any:
+     """Helper function to find the first existing key (case-insensitively) and return its value."""
+     lower_data = {k.lower(): v for k, v in data.items()}
+     for key in keys:
+         lower_key = key.lower()
+         if lower_key in lower_data:
+             return lower_data[lower_key]
+     return default
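
A quick illustration of the case-insensitive lookup above; the row dictionary is hypothetical:

row = {'Entity Name': 'Patient1', 'ResourceType': 'Patient'}
get_value_from_keys(row, ['entityName', 'entity_name', 'Entity Name'], '')  # -> 'Patient1'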
@@ -0,0 +1,30 @@
+ from typing import Any, Dict, List, Optional
+
+ from .common import get_value_from_keys
+
+
+ class ResourceDefinition:
+     """
+     A class to represent a Resource Definition for FHIR initialization.
+     """
+     entityName_keys = ['Entity Name', 'name', 'entity_name']
+     resourceType_keys = ['ResourceType', 'resource_type', 'type']
+     profile_keys = ['Profile(s)', 'profiles', 'profile_list']
+
+     def __init__(self, entityName: str, resourceType: str, profiles: List[str]):
+         """
+         Initializes the ResourceDefinition object from the given entity name,
+         resource type, and list of profile URLs.
+         """
+         self.entityName = entityName
+         self.resourceType = resourceType
+         self.profiles = profiles
+
+     @classmethod
+     def from_dict(cls, data: Dict[str, Any]):
+         return cls(get_value_from_keys(data, cls.entityName_keys, ''),
+                    get_value_from_keys(data, cls.resourceType_keys, ''),
+                    get_value_from_keys(data, cls.profile_keys, []))
+
+     def __repr__(self) -> str:
+         return f"ResourceDefinition(entityName='{self.entityName}', resourceType='{self.resourceType}', profiles={self.profiles})"
@@ -0,0 +1,32 @@
+ from typing import Any, Dict
+
+ from .common import get_value_from_keys
+
+
+ class ResourceLink:
+     """
+     A class to represent a FHIR Reference between two resources.
+     """
+     originResource_keys = ['OriginResource', 'Origin Resource', 'origin_resource']
+     referencePath_keys = ['ReferencePath', 'Reference Path', 'reference_path']
+     destinationResource_keys = ['DestinationResource', 'Destination Resource', 'destination_resource']
+
+     def __init__(self, originResource: str, referencePath: str, destinationResource: str):
+         """
+         Initializes the ResourceLink object from the given origin resource name,
+         reference path, and destination resource name.
+         """
+         self.originResource = originResource
+         self.referencePath = referencePath
+         self.destinationResource = destinationResource
+
+     @classmethod
+     def from_dict(cls, data: Dict[str, Any]):
+         return cls(get_value_from_keys(data, cls.originResource_keys, ''),
+                    get_value_from_keys(data, cls.referencePath_keys, ''),
+                    get_value_from_keys(data, cls.destinationResource_keys, ''))
+
+     def __repr__(self) -> str:
+         return (f"ResourceLink(originResource='{self.originResource}', "
+                 f"referencePath='{self.referencePath}', "
+                 f"destinationResource='{self.destinationResource}')")
@@ -0,0 +1,102 @@
+ from typing import List
+ import openpyxl
+
+ from .model.cohort_data_entity import CohortData
+
+ from .model.resource_definition_entity import ResourceDefinition
+ from .model.resource_link_entity import ResourceLink
+
+ # Function to read the xlsx file and access specific sheets
+ def read_xlsx_and_process(file_path):
+     # Load the workbook
+     workbook = openpyxl.load_workbook(file_path)
+     resource_definition_entities = []
+     resource_link_entities = []
+     cohort_data = CohortData.from_dict([], [])
+     # Access the specific sheets if they are present
+     if 'ResourceDefinitions' in workbook.sheetnames:
+         sheet = workbook['ResourceDefinitions']
+         resource_definition_entities = process_sheet_resource_definitions(sheet)
+
+     if 'ResourceLinks' in workbook.sheetnames:
+         sheet = workbook['ResourceLinks']
+         resource_link_entities = process_sheet_resource_links(sheet)
+
+     if 'PatientData' in workbook.sheetnames:
+         sheet = workbook['PatientData']
+         cohort_data = process_sheet_patient_data_revised(sheet, resource_definition_entities)
+
+     return resource_definition_entities, resource_link_entities, cohort_data
+
+
+ # Function to process the sheet with 'Entity Name', 'ResourceType', and 'Profile(s)' columns
+ def process_sheet_resource_definitions(sheet) -> List[ResourceDefinition]:
+     resource_definitions = []
+     resource_definition_entities = []
+     headers = [cell.value for cell in next(sheet.iter_rows(min_row=1, max_row=1))]  # Get headers
+
+     for row in sheet.iter_rows(min_row=3, values_only=True):
+         row_data = dict((h, r) for h, r in zip(headers, row) if h is not None)  # Create a dictionary for each row
+         if all(cell is None or cell == "" for cell in row_data.values()):
+             continue
+         # Split 'Profile(s)' column into a list of URLs
+         if row_data.get("Profile(s)"):
+             row_data["Profile(s)"] = [url.strip() for url in row_data["Profile(s)"].split(",")]
+         resource_definition_entities.append(ResourceDefinition.from_dict(row_data))
+         resource_definitions.append(row_data)
+     print(f"Resource Definitions\n----------{resource_definitions}")
+     return resource_definition_entities
+
+ # Function to process the sheet with 'OriginResource', 'ReferencePath', and 'DestinationResource' columns
+ def process_sheet_resource_links(sheet) -> List[ResourceLink]:
+     resource_links = []
+     resource_link_entities = []
+     headers = [cell.value for cell in next(sheet.iter_rows(min_row=1, max_row=1))]  # Get headers
+     for row in sheet.iter_rows(min_row=3, values_only=True):
+         row_data = dict(zip(headers, row))  # Create a dictionary for each row
+         if all(cell is None or cell == "" for cell in row_data.values()):
+             continue
+         resource_links.append(row_data)
+         resource_link_entities.append(ResourceLink.from_dict(row_data))
+     print(f"Resource Links\n----------{resource_links}")
+     return resource_link_entities
+
+ # Function to process the "PatientData" sheet into the revised CohortData
+ def process_sheet_patient_data_revised(sheet, resource_definition_entities):
+     headers = []
+     patients = []
+     # Initialize the structures to store the processed data
+     # Process the header entries from the first 6 rows (Entity To Query, JsonPath, etc.) and the data from the rest.
+     for col in sheet.iter_cols(min_row=1, min_col=3, values_only=True):  # Start from the 3rd column
+         if all(entry is None for entry in col):
+             continue
+         entity_name = col[0]  # The entity name comes from the first row (Entity To Query)
+         field_name = col[5]   # The "Data Element" comes from the sixth row
+         if (entity_name is None or entity_name == "") and (field_name is not None and field_name != ""):
+             print(f"WARNING: - Reading Patient Data Issue - {field_name} - 'Entity To Query' cell missing for column labelled '{field_name}', please provide entity name from the ResourceDefinitions tab.")
+
+         if entity_name not in [entry.entityName for entry in resource_definition_entities]:
+             print(f"WARNING: - Reading Patient Data Issue - {field_name} - 'Entity To Query' cell has entity named '{entity_name}', however, the ResourceDefinition tab has no matching resource. Please provide a corresponding entry in the ResourceDefinition tab.")
+
+         # Create a header entry
+         header_data = {
+             "fieldName": field_name,
+             "entityName": entity_name,
+             "jsonPath": col[1],   # JsonPath from the second row
+             "valueType": col[2],  # Value Type from the third row
+             "valueSets": col[3]   # Value Set from the fourth row
+         }
+         headers.append(header_data)
+         # Create a data entry
+         values = col[6:]  # The values come from the seventh row and below
+         values = tuple(item for item in values if item is not None)
+         # Expand the patient dictionary list if needed (one new dict per missing patient)
+         if len(values) > len(patients):
+             needed_count = len(values) - len(patients)
+             patients.extend([{} for _ in range(needed_count)])
+         for patient_dict, value in zip(patients, values):
+             patient_dict[(entity_name, field_name)] = value
+     print(f"Headers\n----------{headers}")
+     print(f"Patients\n----------{patients}")
+     cohort_data = CohortData.from_dict(headers=headers, patients=patients)
+     return cohort_data
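
Putting it together, an end-to-end sketch; 'cohort_workbook.xlsx' is a placeholder path, and the workbook must contain the ResourceDefinitions, ResourceLinks, and PatientData sheets described above:

resource_defs, resource_links, cohort = read_xlsx_and_process('cohort_workbook.xlsx')
print(f"{len(resource_defs)} resources, {len(resource_links)} links, {cohort.get_num_patients()} patients")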
@@ -0,0 +1,8 @@
+ from .model.cohort_data_entity import CohortData as CohortData
+ from .model.resource_definition_entity import ResourceDefinition as ResourceDefinition
+ from .model.resource_link_entity import ResourceLink as ResourceLink
+
+ def read_xlsx_and_process(file_path): ...
+ def process_sheet_resource_definitions(sheet) -> list[ResourceDefinition]: ...
+ def process_sheet_resource_links(sheet) -> list[ResourceLink]: ...
+ def process_sheet_patient_data_revised(sheet, resource_definition_entities): ...