h-adminsim 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- h_adminsim/__init__.py +5 -0
- h_adminsim/admin_staff.py +280 -0
- h_adminsim/assets/configs/data4primary.yaml +47 -0
- h_adminsim/assets/configs/data4secondary.yaml +47 -0
- h_adminsim/assets/configs/data4tertiary.yaml +47 -0
- h_adminsim/assets/country/address.json +141859 -0
- h_adminsim/assets/country/country_code.json +244 -0
- h_adminsim/assets/departments/department.json +85 -0
- h_adminsim/assets/departments/symptom.json +4530 -0
- h_adminsim/assets/fhir.schema.json +75253 -0
- h_adminsim/assets/names/firstname.txt +1219 -0
- h_adminsim/assets/names/lastname.txt +88799 -0
- h_adminsim/assets/prompts/cancel_patient_system.txt +38 -0
- h_adminsim/assets/prompts/intake_staff_task_user.txt +16 -0
- h_adminsim/assets/prompts/intake_supervisor_system.txt +8 -0
- h_adminsim/assets/prompts/intake_supervisor_user.txt +31 -0
- h_adminsim/assets/prompts/reschedule_patient_system.txt +38 -0
- h_adminsim/assets/prompts/schedule_patient_rejected_system.txt +42 -0
- h_adminsim/assets/prompts/schedule_patient_system.txt +36 -0
- h_adminsim/assets/prompts/schedule_staff_reasoning.txt +57 -0
- h_adminsim/assets/prompts/schedule_staff_sc_tool_calling.txt +13 -0
- h_adminsim/assets/prompts/schedule_staff_system.txt +10 -0
- h_adminsim/assets/prompts/schedule_staff_tool_calling.txt +41 -0
- h_adminsim/client/__init__.py +3 -0
- h_adminsim/client/google_client.py +209 -0
- h_adminsim/client/openai_client.py +199 -0
- h_adminsim/client/vllm_client.py +160 -0
- h_adminsim/environment/__init__.py +1 -0
- h_adminsim/environment/hospital.py +462 -0
- h_adminsim/environment/op_scheduling_simulation.py +1126 -0
- h_adminsim/pipeline/__init__.py +3 -0
- h_adminsim/pipeline/data_generator.py +192 -0
- h_adminsim/pipeline/evaluator.py +33 -0
- h_adminsim/pipeline/simulation.py +231 -0
- h_adminsim/registry/__init__.py +5 -0
- h_adminsim/registry/errors.py +89 -0
- h_adminsim/registry/models.py +126 -0
- h_adminsim/registry/phrases.py +10 -0
- h_adminsim/registry/pydantic_models.py +21 -0
- h_adminsim/registry/variables.py +9 -0
- h_adminsim/supervisor.py +182 -0
- h_adminsim/task/agent_task.py +900 -0
- h_adminsim/task/fhir_manager.py +222 -0
- h_adminsim/task/schedule_assign.py +151 -0
- h_adminsim/tools/__init__.py +5 -0
- h_adminsim/tools/agent_data_builder.py +124 -0
- h_adminsim/tools/data_converter.py +536 -0
- h_adminsim/tools/data_synthesizer.py +365 -0
- h_adminsim/tools/evaluator.py +258 -0
- h_adminsim/tools/sanity_checker.py +216 -0
- h_adminsim/tools/scheduling_rule.py +420 -0
- h_adminsim/utils/__init__.py +136 -0
- h_adminsim/utils/common_utils.py +698 -0
- h_adminsim/utils/fhir_utils.py +190 -0
- h_adminsim/utils/filesys_utils.py +135 -0
- h_adminsim/utils/image_preprocess_utils.py +188 -0
- h_adminsim/utils/random_utils.py +358 -0
- h_adminsim/version.txt +1 -0
- h_adminsim-1.0.0.dist-info/LICENSE +30 -0
- h_adminsim-1.0.0.dist-info/METADATA +494 -0
- h_adminsim-1.0.0.dist-info/RECORD +62 -0
- h_adminsim-1.0.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
import re
|
|
2
|
+
|
|
3
|
+
from h_adminsim.utils.common_utils import (
|
|
4
|
+
iso_to_hour,
|
|
5
|
+
iso_to_date,
|
|
6
|
+
sort_schedule,
|
|
7
|
+
convert_time_list_to_merged_time,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def sanitize_id(s: str) -> str:
    """
    Strip every character outside the FHIR id alphabet and cap the length.

    Only ASCII letters, digits, hyphens and dots survive; anything else is
    dropped and the result is truncated to 64 characters.

    Args:
        s (str): The input string to sanitize.

    Returns:
        str: A sanitized string containing only allowed characters,
            and no longer than 64 characters.
    """
    disallowed = re.compile(r'[^A-Za-z0-9\-\.]')
    # FHIR resource ids are limited to 64 characters.
    return disallowed.sub('', s)[:64]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def get_individual_id(hospital: str, department_code: str, individual_name: str) -> str:
    """
    Build a sanitized individual ID from hospital, department and name.

    Args:
        hospital (str): A hospital name.
        department_code (str): A department code.
        individual_name (str): An individual name.

    Returns:
        str: A sanitized individual ID.
    """
    raw_id = '-'.join([hospital, department_code.lower(), individual_name])
    return sanitize_id(raw_id)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def get_practitionerrole_id(individual_id: str) -> str:
    """
    Derive the practitioner role ID for an individual.

    Args:
        individual_id (str): An individual ID.

    Returns:
        str: A practitioner role ID.
    """
    return individual_id + '-role'
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def get_schedule_id(individual_id: str) -> str:
    """
    Derive the schedule ID for an individual.

    Args:
        individual_id (str): An individual ID.

    Returns:
        str: A schedule ID.
    """
    return individual_id + '-schedule'
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def get_slot_id(individual_id: str, date: str, time_segment_index: int) -> str:
    """
    Derive the slot ID for an individual on a given date.

    Args:
        individual_id (str): An individual ID.
        date (str): A date in ISO format (YYYY-MM-DD).
        time_segment_index (int): An index of start time segment.

    Returns:
        str: A slot ID.
    """
    # Compact the ISO date to YYYYMMDD for the id.
    compact_date = date.replace('-', '')
    return f'{individual_id}-{compact_date}-slot{time_segment_index}'
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def get_appointment_id(individual_id: str, date: str, start_time_segment_index: int, end_time_segment_index: int) -> str:
    """
    Derive the appointment ID for an individual on a given date and time span.

    Args:
        individual_id (str): An individual ID.
        date (str): A date in ISO format (YYYY-MM-DD).
        start_time_segment_index (int): An index of start time segment.
        end_time_segment_index (int): An index of end time segment.

    Returns:
        str: An appointment ID.
    """
    # Compact the ISO date to YYYYMMDD for the id.
    compact_date = date.replace('-', '')
    return (f'{individual_id}-{compact_date}'
            f'-appn{start_time_segment_index}-{end_time_segment_index}')
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def get_all_doctor_info(practitioners: list[dict],
                        practitioner_roles: list[dict],
                        schedules: list[dict],
                        slots: list[dict],
                        appointments: list[dict],
                        **kwargs) -> dict:
    """
    Make a current state of doctoral information based on the FHIR server.

    Args:
        practitioners (list[dict]): Practitioner resources currently used in the hospital environment of the simulation.
        practitioner_roles (list[dict]): PractitionerRole resources currently used in the hospital environment of the simulation.
        schedules (list[dict]): Schedule resources currently used in the hospital environment of the simulation.
        slots (list[dict]): Slot resources currently used in the hospital environment of the simulation.
        appointments (list[dict]): Appointment resources currently used in the hospital environment of the simulation.
        **kwargs: When all of 'start', 'end' and 'interval' are present they
            are forwarded to `convert_time_list_to_merged_time` to merge
            adjacent busy segments per date.

    Returns:
        dict: Current state of doctoral information, keyed by the practitioner's
            display name ("<prefix> <given> <family>").
    """
    # Prepare lookup tables keyed by FHIR reference strings
    doctor_information = dict()
    practitioner_ref_to_role = dict()
    practitioner_ref_to_schedules = dict()
    practitioner_ref_to_name = {
        f"Practitioner/{practitioner['resource']['id']}": \
            f"{practitioner['resource']['name'][0]['prefix'][0]} {practitioner['resource']['name'][0]['given'][0]} {practitioner['resource']['name'][0]['family']}" \
        for practitioner in practitioners
    }
    for practitioner_role in practitioner_roles:
        # Characteristics carry capacity attributes as coded display strings
        attributes = {attr['text']: attr['coding'][0]['display'] for attr in practitioner_role['resource']['characteristic']}
        practitioner_ref_to_role[practitioner_role['resource']['practitioner']['reference']] = {
            'department': practitioner_role['resource']['specialty'][0]['text'],
            'specialty': {
                'name': practitioner_role['resource']['specialty'][0]['coding'][0]['display'],
                'code': practitioner_role['resource']['specialty'][0]['coding'][0]['code']
            },
            'capacity_per_hour': int(attributes['capacity_per_hour']),
            'capacity': int(attributes['capacity']),
        }
    schedule_ref_to_practitioner_ref = {
        f"Schedule/{schedule['resource']['id']}": schedule['resource']['actor'][0]['reference'] for schedule in schedules
    }

    # Append fixed (non-free) slot times of each doctor, grouped by date
    for slot in slots:
        resource = slot['resource']
        practitioner_ref = schedule_ref_to_practitioner_ref[resource['schedule']['reference']]
        date = iso_to_date(resource['start'])
        practitioner_dict = practitioner_ref_to_schedules.setdefault(practitioner_ref, {})
        practitioner_dict.setdefault(date, [])
        if resource['status'] != 'free':
            practitioner_dict[date].append([iso_to_hour(resource['start']), iso_to_hour(resource['end'])])

    # Merge fixed schedule times when the merging parameters are provided
    if all(k in kwargs for k in ['start', 'end', 'interval']):
        for fixed_schedules in practitioner_ref_to_schedules.values():
            for date, time_list in fixed_schedules.items():
                fixed_schedules[date] = convert_time_list_to_merged_time(time_list=sort_schedule(time_list), **kwargs)

    # Append patient appointments to the practitioner participant's schedule
    for appointment in appointments:
        resource = appointment['resource']
        for participant in resource['participant']:
            participant_ref = participant['actor']['reference']
            date = iso_to_date(resource['start'])
            practitioner_dict = practitioner_ref_to_schedules.setdefault(participant_ref, {})
            if participant_ref in practitioner_ref_to_name:
                # setdefault guards against an appointment date that has no
                # slot entry yet; the original indexed practitioner_dict[date]
                # directly and raised KeyError in that case.
                practitioner_dict.setdefault(date, []).append([iso_to_hour(resource['start']), iso_to_hour(resource['end'])])
                break

    # Build the doctor information from FHIR
    for practitioner in practitioners:
        resource = practitioner['resource']
        ref = f"Practitioner/{resource['id']}"
        doctor_information[practitioner_ref_to_name[ref]] = {
            'department': practitioner_ref_to_role[ref]['department'],
            'specialty': practitioner_ref_to_role[ref]['specialty'],
            'schedule': sort_schedule(practitioner_ref_to_schedules.get(ref, [])),
            'capacity_per_hour': practitioner_ref_to_role[ref]['capacity_per_hour'],
            'capacity': practitioner_ref_to_role[ref]['capacity'],
            'gender': resource['gender'],
            'telecom': resource['telecom'],
            'birthDate': resource['birthDate']
        }

    return doctor_information
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
import orjson
|
|
4
|
+
from typing import Any
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from h_adminsim.utils import log
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def txt_load(path: str) -> str:
    """
    Load and return the content of a text file.

    Args:
        path (str): Path to the text file.

    Returns:
        str: The full content of the text file as a string.
    """
    with open(path, 'r') as f:
        return f.read()
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def json_load(path: str) -> Any:
    """
    Load and parse a JSON file.

    Args:
        path (str): Path to the JSON file.

    Returns:
        Any: The parsed Python object (usually a dict or list) from the JSON file.
    """
    with open(path, 'r') as f:
        parsed = json.load(f)
    return parsed
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def json_save(path: str, data: dict):
    """
    Save data as a pretty-printed JSON file (UTF-8 encoded).

    Args:
        path (str): Path to the json file.
        data (dict): Data to save.
    """
    # Explicit UTF-8: with ensure_ascii=False, non-ASCII characters are
    # written raw, and the platform's default locale encoding (e.g. cp1252
    # on Windows) may not be able to represent them -> UnicodeEncodeError.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4, ensure_ascii=False)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def json_save_fast(path: str, data: dict):
    """
    Save a json file quickly using orjson's native serializer.

    Args:
        path (str): Path to the json file.
        data (dict): Data to save.
    """
    # orjson produces bytes, so the file is opened in binary mode.
    serialized = orjson.dumps(data, option=orjson.OPT_INDENT_2)
    with open(path, 'wb') as f:
        f.write(serialized)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def yaml_save(file: str='data.yaml', data: Any = None):
    """
    Serialize a config object to a YAML file.

    Note:
        `data` is not a plain dict: it must expose a `dumps` method —
        presumably a config-wrapper object; TODO confirm the concrete type
        against callers.

    Args:
        file (str, optional): File name. Default is 'data.yaml'.
        data (Any, optional): Config object to save in YAML format.
    """
    save_path = Path(file)
    # Echo the serialized config to the project logger before writing.
    log(data.dumps())
    with open(save_path, "w") as f:
        # NOTE(review): modified_color=None / quote_str=True look like flags
        # of a colorized-config library (disable ANSI colors, quote string
        # values for file output) — confirm against the config class.
        f.write(data.dumps(modified_color=None, quote_str=True))
    log(f"Config is saved at {save_path}")
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def get_files(path: str, ext: str = None) -> list[str]:
    """
    Recursively collect files under a directory, optionally filtered by extension.

    Args:
        path (str): Folder path to search for files.
        ext (str, optional): Filename suffix to filter on (e.g. '.json').
            Defaults to None, meaning no filtering.

    Raises:
        ValueError: If `path` is not a directory.

    Returns:
        list[str]: List of file paths that match the given extension.
    """
    if not os.path.isdir(path):
        raise ValueError(f"Path {path} is not a directory.")

    matched = []
    for root, _, filenames in os.walk(path):
        matched.extend(
            os.path.join(root, filename)
            for filename in filenames
            if ext is None or filename.endswith(ext)
        )
    return matched
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def make_project_dir(config) -> Path:
    """
    Make the project output folder, deriving a fresh name on collision.

    Args:
        config: yaml config object exposing `project` and `data_name`
            attributes; `data_name` is updated in place when a new name
            has to be generated.

    Returns:
        Path: Created project folder path.
    """
    prefix = log('Make project folder')
    project = config.project
    name = config.data_name

    save_dir = os.path.join(project, name)
    if os.path.exists(save_dir):
        log(f'{prefix}: Project {save_dir} already exists. New folder will be created.')
        # Probe until an unused suffix is found; the original single
        # len(listdir)+1 suffix could itself collide with an existing
        # folder and make os.makedirs(save_dir) raise FileExistsError.
        suffix = len(os.listdir(project)) + 1
        while os.path.exists(os.path.join(project, name + str(suffix))):
            suffix += 1
        name = name + str(suffix)
        config.data_name = name
        save_dir = os.path.join(project, name)

    os.makedirs(project, exist_ok=True)
    os.makedirs(save_dir)

    return Path(save_dir)
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
import io
|
|
2
|
+
import os
|
|
3
|
+
import math
|
|
4
|
+
import base64
|
|
5
|
+
from PIL import Image
|
|
6
|
+
from collections import Counter
|
|
7
|
+
from typing import Tuple, Union
|
|
8
|
+
import matplotlib.pyplot as plt
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def encode_image(image_path: str, encode_base64: bool = True) -> Union[str, bytes]:
    """
    Read an image file and return its binary content, optionally base64-encoded.

    Args:
        image_path (str): Path to the image file.
        encode_base64 (bool, optional): If True, return a base64-encoded
            UTF-8 string; if False, return the raw bytes. Defaults to True.

    Returns:
        Union[str, bytes]: The base64-encoded string or raw binary content of
            the image, depending on the value of `encode_base64`.
    """
    with open(image_path, "rb") as image_file:
        raw = image_file.read()
    if encode_base64:
        return base64.b64encode(raw).decode("utf-8")
    return raw
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def encode_resize_image(image_path: str, max_size: Tuple[int], encode_base64: bool = True) -> Union[str, bytes]:
    """
    Resize an image to fit within the specified maximum dimensions and return its content,
    optionally encoded in base64.

    If the original image is smaller than the specified max size, it will be returned
    without resizing (in its original format).

    Args:
        image_path (str): Path to the input image file.
        max_size (Tuple[int]): Maximum allowed size as (width, height). The image will be
            resized proportionally to fit within this box while preserving aspect ratio.
        encode_base64 (bool, optional): If True, the image content is returned as a
            base64-encoded string. If False, raw binary data is returned. Defaults to True.

    Returns:
        Union[str, bytes]: The resized image (JPEG-encoded) as a base64 string or raw
            binary data, depending on the value of `encode_base64`.
    """
    with Image.open(image_path) as img:
        original_width, original_height = img.size

        # Scale factor that fits the image inside max_size, preserving ratio
        width_ratio = max_size[0] / original_width
        height_ratio = max_size[1] / original_height
        min_ratio = min(width_ratio, height_ratio)

        # Already small enough: return the original file content untouched
        if min_ratio >= 1:
            return encode_image(image_path, encode_base64)

        new_width = int(original_width * min_ratio)
        new_height = int(original_height * min_ratio)

        img = img.resize((new_width, new_height))

        # JPEG cannot store alpha or palette modes; without this conversion a
        # resized RGBA/P image (e.g. many PNGs) raises
        # "OSError: cannot write mode RGBA as JPEG" on save.
        if img.mode not in ('RGB', 'L'):
            img = img.convert('RGB')

        # Save resized image to an in-memory buffer
        img_buffer = io.BytesIO()
        img.save(img_buffer, format="JPEG")
        img_buffer.seek(0)

        if encode_base64:
            return base64.b64encode(img_buffer.read()).decode("utf-8")
        return img_buffer.read()
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_image_extension(path: str) -> str:
    """
    Extract and normalize the image file extension from the given file path.

    Args:
        path (str): Path to the image file.

    Raises:
        ValueError: If the file extension is not a supported image format.

    Returns:
        str: Normalized image format string. Returns "png" for PNG files,
            and "jpeg" for JPG or JPEG files (case-insensitive).
    """
    # os.path.splitext returns a (root, ext) tuple with ext including the
    # leading dot; the original compared the whole tuple against "png",
    # so every call fell through to the ValueError branch.
    ext = os.path.splitext(path)[1].lower().lstrip('.')
    if ext == "png":
        return ext
    elif ext in ("jpeg", "jpg"):
        return "jpeg"
    else:
        raise ValueError(f"Unsupported image format: {ext}")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def autopct_format(values: list[int], threshold: float = 5.0):
    """
    Build an autopct callback that renders a pie slice's percentage and count.

    Args:
        values (list[int]): Values of each pie chart slice; their sum is used
            to recover the absolute count from a percentage.
        threshold (float): Minimum percentage required to display the label.
            Slices at or below this threshold render an empty string.

    Returns:
        function: Callable mapping a percentage (float) to a formatted string
            with percentage and count, e.g. '42.0%\\n(21)', or '' when below
            the threshold.
    """
    def _formatter(pct):
        if pct <= threshold:
            return ''
        count = int(round(pct * sum(values) / 100.0))
        return f'{pct:.1f}%\n({count})'

    return _formatter
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def draw_fail_donut_subplots(fail_data_dict: dict, save_path: str):
    """
    Draw donut-style pie chart subplots showing the failure type distribution per task.

    Args:
        fail_data_dict (dict): A dictionary where keys are task names and values are
            lists of failure types (e.g., error codes).
        save_path (str): File path where the final figure will be saved as a PNG.
    """
    keys = list(fail_data_dict.keys())
    num_plots = len(keys)

    # Nothing to draw
    if num_plots == 0:
        return

    # Calculate subplot layout (maximum of 4 columns)
    ncols = min(4, num_plots)
    nrows = math.ceil(num_plots / ncols)

    fig, axes = plt.subplots(nrows, ncols, figsize=(5 * ncols, 7 * nrows))
    # plt.subplots returns a bare Axes (not an array) when there is one subplot
    axes = axes.flatten() if num_plots > 1 else [axes]
    cmap = plt.get_cmap('tab10')

    for idx, key in enumerate(keys):
        failed_cases = fail_data_dict[key]
        fail_summary = Counter(failed_cases)
        labels = list(fail_summary.keys())
        sizes = list(fail_summary.values())
        # Sort slices from largest to smallest failure count
        sorted_items = sorted(zip(labels, sizes), key=lambda x: x[1], reverse=True)
        labels, sizes = zip(*sorted_items)
        total = sum(sizes)
        percentages = [s / total * 100 for s in sizes]
        # NOTE(review): tab10 provides 10 colors; with more than 10 failure
        # types the shorter color list is recycled by matplotlib — confirm
        # that is acceptable for the report.
        colors = cmap.colors[:len(labels)]

        ax = axes[idx]
        pct_str = autopct_format(sizes, 4.0)

        wedges, texts, autotexts = ax.pie(
            sizes,
            labels=None,
            autopct=pct_str,
            pctdistance=0.7,
            startangle=90,
            counterclock=False,
            colors=colors,
            wedgeprops=dict(width=0.7)
        )

        # Slices too small to label inside the chart carry their stats in the legend
        legend_labels = [f"{label} ({pct:.1f}%, {size})" if pct < 4.0 else label for label, size, pct in zip(labels, sizes, percentages)]
        ax.legend(
            wedges,
            legend_labels,
            title="Failure Types",
            loc="lower center",
            bbox_to_anchor=(0.5, -0.3),
            ncol=2,
            fontsize=9
        )
        ax.set_title(f'"{key}"', fontsize=12, pad=10)
        ax.axis('equal')

    # Clear remaining empty subplots
    for idx in range(len(keys), len(axes)):
        axes[idx].axis('off')

    fig.suptitle('Failure Type Distribution by Task', fontsize=16)
    plt.tight_layout()
    plt.savefig(save_path, dpi=300)
    # Release the figure: without this, repeated calls accumulate open
    # figures and leak memory (matplotlib keeps them alive until closed).
    plt.close(fig)
|