PyPI - h-adminsim - Versions diffs - 1.0.0__py3-none-any.whl - Mend

h-adminsim 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62) hide show

h_adminsim/__init__.py +5 -0
h_adminsim/admin_staff.py +280 -0
h_adminsim/assets/configs/data4primary.yaml +47 -0
h_adminsim/assets/configs/data4secondary.yaml +47 -0
h_adminsim/assets/configs/data4tertiary.yaml +47 -0
h_adminsim/assets/country/address.json +141859 -0
h_adminsim/assets/country/country_code.json +244 -0
h_adminsim/assets/departments/department.json +85 -0
h_adminsim/assets/departments/symptom.json +4530 -0
h_adminsim/assets/fhir.schema.json +75253 -0
h_adminsim/assets/names/firstname.txt +1219 -0
h_adminsim/assets/names/lastname.txt +88799 -0
h_adminsim/assets/prompts/cancel_patient_system.txt +38 -0
h_adminsim/assets/prompts/intake_staff_task_user.txt +16 -0
h_adminsim/assets/prompts/intake_supervisor_system.txt +8 -0
h_adminsim/assets/prompts/intake_supervisor_user.txt +31 -0
h_adminsim/assets/prompts/reschedule_patient_system.txt +38 -0
h_adminsim/assets/prompts/schedule_patient_rejected_system.txt +42 -0
h_adminsim/assets/prompts/schedule_patient_system.txt +36 -0
h_adminsim/assets/prompts/schedule_staff_reasoning.txt +57 -0
h_adminsim/assets/prompts/schedule_staff_sc_tool_calling.txt +13 -0
h_adminsim/assets/prompts/schedule_staff_system.txt +10 -0
h_adminsim/assets/prompts/schedule_staff_tool_calling.txt +41 -0
h_adminsim/client/__init__.py +3 -0
h_adminsim/client/google_client.py +209 -0
h_adminsim/client/openai_client.py +199 -0
h_adminsim/client/vllm_client.py +160 -0
h_adminsim/environment/__init__.py +1 -0
h_adminsim/environment/hospital.py +462 -0
h_adminsim/environment/op_scheduling_simulation.py +1126 -0
h_adminsim/pipeline/__init__.py +3 -0
h_adminsim/pipeline/data_generator.py +192 -0
h_adminsim/pipeline/evaluator.py +33 -0
h_adminsim/pipeline/simulation.py +231 -0
h_adminsim/registry/__init__.py +5 -0
h_adminsim/registry/errors.py +89 -0
h_adminsim/registry/models.py +126 -0
h_adminsim/registry/phrases.py +10 -0
h_adminsim/registry/pydantic_models.py +21 -0
h_adminsim/registry/variables.py +9 -0
h_adminsim/supervisor.py +182 -0
h_adminsim/task/agent_task.py +900 -0
h_adminsim/task/fhir_manager.py +222 -0
h_adminsim/task/schedule_assign.py +151 -0
h_adminsim/tools/__init__.py +5 -0
h_adminsim/tools/agent_data_builder.py +124 -0
h_adminsim/tools/data_converter.py +536 -0
h_adminsim/tools/data_synthesizer.py +365 -0
h_adminsim/tools/evaluator.py +258 -0
h_adminsim/tools/sanity_checker.py +216 -0
h_adminsim/tools/scheduling_rule.py +420 -0
h_adminsim/utils/__init__.py +136 -0
h_adminsim/utils/common_utils.py +698 -0
h_adminsim/utils/fhir_utils.py +190 -0
h_adminsim/utils/filesys_utils.py +135 -0
h_adminsim/utils/image_preprocess_utils.py +188 -0
h_adminsim/utils/random_utils.py +358 -0
h_adminsim/version.txt +1 -0
h_adminsim-1.0.0.dist-info/LICENSE +30 -0
h_adminsim-1.0.0.dist-info/METADATA +494 -0
h_adminsim-1.0.0.dist-info/RECORD +62 -0
h_adminsim-1.0.0.dist-info/WHEEL +4 -0

h_adminsim/task/agent_task.py ADDED Viewed

@@ -0,0 +1,900 @@
+import os
+import json
+import random
+from copy import deepcopy
+from decimal import getcontext
+from importlib import resources
+from typing import Tuple, Union, Optional
+from dotenv import load_dotenv, find_dotenv
+from patientsim import PatientAgent
+from patientsim import AdminStaffAgent as IntakeAdminStaffAgent
+from patientsim.environment import OPSimulation as OPFVIntakeSimulation
+from h_adminsim import SupervisorAgent
+from h_adminsim import AdminStaffAgent as SchedulingAdminStaffAgent
+from h_adminsim.environment.hospital import HospitalEnvironment
+from h_adminsim.environment import OPScehdulingSimulation as OPFVScheduleSimulation
+from h_adminsim.tools.sanity_checker import SanityChecker
+from h_adminsim.tools import DataConverter, SchedulingRule
+from h_adminsim.registry import STATUS_CODES, PREFERENCE_PHRASE_PATIENT
+from h_adminsim.utils import colorstr, log
+from h_adminsim.utils.fhir_utils import *
+from h_adminsim.utils.common_utils import *
+class FirstVisitOutpatientTask:
+    def __init__(self):
+        self.token_stats = {
+            'patient_token': {'input':[], 'output': [], 'reasoning': []},
+            'admin_staff_token': {'input': [], 'output': [], 'reasoning': []},
+            'supervisor_token': {'input':[], 'output': [], 'reasoning': []}
+        }
+    def save_token_data(self,
+                        patient_token: Optional[dict] = None,
+                        admin_staff_token: Optional[dict] = None,
+                        supervisor_token: Optional[dict] = None):
+        """
+        Save the API token usage data
+        Args:
+            patient_token (Optional[dict], optional): Patient token information. Defaults to None.
+            admin_staff_token (Optional[dict], optional): Administration staff token information. Defaults to None.
+            supervisor_token (Optional[dict], optional): Supervisor token information. Defaults to None.
+        """
+        if patient_token:
+            self.token_stats['patient_token']['input'].extend(patient_token['prompt_tokens'])
+            self.token_stats['patient_token']['output'].extend(patient_token['completion_tokens'])
+            if 'reasoning_tokens' in patient_token:
+                self.token_stats['patient_token']['reasoning'].extend(patient_token['reasoning_tokens'])
+        if admin_staff_token:
+            self.token_stats['admin_staff_token']['input'].extend(admin_staff_token['prompt_tokens'])
+            self.token_stats['admin_staff_token']['output'].extend(admin_staff_token['completion_tokens'])
+            if 'reasoning_tokens' in admin_staff_token:
+                self.token_stats['admin_staff_token']['reasoning'].extend(admin_staff_token['reasoning_tokens'])
+        if supervisor_token:
+            self.token_stats['supervisor_token']['input'].extend(supervisor_token['prompt_tokens'])
+            self.token_stats['supervisor_token']['output'].extend(supervisor_token['completion_tokens'])
+            if 'reasoning_tokens' in supervisor_token:
+                self.token_stats['supervisor_token']['reasoning'].extend(supervisor_token['reasoning_tokens'])
+    def _init_task_models(self, model: str, vllm_endpoint: Optional[str] = None) -> Tuple[str, str, bool]:
+        """
+        Initialize the model for the task.
+        Args:
+            model (str): The model name.
+            vllm_endpoint (Optional[str], optional): The VLLM endpoint URL. Defaults to None.
+        Returns:
+            Tuple[str, str, bool]: The model name, VLLM endpoint URL, vllm usage flag.
+        """
+        if any(keyword in model.lower() for keyword in ['gemini', 'gpt']):
+            return model, None, False
+        else:
+            assert vllm_endpoint is not None, log('VLLM endpoint must be provided for non-Gemini/GPT models.', 'error')
+            return model, vllm_endpoint, True
+class OutpatientFirstIntake(FirstVisitOutpatientTask):
+    def __init__(self,
+                 patient_model: str,
+                 admin_staff_model: str,
+                 supervisor_agent: Optional[SupervisorAgent] = None,
+                 intake_max_inference: int = 5,
+                 max_retries: int = 8,
+                 admin_staff_last_task_user_prompt_path: Optional[str] = None,
+                 patient_vllm_endpoint: Optional[str] = None,
+                 admin_staff_vllm_endpoint: Optional[str] = None):
+        super().__init__()
+        # Initialize variables
+        self.name = 'intake'
+        self.patient_model, self.patient_vllm_endpoint, self.patient_use_vllm \
+            = self._init_task_models(patient_model, patient_vllm_endpoint)
+        self.admin_staff_model, self.admin_staff_vllm_endpoint, self.admin_staff_use_vllm \
+            = self._init_task_models(admin_staff_model, admin_staff_vllm_endpoint)
+        self.use_supervisor = True if isinstance(supervisor_agent, SupervisorAgent) else False
+        self.supervisor_client = supervisor_agent if self.use_supervisor else None
+        task_mechanism = 'Staff + Supervisor' if self.use_supervisor else 'Staff'
+        self.max_inferences = intake_max_inference
+        self.max_retries = max_retries
+        self._init_last_task_prompt(admin_staff_last_task_user_prompt_path)
+        self.patient_reasoning_kwargs = {'reasoning_effort': 'low'} if 'gpt-5' in self.patient_model.lower() else {}
+        self.staff_reasoning_kwargs = {'reasoning_effort': 'low'} if 'gpt-5' in self.admin_staff_model.lower() else {}
+        log(f'Patient intake tasks are conducted by {colorstr(task_mechanism)}')
+    def _init_last_task_prompt(self, admin_staff_last_task_user_prompt_path: Optional[str] = None) -> str:
+        """
+        Initialize the user prompt for the admnistration staff agent's last task.
+        Args:
+            admin_staff_last_task_user_prompt_path (Optional[str], optional): Path to a custom user prompt file.
+                                                                              If not provided, the default user prompt will be used. Defaults to None.
+        Raises:
+            FileNotFoundError: If the specified user prompt file does not exist.
+        Returns:
+            str: The user prompt.
+        """
+        if not self.use_supervisor:
+            if not admin_staff_last_task_user_prompt_path:
+                prompt_file_name = 'intake_staff_task_user.txt'
+                file_path = resources.files("h_adminsim.assets.prompts").joinpath(prompt_file_name)
+                self.last_task_user_prompt = file_path.read_text()
+            else:
+                if not os.path.exists(admin_staff_last_task_user_prompt_path):
+                    raise FileNotFoundError(colorstr("red", f"User prompt file not found: {admin_staff_last_task_user_prompt_path}"))
+                with open(admin_staff_last_task_user_prompt_path, 'r') as f:
+                    self.last_task_user_prompt = f.read()
+        else:
+            if admin_staff_last_task_user_prompt_path:
+                log('The admin_staff_last_task_user_prompt_path setting is ignored when using supervisor model.', 'warning')
+    @staticmethod
+    def postprocessing_department(text: str) -> str:
+        """
+        Post-processing method of text output, especially for the department decision.
+        Args:
+            text (str): Text input.
+        Returns:
+            str: Post-processed text output.
+        """
+        try:
+            pattern = re.compile(r'Answer:\s*\d+\.\s*(.+)')
+            text = pattern.search(text).group(1)
+        except:
+            text = 'wrong'
+        return text
+    @staticmethod
+    def postprocessing_information(text: str) -> Union[str, dict]:
+        """
+        Post-processing method of text output, especially for the patient information extraction.
+        Args:
+            text (str): Text input.
+        Returns:
+            Union[str, dict]: A dictionary if the text is valid JSON, otherwise the original string.
+        """
+        try:
+            if isinstance(text, str):
+                match = re.search(r'```json\s*(\{.*?\})\s*```', text, re.DOTALL)
+                if match:
+                    json_str = match.group(1)
+                    text_dict = json.loads(json_str)
+                else:
+                    try:
+                        text_dict = json.loads(text)
+                    except:
+                        return text
+            else:
+                text_dict = text
+            assert len(text_dict) == 6 and all(k in text_dict for k in ['name', 'gender', 'phone_number', 'personal_id', 'address', 'department'])   # Basic sanity check
+            return text_dict
+        except:
+            return str(text)
+    def _department_decision(self, prediction_department: str, prediction_supervison: Union[str, dict], gt_department: str) -> Tuple[str, list[str]]:
+        """
+        Determine the final department decision by considering both
+        the interaction agent result and the supervisor agent result.
+        Args:
+            prediction_department (str): The department predicted by the interaction agent.
+            prediction_supervison (Union[str, dict]): The supervisor agent's result.
+                If this is a dictionary, it should contain a 'department' field.
+            gt_department (str): The ground truth department for the patient.
+        Returns:
+            str: The final department decision.
+        """
+        try:
+            sup_department = prediction_supervison.pop('department')
+            if prediction_department == sup_department:
+                trial = ['match']
+            else:
+                if prediction_department in gt_department and sup_department not in gt_department:
+                    trial = ['mismatch - worse']
+                elif prediction_department not in gt_department and sup_department in gt_department:
+                    trial = ['mismatch - better']
+                else:
+                    trial = ['mismatch - both wrong']
+            return sup_department, trial
+        except:
+            return prediction_department, ['supervisor error']
+    def __call__(self, data_pair: Tuple[dict, dict], agent_test_data: dict, agent_results: dict, environment, verbose: bool = False) -> dict:
+        """
+        Estimates the most appropriate medical department for each patient using an LLM agent.
+        Args:
+            data_pair (Tuple[dict, dict]): A pair of ground truth and patient data for agent simulation.
+            agent_test_data (dict): A dictionary containing test data for a single hospital.
+                Expected to include:
+                    - 'department': Dictionary of available departments.
+            agent_results (dict): Placeholder for compatibility; not used in this method.
+            environment (HospitalEnvironment): Hospital environment instance to manage patient schedules.
+            verbose (bool): Whether logging the each result or not.
+        Returns:
+            dict: A dictionary with:
+                - 'gt': List of ground-truth departments.
+                - 'pred': List of predicted departments from the LLM agent.
+                - 'status': List of booleans indicating whether each prediction correct.
+                - 'status_code': List of status codes explaining each status.
+        """
+        gt, test_data = data_pair
+        departments = list(agent_test_data['department'].keys())
+        results = init_result_dict()
+        sanity_checker = SanityChecker()
+        # Append a ground truth
+        name, gender, birth_date, telecom, personal_id, address = \
+            gt['patient'], gt['gender'], gt['birthDate'], gt['telecom'][0]['value'], gt['identifier'][0]['value'], gt['address'][0]['text']
+        gt_data = {
+            'patient': {
+                'name': name,
+                'gender': gender,
+                'phone_number': telecom,
+                'personal_id': personal_id,
+                'address': address,
+            },
+            'department': gt['department']
+        }
+        results['gt'].append(gt_data)
+        # LLM call: Conversation and department decision
+        department_candidates = test_data['constraint']['symptom']['department']
+        if test_data['constraint']['symptom_level'] == 'simple':
+            medical_history = "None. This is the patient's first visit."
+            diagnosis = "Unknown for now, as this is the patient's first visit to the hospital."
+        elif test_data['constraint']['symptom_level'] == 'with_history':
+            medical_history = f"Diagnosed with {test_data['constraint']['symptom']['disease']} at a primary or secondary hospital."
+            diagnosis = test_data['constraint']['symptom']['disease']
+        else:
+            log("Patient's symptom level must be either 'simple' or 'with_history'.", "error")
+        # Simulation patient intake
+        patient_agent = PatientAgent(
+            self.patient_model,
+            'outpatient',
+            lang_proficiency_level='B',
+            recall_level='no_history' if test_data['constraint']['symptom_level'] == 'simple' else 'high',
+            use_vllm=self.patient_use_vllm,
+            vllm_endpoint=self.patient_vllm_endpoint,
+            department=department_candidates,
+            name=name,
+            birth_date=birth_date,
+            gender=gender,
+            telecom=telecom,
+            personal_id=personal_id,
+            address=address,
+            medical_history=medical_history,
+            diagnosis=diagnosis,
+            chiefcomplaint=test_data['constraint']['symptom']['symptom'],
+            temperature=0 if not 'gpt-5' in self.patient_model.lower() else 1
+        )
+        admin_staff_agent = IntakeAdminStaffAgent(
+            self.admin_staff_model,
+            departments,
+            max_inferences=self.max_inferences,
+            use_vllm=self.admin_staff_use_vllm,
+            vllm_endpoint=self.admin_staff_vllm_endpoint,
+            temperature=0 if not 'gpt-5' in self.admin_staff_model.lower() else 1
+        )
+        sim_environment = OPFVIntakeSimulation(patient_agent, admin_staff_agent, max_inferences=self.max_inferences)
+        output = run_with_retry(
+            sim_environment.simulate,
+            verbose=False,
+            patient_kwargs=self.patient_reasoning_kwargs,
+            staff_kwargs=self.staff_reasoning_kwargs,
+            max_retries=self.max_retries,
+        )
+        dialogs, patient_token, admin_staff_token = output['dialog_history'], output.get('patient_token_usage'), output.get('admin_staff_token_usage')
+        prediction_department = OutpatientFirstIntake.postprocessing_department(dialogs[-1]['content'])
+        # LLM call: Agent which should extract demographic information of the patient and evaluation the department decision result
+        dialogs = preprocess_dialog(dialogs)
+        if self.use_supervisor:
+            user_prompt = self.supervisor_client.user_prompt_template.format(
+                CONVERSATION=dialogs,
+                DEPARTMENTS=''.join([f'{i+1}. {department}\n' for i, department in enumerate(departments)])
+            )
+            prediction_supervision = run_with_retry(
+                self.supervisor_client,
+                user_prompt,
+                using_multi_turn=False,
+                verbose=False,
+                max_retries=self.max_retries,
+            )
+        else:
+            prediction_supervision = run_with_retry(
+                admin_staff_agent,
+                self.last_task_user_prompt,
+                verbose=False,
+                max_retries=self.max_retries,
+                **self.staff_reasoning_kwargs,
+            )
+        prediction_supervision = OutpatientFirstIntake.postprocessing_information(prediction_supervision)
+        # Append token data
+        self.save_token_data(
+            patient_token,
+            admin_staff_token,
+            supervisor_token=self.supervisor_client.client.token_usages if self.use_supervisor else {}
+        )
+        # Sanity check
+        department, trial = self._department_decision(prediction_department, prediction_supervision, gt['department'])
+        prediction = {'patient': prediction_supervision, 'department': [department]}
+        status, status_code = sanity_checker.intake_check(
+            prediction=prediction,
+            gt=gt_data,
+            conversations=dialogs
+        )
+        if verbose:
+            log(f'GT    : {gt_data}')
+            log(f'Pred  : {prediction}')
+            log(f'Status: {status_code}\n\n\n')
+        # Append results
+        results['pred'].append(prediction)
+        results['status'].append(status)
+        results['status_code'].append(status_code)
+        results['trial'].append(trial)
+        results['dialog'].append(dialogs)
+        return results
+class OutpatientFirstScheduling(FirstVisitOutpatientTask):
+    def __init__(self,
+                 patient_model: str,
+                 admin_staff_model: str,
+                 schedule_cancellation_prob: float = 0.05,
+                 request_early_schedule_prob: float = 0.1,
+                 preference_rejection_prob: float = 0.3,
+                 preference_rejection_prob_decay: float = 0.5,
+                 fhir_integration: bool = False,
+                 scheduling_max_inference: int = 5,
+                 scheduling_strategy: str = 'tool_calling',
+                 max_retries: int = 8,
+                 patient_vllm_endpoint: Optional[str] = None,
+                 admin_staff_vllm_endpoint: Optional[str] = None):
+        super().__init__()
+        # Initialize variables
+        getcontext().prec = 10
+        dotenv_path = find_dotenv(usecwd=True)
+        load_dotenv(dotenv_path, override=True)
+        self.name = 'schedule'
+        self.patient_model, self.patient_vllm_endpoint, self.patient_use_vllm \
+            = self._init_task_models(patient_model, patient_vllm_endpoint)
+        self.admin_staff_model, self.admin_staff_vllm_endpoint, self.admin_staff_use_vllm \
+            = self._init_task_models(admin_staff_model, admin_staff_vllm_endpoint)
+        # Initialize scheduling methods and a staff agent
+        self.admin_staff_agent = SchedulingAdminStaffAgent(
+            target_task='first_outpatient_scheduling',
+            model=self.admin_staff_model,
+            use_vllm=self.admin_staff_use_vllm,
+            vllm_endpoint=self.admin_staff_vllm_endpoint,
+            temperature=0 if not 'gpt-5' in self.admin_staff_model.lower() else 1
+        )
+        # Scheduling parameters
+        self.schedule_cancellation_prob = schedule_cancellation_prob
+        self.request_early_schedule_prob = request_early_schedule_prob
+        self.preference_rejection_prob = preference_rejection_prob
+        self.preference_rejection_prob_decay = preference_rejection_prob_decay
+        # Others
+        self.fhir_integration = fhir_integration
+        self.max_retries = max_retries
+        self.max_inferences = scheduling_max_inference
+        self.scheduling_strategy = scheduling_strategy
+        assert self.scheduling_strategy in ['reasoning', 'tool_calling'], \
+            log('Scheduling strategy must be either `reasoning` or `tool_calling`.', 'error')
+        self.schedule_patient_system_prompt_path = str(resources.files("h_adminsim.assets.prompts").joinpath('schedule_patient_system.txt'))
+        self.cancel_patient_system_prompt_path = str(resources.files("h_adminsim.assets.prompts").joinpath('cancel_patient_system.txt'))
+        self.reschedule_patient_system_prompt_path = str(resources.files("h_adminsim.assets.prompts").joinpath('reschedule_patient_system.txt'))
+        self.patient_reasoning_kwargs = {'reasoning_effort': 'low'} if 'gpt-5' in self.patient_model.lower() else {}
+        self.staff_reasoning_kwargs = {'reasoning_effort': 'low'} if 'gpt-5' in self.admin_staff_model.lower() else {}
+    def _init_simulation(self,
+                         system_prompt_path: str,
+                         environment: HospitalEnvironment,
+                         additional_patient_conditions: dict = {}) -> OPFVScheduleSimulation:
+        """
+        Initialize an outpatient first-visit intake and scheduling simulation.
+        Args:
+            system_prompt_path (str): Path to the system prompt used to initialize the patient agent.
+            environment (HospitalEnvironment): Hospital environment configuration for the simulation.
+            additional_patient_conditions (dict, optional): Additional patient-specific conditions for simulation control.
+        Returns:
+            OPFVIntakeSimulation: Configured outpatient intake and scheduling simulation instance.
+        """
+        patient_agent = PatientAgent(
+            self.patient_model,
+            'outpatient',
+            use_vllm=self.patient_use_vllm,
+            vllm_endpoint=self.patient_vllm_endpoint,
+            system_prompt_path=system_prompt_path,
+            log_verbose=False,
+            additional_patient_conditions=additional_patient_conditions,
+            temperature=0 if not 'gpt-5' in self.patient_model.lower() else 1
+        )
+        sim_environment = OPFVScheduleSimulation(
+            patient_agent=patient_agent,
+            admin_staff_agent=self.admin_staff_agent,
+            metadata=self._metadata,
+            department_data=self._department_data,
+            environment=environment,
+            scheduling_strategy=self.scheduling_strategy,
+            preference_rejection_prob=self.preference_rejection_prob,
+            preference_rejection_prob_decay=self.preference_rejection_prob_decay,
+            fhir_integration=self.fhir_integration,
+            sanity_checker=self.sanity_checker,
+        )
+        return sim_environment
+    def get_intake_information(self, gt: dict, agent_results: dict, doctor_information: dict) -> Tuple[dict, str, bool]:
+        """
+        Extracts the patient name and predicted department from agent results.
+        If predictions are not available, falls back to using ground truth labels.
+        Args:
+            gt (dict): Ground truth data of a patient.
+            agent_results (dict): A dictionary that may contain predicted department results under the key 'department'.
+            doctor_information (dict): Dictionary of doctor data including their existing schedules.
+                                       Each key is a doctor's name, and each value includes a 'schedule' field.
+        Returns:
+            Tuple[dict, str, bool]: Patient information, determined department, either predicted or ground truth and its sanity status.
+        """
+        # Prediction results are existing case
+        try:
+            for i, intake_gt in enumerate(agent_results['intake']['gt']):
+                if gt['patient'] == intake_gt['patient']['name']:
+                    break
+            patient_info = agent_results['intake']['pred'][i]['patient']
+            department = agent_results['intake']['pred'][i]['department'][0]
+            sanity = agent_results['intake']['status'][i]
+            assert gt['patient'] == agent_results['intake']['gt'][i]['patient']['name']
+        # Loading from the ground truth
+        except:
+            log('The predicted department is not given. The ground truth value will be used.', 'warning')
+            patient_info = {
+                'name': gt['patient'],
+                'gender': gt['gender'],
+                'phone_number': gt['telecom'][0]['value'],
+                'personal_id': gt['identifier'][0]['value'],
+                'address': gt['address'][0]['text'],
+            }
+            department = doctor_information[gt['attending_physician']]['department']
+            assert department in gt['department']
+            sanity = True
+        return patient_info, department, sanity
+    def cancellation_request(self,
+                        doctor_information: dict,
+                        environment: HospitalEnvironment,
+                        idx: Optional[int] = None,
+                        verbose: bool = False) -> Tuple[dict, Optional[dict]]:
+        """
+        Cancel a doctor's scheduled appointment.
+        Args:
+            doctor_information (dict): A dictionary containing information about the doctor(s) involved,
+                                       including availability and other relevant details.
+            environment (HospitalEnvironment): Hospital environment.
+            idx (int, optional): Specific patient schedule index.
+            verbose (bool, optional): Whether logging the each result or not. Defaults to False.
+        Returns:
+            Tuple[dict, Optional[dict]]: Updated doctor information and a result dictionary after cancellation.
+        """
+        if idx is None:
+            candidate_idx = [i for i, schedule in enumerate(environment.patient_schedules) if schedule['status'] == 'scheduled']
+            idx = random.choice(candidate_idx) if len(candidate_idx) else -1
+        if idx >= 0:
+            # Ground-truth cancelled schedule
+            cancelled_schedule = environment.patient_schedules[idx]
+            patient = cancelled_schedule['patient']
+            doctor, date, time = cancelled_schedule['attending_physician'], cancelled_schedule['date'], cancelled_schedule['schedule']
+            # Initialize simulation environment for cancellation
+            sim_environment = self._init_simulation(
+                system_prompt_path=self.cancel_patient_system_prompt_path,
+                environment=environment,
+                additional_patient_conditions={
+                    'patient_name': patient,
+                    'doctor_name': doctor,
+                    'date': date,
+                    'start_time': hour_to_hhmmss(time[0])
+                }
+            )
+            # Schedule cancellation simulation
+            doctor_information, result_dict = run_with_retry(
+                sim_environment.canceling_simulate,
+                gt_idx=idx,
+                doctor_information=doctor_information,
+                patient_schedules=environment.patient_schedules,
+                verbose=verbose,
+                max_inferences=self.max_inferences,
+                patient_kwargs=self.patient_reasoning_kwargs,
+                staff_kwargs=self.staff_reasoning_kwargs,
+                max_retries=self.max_retries,
+            )
+            # Successfully canceled
+            if result_dict['status'][0] is not False:   # No GT and correct case
+                # Update waiting list due to cancellation
+                doctor_information, rs_result_dict = self.automatic_waiting_list_update(
+                    sim_environment=sim_environment,
+                    environment=environment,
+                    doctor_information=doctor_information,
+                )
+                # Update result dictionary
+                for key in result_dict.keys():
+                    if len(rs_result_dict[key]):
+                        result_dict[key].append(tuple(rs_result_dict[key]))
+            return doctor_information, result_dict
+        return doctor_information, None
+    def rescheduling_request(self,
+                             doctor_information: dict,
+                             environment: HospitalEnvironment,
+                             idx: Optional[int] = None,
+                             verbose: bool = False) -> Tuple[dict, Optional[dict]]:
+        """
+        Add a patient schedule to the waiting list in the given environment.
+        Args:
+            doctor_information (dict): A dictionary containing information about the doctor(s) involved,
+                                       including availability and other relevant details.
+            environment (HospitalEnvironment): Hospital environment.
+            idx (int, optional): Specific patient schedule index.
+            verbose (bool, optional): Whether logging the each result or not. Defaults to False.
+        Returns:
+            Tuple[dict, Optional[dict]]: Updated doctor information and a result dictionary after cancellation.
+        """
+        result_dict = init_result_dict()
+        if idx is None:
+            candidate_idx = [i for i, schedule in enumerate(environment.patient_schedules) if schedule['status'] == 'scheduled']
+            idx = random.choice(candidate_idx) if len(candidate_idx) else -1
+        if idx >= 0:
+            requested_schedule = environment.patient_schedules[idx]
+            if all(requested_schedule != s[1] for s in environment.waiting_list):
+                # Ground-truth rescheduling requested schedule
+                patient = requested_schedule['patient']
+                doctor, date, time = requested_schedule['attending_physician'], requested_schedule['date'], requested_schedule['schedule']
+                # Initialize simulation environment for rescheduling request
+                sim_environment = self._init_simulation(
+                    system_prompt_path=self.reschedule_patient_system_prompt_path,
+                    environment=environment,
+                    additional_patient_conditions={
+                        'patient_name': patient,
+                        'doctor_name': doctor,
+                        'date': date,
+                        'start_time': hour_to_hhmmss(time[0])
+                    }
+                )
+                # Rescheduling request simulation
+                doctor_information, result_dict = run_with_retry(
+                    sim_environment.rescheduling_simulate,
+                    gt_idx=idx,
+                    doctor_information=doctor_information,
+                    patient_schedules=environment.patient_schedules,
+                    verbose=verbose,
+                    max_inferences=self.max_inferences,
+                    patient_kwargs=self.patient_reasoning_kwargs,
+                    staff_kwargs=self.staff_reasoning_kwargs,
+                    max_retries=self.max_retries,
+                )
+                if result_dict['status'][0] is not False:   # No GT and correct case
+                    if 'patient' in result_dict['pred'][0]:
+                        new_schedule = result_dict['pred'][0]
+                        doctor_information[new_schedule['attending_physician']]['schedule'][new_schedule['date']].append(new_schedule['schedule'])
+                        doctor_information[new_schedule['attending_physician']]['schedule'][new_schedule['date']].sort()
+                        self.update_env(
+                            status=True,
+                            prediction=new_schedule,
+                            environment=environment,
+                        )
+                return doctor_information, result_dict
+            return doctor_information, None
+        return doctor_information, None
+    def automatic_waiting_list_update(self,
+                                      sim_environment: OPFVScheduleSimulation,
+                                      environment: HospitalEnvironment,
+                                      doctor_information: Optional[dict] = None) -> Tuple[dict, dict]:
+        """
+        Automatically update the waiting list by attempting to reschedule patients.
+        Args:
+            sim_environment (OPFVScheduleSimulation): The simulation environment used for scheduling.
+            environment (HospitalEnvironment): Hospital environment.
+            doctor_information (Optional[dict], optional): A dictionary containing information about the doctor(s) involved,
+                                                           including availability and other relevant details. Defaults to None.
+        Returns:
+            Tuple[dict, dict]: Updated doctor information and a result dictionary.
+        """
+        all_result_dict = init_result_dict()
+        for result in sim_environment.automatic_waiting_list_update(
+            doctor_information=doctor_information,
+            **self.staff_reasoning_kwargs,
+        ):
+            doctor_information, result_dict = result['doctor_information'], result['result_dict']
+            if result_dict['status'][0]:
+                new_schedule, original = result_dict['pred'][0], result['original']
+                doctor_information[new_schedule['attending_physician']]['schedule'][new_schedule['date']].append(new_schedule['schedule'])
+                doctor_information[new_schedule['attending_physician']]['schedule'][new_schedule['date']].sort()
+                self.update_env(
+                    status=True,
+                    prediction=new_schedule,
+                    environment=environment,
+                )
+                log(f'{colorstr("[RESCHEDULED]")}: {original} is rescheduled to {new_schedule}')
+            all_result_dict['gt'].extend(result_dict['gt'])
+            all_result_dict['pred'].extend(result_dict['pred'])
+            all_result_dict['status'].extend(result_dict['status'])
+            all_result_dict['status_code'].extend(result_dict['status_code'])
+            all_result_dict['dialog'].extend(result_dict['dialog'])
+        return doctor_information, all_result_dict
+    def update_env(self,
+                   status: bool,
+                   prediction: Union[dict, str],
+                   environment: HospitalEnvironment,
+                   patient_information: Optional[dict] = None):
+        """
+        Update the simulation environment with scheduling results and optionally synchronize FHIR resources.
+        Args:
+            status (bool): Whether the scheduling task was successful. If True, FHIR resources may be updated.
+            prediction (Union[dict, str]): The predicted scheduling result (e.g., patient schedule information).
+            environment (HospitalEnvironment): The environment instance to be updated (must implement `update_env`).
+            patient_information (Optional[dict], optional): Patient-related predicted (or GT) information to generate FHIR Patient resources. Defaults to None.
+        """
+        # POST/PUT to FHIR
+        fhir_patient, fhir_appointment = None, None
+        if status and self.fhir_integration:
+            if patient_information is not None:
+                fhir_patient = DataConverter.data_to_patient(
+                    {
+                        'metadata': deepcopy(self._metadata),
+                        'department': deepcopy(self._department_data),
+                        'patient': {
+                            prediction['patient']: {
+                                'department': prediction['department'],
+                                'gender': patient_information['gender'],
+                                'telecom': [{'system': 'phone', 'value': patient_information['phone_number'], 'use': 'mobile'}],
+                                'birthDate': personal_id_to_birth_date(patient_information['personal_id']),
+                                'identifier': [{'value': patient_information['personal_id'], 'use': 'official'}],
+                                'address': [{'type': 'postal', 'text': patient_information['address'], 'use': 'home'}],
+                            }
+                        }
+                    }
+                )[0]
+            fhir_appointment = DataConverter.get_fhir_appointment(data={'metadata': deepcopy(self._metadata),
+                                                                        'department': deepcopy(self._department_data),
+                                                                        'information': deepcopy(prediction)})
+        environment.update_env(
+            status=status,
+            patient_schedule=prediction,
+            fhir_resources={'Patient': fhir_patient, 'Appointment': fhir_appointment}
+        )
+    def __call__(self, data_pair: Tuple[dict, dict], agent_test_data: dict, agent_results: dict, environment, verbose: bool = False) -> dict:
+        """
+        This method uses agent test data to prompt an LLM for scheduling decisions, post-processes
+        the output, runs sanity checks on predicted schedules, and collects the results for evaluation.
+        Args:
+            data_pair (Tuple[dict, dict]): A pair of ground truth and patient data for agent simulation.
+            agent_test_data (dict): Dictionary containing test data and metadata for a single hospital.
+                Expected keys include:
+                    - 'metadata': A dict containing start_hour, end_hour, and interval_hour under 'time'.
+                    - 'agent_data': A list of (ground_truth, test_data) pairs.
+                    - 'doctor': A dictionary of doctor profiles with department and schedule info.
+            agent_results (dict): Optional dictionary containing prior department predictions.
+                Used to extract department-level guidance per patient. Can be empty.
+            environment (HospitalEnvironment): Hospital environment instance to manage patient schedules.
+            verbose (bool, option): Whether logging the each result or not.
+        Returns:
+            dict: A dictionary with three keys:
+                - 'gt': List of ground truth results, each including patient info, attending physician, department, and schedule.
+                - 'pred': List of predicted results (either valid dict or fallback string).
+                - 'status': List of booleans indicating whether each prediction passed sanity checks.
+                - 'status_code': List of status codes explaining each status.
+        """
+        gt, test_data = data_pair
+        self._metadata = agent_test_data.get('metadata')
+        self._department_data = agent_test_data.get('department')
+        self._START_HOUR = self._metadata.get('time').get('start_hour')
+        self._END_HOUR = self._metadata.get('time').get('end_hour')
+        self._TIME_UNIT = self._metadata.get('time').get('interval_hour')
+        self.sanity_checker = SanityChecker(self._START_HOUR, self._END_HOUR, self._TIME_UNIT)
+        doctor_information = environment.get_general_doctor_info_from_fhir() if self.fhir_integration else agent_test_data.get('doctor')
+        patient_info, department, sanity = self.get_intake_information(gt, agent_results, doctor_information)
+        self.rules = SchedulingRule(self._metadata, self._department_data, environment, self.fhir_integration)
+        results = init_result_dict()
+        # Make scheduling GT list
+        gt_data = [
+            {
+                'patient': patient_info['name'] if sanity else gt.get('patient'),
+                'department': department if sanity else gt.get('department'),
+                'preference': preference,
+                'preferred_doctor': gt.get('attending_physician') if preference == 'doctor' else "Doesn't matter",
+                'valid_from': gt.get('valid_from') if preference == 'date' else None,
+            } for preference in gt.get('preference')
+        ]
+        staff_known_data = [
+            {
+                'patient': patient_info['name'],
+                'department': department,
+                'patient_intention': None,
+            } for _ in range(len(gt_data))
+        ]
+        # If the precedent department data is wrong, continue
+        if not sanity:
+            results['gt'].append(gt_data)
+            results['pred'].append({})
+            results['status'].append(False)
+            results['status_code'].append(STATUS_CODES['preceding'])
+            return results
+        #################################################### Regular Scheudling Simulation ####################################################
+        # Initialize the simulation environment using the first preference data
+        preference = gt_data[0].get('preference')
+        preference_desc = PREFERENCE_PHRASE_PATIENT[preference] if preference != 'date' \
+                    else PREFERENCE_PHRASE_PATIENT[preference].format(date=gt_data[0].get('valid_from'))
+        sim_environment = self._init_simulation(
+            system_prompt_path=self.schedule_patient_system_prompt_path,
+            environment=environment,
+            additional_patient_conditions={
+                'preference': preference,
+                'preference_desc': preference_desc,
+                'preferred_doctor': gt_data[0]['preferred_doctor'],
+            }
+        )
+        # Simulate the main scheduling task
+        doctor_information, result_dict = run_with_retry(
+            sim_environment.scheduling_simulate,
+            gt_data=gt_data,
+            staff_known_data=staff_known_data,
+            doctor_information=doctor_information,
+            verbose=verbose,
+            patient_kwargs=self.patient_reasoning_kwargs,
+            staff_kwargs=self.staff_reasoning_kwargs,
+            max_retries=self.max_retries,
+        )
+        prediction, status, status_code = \
+            result_dict['pred'][0], result_dict['status'][0], result_dict['status_code'][0]
+        if verbose:
+            log(f'Pred  : {prediction}')
+            log(f'Status: {status_code}')
+            log(f'Final Status: {status_code}\n\n\n')
+        # Update the simulation environment and the doctor information in the agent test data
+        if status:
+            doctor_information[prediction['attending_physician']]['schedule'][prediction['date']].append(prediction['schedule'])
+            doctor_information[prediction['attending_physician']]['schedule'][prediction['date']].sort()
+        self.update_env(
+            status=status,
+            prediction=prediction,
+            environment=environment,
+            patient_information=patient_info,
+        )
+        agent_test_data['doctor'] = doctor_information
+        # Append results
+        for key in result_dict.keys():
+            results[key] += result_dict[key]
+        #######################################################################################################################################
+        # Other events
+        ## Simulate the schedule cancellation requests
+        if random.random() < self.schedule_cancellation_prob:
+            doctor_information, result_dict = self.cancellation_request(
+                doctor_information=doctor_information,
+                environment=environment,
+                verbose=verbose,
+            )
+            if result_dict is not None:
+                agent_test_data['doctor'] = doctor_information
+                results['gt'].extend(result_dict['gt'])
+                results['pred'].extend(result_dict['pred'])
+                results['status'].extend(result_dict['status'])
+                results['status_code'].extend(result_dict['status_code'])
+                results['dialog'].extend(result_dict['dialog'])
+                if verbose:
+                    log(f'Pred  : {result_dict["pred"]}')
+                    log(f'Status: {result_dict["status_code"]}')
+                    log(f'Final Status: {result_dict["status_code"]}\n\n\n')
+        ## Simulate the resecheduling requests
+        if random.random() < self.request_early_schedule_prob:
+            doctor_information, result_dict = self.rescheduling_request(
+                doctor_information=doctor_information,
+                environment=environment,
+                verbose=verbose
+            )
+            if result_dict is not None:
+                agent_test_data['doctor'] = doctor_information
+                results['gt'].extend(result_dict['gt'])
+                results['pred'].extend(result_dict['pred'])
+                results['status'].extend(result_dict['status'])
+                results['status_code'].extend(result_dict['status_code'])
+                results['dialog'].extend(result_dict['dialog'])
+                if verbose:
+                    log(f'Pred  : {result_dict["pred"]}')
+                    log(f'Status: {result_dict["status_code"]}')
+                    log(f'Final Status: {result_dict["status_code"]}\n\n\n')
+        return results