halib 0.2.6__tar.gz → 0.2.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. {halib-0.2.6 → halib-0.2.8}/PKG-INFO +3 -2
  2. {halib-0.2.6 → halib-0.2.8}/README.md +2 -1
  3. {halib-0.2.6 → halib-0.2.8}/halib/exp/core/base_config.py +31 -8
  4. {halib-0.2.6 → halib-0.2.8}/halib/exp/perf/perfcalc.py +154 -77
  5. {halib-0.2.6 → halib-0.2.8}/halib.egg-info/PKG-INFO +3 -2
  6. {halib-0.2.6 → halib-0.2.8}/setup.py +1 -1
  7. {halib-0.2.6 → halib-0.2.8}/.gitignore +0 -0
  8. {halib-0.2.6 → halib-0.2.8}/GDriveFolder.txt +0 -0
  9. {halib-0.2.6 → halib-0.2.8}/LICENSE.txt +0 -0
  10. {halib-0.2.6 → halib-0.2.8}/MANIFEST.in +0 -0
  11. {halib-0.2.6 → halib-0.2.8}/halib/__init__.py +0 -0
  12. {halib-0.2.6 → halib-0.2.8}/halib/common/__init__.py +0 -0
  13. {halib-0.2.6 → halib-0.2.8}/halib/common/common.py +0 -0
  14. {halib-0.2.6 → halib-0.2.8}/halib/common/rich_color.py +0 -0
  15. {halib-0.2.6 → halib-0.2.8}/halib/exp/__init__.py +0 -0
  16. {halib-0.2.6 → halib-0.2.8}/halib/exp/core/__init__.py +0 -0
  17. {halib-0.2.6 → halib-0.2.8}/halib/exp/core/base_exp.py +0 -0
  18. {halib-0.2.6 → halib-0.2.8}/halib/exp/core/param_gen.py +0 -0
  19. {halib-0.2.6 → halib-0.2.8}/halib/exp/core/wandb_op.py +0 -0
  20. {halib-0.2.6 → halib-0.2.8}/halib/exp/data/__init__.py +0 -0
  21. {halib-0.2.6 → halib-0.2.8}/halib/exp/data/dataclass_util.py +0 -0
  22. {halib-0.2.6 → halib-0.2.8}/halib/exp/data/dataset.py +0 -0
  23. {halib-0.2.6 → halib-0.2.8}/halib/exp/data/torchloader.py +0 -0
  24. {halib-0.2.6 → halib-0.2.8}/halib/exp/perf/__init__.py +0 -0
  25. {halib-0.2.6 → halib-0.2.8}/halib/exp/perf/flop_calc.py +0 -0
  26. {halib-0.2.6 → halib-0.2.8}/halib/exp/perf/gpu_mon.py +0 -0
  27. {halib-0.2.6 → halib-0.2.8}/halib/exp/perf/perfmetrics.py +0 -0
  28. {halib-0.2.6 → halib-0.2.8}/halib/exp/perf/perftb.py +0 -0
  29. {halib-0.2.6 → halib-0.2.8}/halib/exp/perf/profiler.py +0 -0
  30. {halib-0.2.6 → halib-0.2.8}/halib/exp/viz/__init__.py +0 -0
  31. {halib-0.2.6 → halib-0.2.8}/halib/exp/viz/plot.py +0 -0
  32. {halib-0.2.6 → halib-0.2.8}/halib/filetype/__init__.py +0 -0
  33. {halib-0.2.6 → halib-0.2.8}/halib/filetype/csvfile.py +0 -0
  34. {halib-0.2.6 → halib-0.2.8}/halib/filetype/ipynb.py +0 -0
  35. {halib-0.2.6 → halib-0.2.8}/halib/filetype/jsonfile.py +0 -0
  36. {halib-0.2.6 → halib-0.2.8}/halib/filetype/textfile.py +0 -0
  37. {halib-0.2.6 → halib-0.2.8}/halib/filetype/videofile.py +0 -0
  38. {halib-0.2.6 → halib-0.2.8}/halib/filetype/yamlfile.py +0 -0
  39. {halib-0.2.6 → halib-0.2.8}/halib/online/__init__.py +0 -0
  40. {halib-0.2.6 → halib-0.2.8}/halib/online/gdrive.py +0 -0
  41. {halib-0.2.6 → halib-0.2.8}/halib/online/gdrive_mkdir.py +0 -0
  42. {halib-0.2.6 → halib-0.2.8}/halib/online/projectmake.py +0 -0
  43. {halib-0.2.6 → halib-0.2.8}/halib/online/tele_noti.py +0 -0
  44. {halib-0.2.6 → halib-0.2.8}/halib/system/__init__.py +0 -0
  45. {halib-0.2.6 → halib-0.2.8}/halib/system/cmd.py +0 -0
  46. {halib-0.2.6 → halib-0.2.8}/halib/system/filesys.py +0 -0
  47. {halib-0.2.6 → halib-0.2.8}/halib/system/path.py +0 -0
  48. {halib-0.2.6 → halib-0.2.8}/halib/utils/__init__.py +0 -0
  49. {halib-0.2.6 → halib-0.2.8}/halib/utils/dict.py +0 -0
  50. {halib-0.2.6 → halib-0.2.8}/halib/utils/list.py +0 -0
  51. {halib-0.2.6 → halib-0.2.8}/halib.egg-info/SOURCES.txt +0 -0
  52. {halib-0.2.6 → halib-0.2.8}/halib.egg-info/dependency_links.txt +0 -0
  53. {halib-0.2.6 → halib-0.2.8}/halib.egg-info/requires.txt +0 -0
  54. {halib-0.2.6 → halib-0.2.8}/halib.egg-info/top_level.txt +0 -0
  55. {halib-0.2.6 → halib-0.2.8}/setup.cfg +0 -0
{halib-0.2.6 → halib-0.2.8}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: halib
- Version: 0.2.6
+ Version: 0.2.8
  Summary: Small library for common tasks
  Author: Hoang Van Ha
  Author-email: hoangvanhauit@gmail.com
@@ -53,8 +53,9 @@ Dynamic: summary
  
  # Helper package for coding and automation
  
- **Version 0.2.6**
+ **Version 0.2.8**
  + reorganize packages with most changes in `research` package; also rename `research` to `exp` (package for experiment management and utilities)
+ + update `exp/perfcalc.py` to allow saving computed performance to a CSV file (without explicitly calling method `calc_perfs`)
  
  **Version 0.2.1**
  + `research/base_exp`: add `eval_exp` method to evaluate experiment (e.g., model evaluation on test set) after experiment running is done.
{halib-0.2.6 → halib-0.2.8}/README.md
@@ -1,7 +1,8 @@
  # Helper package for coding and automation
  
- **Version 0.2.6**
+ **Version 0.2.8**
  + reorganize packages with most changes in `research` package; also rename `research` to `exp` (package for experiment management and utilities)
+ + update `exp/perfcalc.py` to allow saving computed performance to a CSV file (without explicitly calling method `calc_perfs`)
  
  **Version 0.2.1**
  + `research/base_exp`: add `eval_exp` method to evaluate experiment (e.g., model evaluation on test set) after experiment running is done.
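The headline 0.2.8 change is the `save_computed_perfs` shortcut in `halib/exp/perf/perfcalc.py` (full diff below). A minimal usage sketch, assuming a hypothetical `PerfCalc` subclass: `MyPerf`, the metric values, and the output directory are illustrative only, and any remaining abstract methods are omitted.

```python
from halib.exp.perf.perfcalc import PerfCalc

class MyPerf(PerfCalc):
    # Hypothetical subclass: only the hooks that save_computed_perfs touches
    # are shown; a real subclass must implement the other abstract methods too.
    def get_dataset_name(self):
        return "cifar10"

    def get_experiment_name(self):
        return "baseline"

# Metrics were computed elsewhere -- no calc_perfs call needed in 0.2.8.
rows, csv_path = MyPerf().save_computed_perfs(
    metrics_data={"acc": 0.91, "f1": 0.89},  # saved as metric_acc, metric_f1
    extra_data={"fold": 1},                  # extra columns, kept before metrics
    outdir="results",                        # CSV filename is generated here
)
```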
{halib-0.2.6 → halib-0.2.8}/halib/exp/core/base_config.py
@@ -42,8 +42,10 @@ class AutoNamedCfg(YAMLWizard, NamedCfg):
      # or handled by the loader.
      pass
  
+
  T = TypeVar("T", bound=AutoNamedCfg)
  
+
  class BaseSelectorCfg(Generic[T]):
      """
      Base class to handle the logic of selecting an item from a list by name.
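For context, the `Generic[T]` bound above means a selector is parameterized over a concrete `AutoNamedCfg` subclass. A purely illustrative sketch of that pattern; `OptimizerCfg` and `OptimizerSelectorCfg` are hypothetical names, not part of halib, and the selector's internals are not shown in this diff:

```python
from dataclasses import dataclass

@dataclass
class OptimizerCfg(AutoNamedCfg):
    # hypothetical concrete config; AutoNamedCfg supplies YAML + naming behavior
    lr: float = 1e-3

class OptimizerSelectorCfg(BaseSelectorCfg[OptimizerCfg]):
    # inherits the select-an-item-by-name logic named in the docstring above
    pass
```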
@@ -72,6 +74,8 @@ class ExpBaseCfg(ABC, YAMLWizard):
      3 - a method cfg
      """
  
+     cfg_name: Optional[str] = None
+
      # Save to yaml file
      def save_to_outdir(
          self, filename: str = "__config.yaml", outdir=None, override: bool = False
@@ -103,13 +107,24 @@ class ExpBaseCfg(ABC, YAMLWizard):
          """Load a configuration from a custom YAML file."""
          pass
  
-     @abstractmethod
-     def get_cfg_name(self):
-         """
-         Get the name of the configuration.
-         This method should be implemented in subclasses.
-         """
-         pass
+     def get_cfg_name(self, sep: str = "__", *args, **kwargs) -> str:
+         # auto get the config name from dataset, method, metric
+         # 2. Generate the canonical Config Name
+         name_parts = []
+         general_info = self.get_general_cfg().get_name()
+         dataset_info = self.get_dataset_cfg().get_name()
+         method_info = self.get_method_cfg().get_name()
+         name_parts = [
+             general_info,
+             f"ds_{dataset_info}",
+             f"mt_{method_info}",
+         ]
+         if "extra" in kwargs:
+             extra_info = kwargs["extra"]
+             assert isinstance(extra_info, str), "'extra' kwarg must be a string."
+             name_parts.append(extra_info)
+         self.cfg_name = sep.join(name_parts)
+         return self.cfg_name
  
      @abstractmethod
      def get_outdir(self):
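The old abstract `get_cfg_name` is thus replaced by a default canonical naming scheme. To illustrate, assuming the three sub-configs report `base`, `cifar10`, and `resnet18` (illustrative names, not from the diff):

```python
# With get_general_cfg().get_name() == "base",
# get_dataset_cfg().get_name() == "cifar10",
# and get_method_cfg().get_name() == "resnet18" (assumed values):
cfg.get_cfg_name()               # -> "base__ds_cifar10__mt_resnet18"
cfg.get_cfg_name(extra="fold1")  # -> "base__ds_cifar10__mt_resnet18__fold1"
cfg.get_cfg_name(sep="-")        # -> "base-ds_cifar10-mt_resnet18"
```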
@@ -120,7 +135,7 @@ class ExpBaseCfg(ABC, YAMLWizard):
          return None
  
      @abstractmethod
-     def get_general_cfg(self):
+     def get_general_cfg(self) -> NamedCfg:
          """
          Get the general configuration like output directory, log settings, SEED, etc.
          This method should be implemented in subclasses.
@@ -135,6 +150,14 @@ class ExpBaseCfg(ABC, YAMLWizard):
          """
          pass
  
+     @abstractmethod
+     def get_method_cfg(self) -> NamedCfg:
+         """
+         Get the method configuration.
+         This method should be implemented in subclasses.
+         """
+         pass
+
      @abstractmethod
      def get_metric_cfg(self) -> NamedCfg:
          """
{halib-0.2.6 → halib-0.2.8}/halib/exp/perf/perfcalc.py
@@ -74,114 +74,191 @@ class PerfCalc(ABC): # Abstract base class for performance calculation
              "All dictionaries in raw_data_for_metrics must have the same keys as metric_names"
          )
  
-     # ! only need to override this method if torchmetrics are not used
-     def calc_exp_perf_metrics(
-         self, metric_names, raw_metrics_data, extra_data=None, *args, **kwargs
-     ):
-         assert isinstance(raw_metrics_data, dict) or isinstance(
-             raw_metrics_data, list
-         ), "raw_data_for_metrics must be a dictionary or a list"
-
-         if extra_data is not None:
-             assert isinstance(
-                 extra_data, type(raw_metrics_data)
-             ), "extra_data must be of the same type as raw_data_for_metrics (dict or list)"
-         # prepare raw_metric data for processing
-         proc_metric_raw_data_ls = (
-             raw_metrics_data
-             if isinstance(raw_metrics_data, list)
-             else [raw_metrics_data.copy()]
-         )
-         self.valid_proc_metric_raw_data(metric_names, proc_metric_raw_data_ls)
-         # prepare extra data for processing
-         proc_extra_data_ls = []
-         if extra_data is not None:
-             proc_extra_data_ls = (
-                 extra_data if isinstance(extra_data, list) else [extra_data.copy()]
-             )
-             assert len(proc_extra_data_ls) == len(
-                 proc_metric_raw_data_ls
-             ), "extra_data must have the same length as raw_data_for_metrics if it is a list"
-         # validate the extra_data
-         self.valid_proc_extra_data(proc_extra_data_ls)
+     # =========================================================================
+     # 1. Formatting Logic (Decoupled)
+     # =========================================================================
+     def package_metrics(
+         self,
+         metric_results_list: List[dict],
+         extra_data_list: Optional[List[dict]] = None,
+     ) -> List[OrderedDict]:
+         """
+         Pure formatting function.
+         Takes ALREADY CALCULATED metrics and formats them
+         (adds metadata, prefixes keys, ensures column order).
+         """
+         # Normalize extra_data to a list if provided
+         if extra_data_list is None:
+             extra_data_list = [{} for _ in range(len(metric_results_list))]
+         elif isinstance(extra_data_list, dict):
+             extra_data_list = [extra_data_list]
+
+         assert len(extra_data_list) == len(
+             metric_results_list
+         ), "Length mismatch: metrics vs extra_data"
  
-         # calculate the metrics output results
-         metrics_backend = self.get_metric_backend()
          proc_outdict_list = []
-         for idx, raw_metrics_data in enumerate(proc_metric_raw_data_ls):
+
+         for metric_res, extra_item in zip(metric_results_list, extra_data_list):
+             # A. Base Metadata
              out_dict = {
                  "dataset": self.get_dataset_name(),
                  "experiment": self.get_experiment_name(),
              }
-             custom_fields = []
-             if len(proc_extra_data_ls) > 0:
-                 # add extra data to the output dictionary
-                 extra_data_item = proc_extra_data_ls[idx]
-                 out_dict.update(extra_data_item)
-                 custom_fields = list(extra_data_item.keys())
-             metric_results = metrics_backend.calc_metrics(
-                 metrics_data_dict=raw_metrics_data, *args, **kwargs
-             )
-             metric_results_prefix = {
-                 f"metric_{k}": v for k, v in metric_results.items()
-             }
-             out_dict.update(metric_results_prefix)
-             ordered_cols = (
-                 REQUIRED_COLS + custom_fields + list(metric_results_prefix.keys())
+
+             # B. Attach Extra Data
+             out_dict.update(extra_item)
+             custom_fields = list(extra_item.keys())
+
+             # C. Prefix Metric Keys (e.g., 'acc' -> 'metric_acc')
+             metric_results_prefixed = {f"metric_{k}": v for k, v in metric_res.items()}
+             out_dict.update(metric_results_prefixed)
+
+             # D. Order Columns
+             all_cols = (
+                 REQUIRED_COLS + custom_fields + list(metric_results_prefixed.keys())
              )
-             out_dict = OrderedDict(
-                 (col, out_dict[col]) for col in ordered_cols if col in out_dict
+             ordered_out = OrderedDict(
+                 (col, out_dict[col]) for col in all_cols if col in out_dict
              )
-             proc_outdict_list.append(out_dict)
+             proc_outdict_list.append(ordered_out)
  
          return proc_outdict_list
  
-     #! custom kwargs:
-     #! outfile - if provided, will save the output to a CSV file with the given path
-     #! outdir - if provided, will save the output to a CSV file in the given directory with a generated filename
-     #! return_df - if True, will return a DataFrame instead of a dictionary
-     def calc_perfs(
+     # =========================================================================
+     # 2. Calculation Logic (The Coordinator)
+     # =========================================================================
+     def calc_exp_perf_metrics(
          self,
+         metric_names: List[str],
          raw_metrics_data: Union[List[dict], dict],
          extra_data: Optional[Union[List[dict], dict]] = None,
          *args,
         **kwargs,
-     ) -> Tuple[Union[List[OrderedDict], pd.DataFrame], Optional[str]]:
+     ) -> List[OrderedDict]:
          """
-         Calculate the metrics.
-         This function should be overridden by the subclass if needed.
-         Must return a dictionary with keys as metric names and values as the calculated metrics.
+         Full workflow: Validates raw data -> Calculates via Backend -> Packages results.
          """
-         metric_names = self.get_metric_backend().metric_names
-         out_dict_list = self.calc_exp_perf_metrics(
-             metric_names=metric_names,
-             raw_metrics_data=raw_metrics_data,
-             extra_data=extra_data,
-             *args,
-             **kwargs,
+         # Prepare Raw Data
+         raw_data_ls = (
+             raw_metrics_data
+             if isinstance(raw_metrics_data, list)
+             else [raw_metrics_data]
          )
+         self.valid_proc_metric_raw_data(metric_names, raw_data_ls)
+
+         # Prepare Extra Data (Validation only)
+         extra_data_ls = None
+         if extra_data:
+             extra_data_ls = extra_data if isinstance(extra_data, list) else [extra_data]
+             self.valid_proc_extra_data(extra_data_ls)
+
+         # Calculate Metrics via Backend
+         metrics_backend = self.get_metric_backend()
+         calculated_results = []
+
+         for data_item in raw_data_ls:
+             res = metrics_backend.calc_metrics(
+                 metrics_data_dict=data_item, *args, **kwargs
+             )
+             calculated_results.append(res)
+
+         # Delegate to Formatting
+         return self.package_metrics(calculated_results, extra_data_ls)
+
+     # =========================================================================
+     # 3. File Saving Logic (Decoupled)
+     # =========================================================================
+     def save_results_to_csv(
+         self, out_dict_list: List[OrderedDict], **kwargs
+     ) -> Tuple[pd.DataFrame, Optional[str]]:
+         """
+         Helper function to convert results to DataFrame and save to CSV.
+         """
          csv_outfile = kwargs.get("outfile", None)
+
+         # Determine Output Path
          if csv_outfile is not None:
              filePathNoExt, _ = os.path.splitext(csv_outfile)
-             # pprint(f"CSV Outfile Path (No Ext): {filePathNoExt}")
              csv_outfile = f"{filePathNoExt}{CSV_FILE_POSTFIX}.csv"
          elif "outdir" in kwargs:
              csvoutdir = kwargs["outdir"]
              csvfilename = f"{now_str()}_{self.get_dataset_name()}_{self.get_experiment_name()}_{CSV_FILE_POSTFIX}.csv"
              csv_outfile = os.path.join(csvoutdir, csvfilename)
  
-         # convert out_dict to a DataFrame
+         # Convert to DataFrame
          df = pd.DataFrame(out_dict_list)
-         # get the orders of the columns as the orders or the keys in out_dict
-         ordered_cols = list(out_dict_list[0].keys())
-         df = df[ordered_cols] # reorder columns
+         if out_dict_list:
+             ordered_cols = list(out_dict_list[0].keys())
+             df = df[ordered_cols]
+
+         # Save to File
          if csv_outfile:
              df.to_csv(csv_outfile, index=False, sep=";", encoding="utf-8")
-         return_df = kwargs.get("return_df", False)
-         if return_df: # return DataFrame instead of dict if requested
-             return df, csv_outfile
-         else:
-             return out_dict_list, csv_outfile
+
+         return df, csv_outfile
+
+     # =========================================================================
+     # 4. Public API: Standard Calculation
+     # raw_metrics_data: example: [{"preds": ..., "target": ...}, ...]
+     # =========================================================================
+     def calc_perfs(
+         self,
+         raw_metrics_data: Union[List[dict], dict],
+         extra_data: Optional[Union[List[dict], dict]] = None,
+         *args,
+         **kwargs,
+     ) -> Tuple[Union[List[OrderedDict], pd.DataFrame], Optional[str]]:
+         """
+         Standard use case: Calculate metrics AND save to CSV.
+         """
+         metric_names = self.get_metric_backend().metric_names
+
+         # 1. Calculate & Package
+         out_dict_list = self.calc_exp_perf_metrics(
+             metric_names=metric_names,
+             raw_metrics_data=raw_metrics_data,
+             extra_data=extra_data,
+             *args,
+             **kwargs,
+         )
+
+         # 2. Save
+         df, csv_outfile = self.save_results_to_csv(out_dict_list, **kwargs)
+
+         return (
+             (df, csv_outfile)
+             if kwargs.get("return_df", False)
+             else (out_dict_list, csv_outfile)
+         )
+
+     # =========================================================================
+     # 5. Public API: Manual / External Metrics (The Shortcut)
+     # =========================================================================
+     def save_computed_perfs(
+         self,
+         metrics_data: Union[List[dict], dict],
+         extra_data: Optional[Union[List[dict], dict]] = None,
+         **kwargs,
+     ) -> Tuple[Union[List[OrderedDict], pd.DataFrame], Optional[str]]:
+
+         # Ensure list format
+         if isinstance(metrics_data, dict):
+             metrics_data = [metrics_data]
+         if isinstance(extra_data, dict):
+             extra_data = [extra_data]
+
+         # 1. Package (Format)
+         formatted_list = self.package_metrics(metrics_data, extra_data)
+
+         # 2. Save
+         df, csv_outfile = self.save_results_to_csv(formatted_list, **kwargs)
+
+         return (
+             (df, csv_outfile)
+             if kwargs.get("return_df", False)
+             else (formatted_list, csv_outfile)
+         )
  
      @staticmethod
      def default_exp_csv_filter_fn(exp_file_name: str) -> bool:
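Taken together, the refactor splits the old monolithic method into `package_metrics` (format), `calc_exp_perf_metrics` (coordinate), and `save_results_to_csv` (persist), with `calc_perfs` as the standard entry point. A hedged end-to-end sketch; `perf`, `preds`, and `target` are placeholders for a concrete subclass and its data:

```python
# Standard path: raw inputs go through the metric backend, then to CSV.
df, csv_path = perf.calc_perfs(
    raw_metrics_data=[{"preds": preds, "target": target}],  # one dict per run
    extra_data=[{"fold": 1}],
    outfile="results/exp.csv",  # or outdir=... for a generated filename
    return_df=True,             # (DataFrame, path) instead of (rows, path)
)

# Shortcut path (new in 0.2.8): metrics already computed, just package and save.
rows, csv_path = perf.save_computed_perfs(
    metrics_data={"acc": 0.91},
    outdir="results",
)
```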
{halib-0.2.6 → halib-0.2.8}/halib.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: halib
- Version: 0.2.6
+ Version: 0.2.8
  Summary: Small library for common tasks
  Author: Hoang Van Ha
  Author-email: hoangvanhauit@gmail.com
@@ -53,8 +53,9 @@ Dynamic: summary
  
  # Helper package for coding and automation
  
- **Version 0.2.6**
+ **Version 0.2.8**
  + reorganize packages with most changes in `research` package; also rename `research` to `exp` (package for experiment management and utilities)
+ + update `exp/perfcalc.py` to allow saving computed performance to a CSV file (without explicitly calling method `calc_perfs`)
  
  **Version 0.2.1**
  + `research/base_exp`: add `eval_exp` method to evaluate experiment (e.g., model evaluation on test set) after experiment running is done.
{halib-0.2.6 → halib-0.2.8}/setup.py
@@ -8,7 +8,7 @@ with open("requirements.txt") as f:
  
  setuptools.setup(
      name="halib",
-     version="0.2.6",
+     version="0.2.8",
      author="Hoang Van Ha",
      author_email="hoangvanhauit@gmail.com",
      description="Small library for common tasks",