PyPI - evalsense - Versions diffs - 0.1.0__py3-none-any.whl - Mend

evalsense 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

evalsense/__init__.py +0 -0
evalsense/cli/__init__.py +0 -0
evalsense/cli/__main__.py +4 -0
evalsense/cli/datasets.py +18 -0
evalsense/cli/main.py +25 -0
evalsense/constants.py +33 -0
evalsense/dataset_config/ACI-BENCH.yml +51 -0
evalsense/datasets/__init__.py +23 -0
evalsense/datasets/dataset_config.py +302 -0
evalsense/datasets/dataset_manager.py +292 -0
evalsense/datasets/managers/__init__.py +3 -0
evalsense/datasets/managers/aci_bench.py +83 -0
evalsense/evaluation/__init__.py +25 -0
evalsense/evaluation/evaluator.py +107 -0
evalsense/evaluation/evaluators/__init__.py +41 -0
evalsense/evaluation/evaluators/bertscore.py +273 -0
evalsense/evaluation/evaluators/bleu.py +159 -0
evalsense/evaluation/evaluators/g_eval.py +272 -0
evalsense/evaluation/evaluators/qags.py +910 -0
evalsense/evaluation/evaluators/rouge.py +134 -0
evalsense/evaluation/experiment.py +228 -0
evalsense/generation/__init__.py +4 -0
evalsense/generation/generation_steps.py +11 -0
evalsense/generation/model_config.py +70 -0
evalsense/logging.py +61 -0
evalsense/py.typed +0 -0
evalsense/tasks/__init__.py +7 -0
evalsense/tasks/task_preprocessor.py +106 -0
evalsense/utils/__init__.py +0 -0
evalsense/utils/dict.py +14 -0
evalsense/utils/files.py +249 -0
evalsense/utils/huggingface.py +20 -0
evalsense/utils/text.py +274 -0
evalsense/workflow/__init__.py +9 -0
evalsense/workflow/analysers/__init__.py +11 -0
evalsense/workflow/analysers/metric_correlation_analyser.py +201 -0
evalsense/workflow/analysers/tabular_analyser.py +93 -0
evalsense/workflow/pipeline.py +529 -0
evalsense/workflow/project.py +426 -0
evalsense/workflow/result_analyser.py +31 -0
evalsense-0.1.0.dist-info/METADATA +139 -0
evalsense-0.1.0.dist-info/RECORD +45 -0
evalsense-0.1.0.dist-info/WHEEL +4 -0
evalsense-0.1.0.dist-info/entry_points.txt +2 -0
evalsense-0.1.0.dist-info/licenses/LICENCE +21 -0

evalsense/__init__.py ADDED Viewed

File without changes

evalsense/cli/__init__.py ADDED Viewed

File without changes

evalsense/cli/__main__.py ADDED Viewed

@@ -0,0 +1,4 @@
+# Module entry point for `python -m evalsense.cli`: runs the Typer application
+# defined in evalsense.cli.main, using APP_NAME as the displayed program name.
+from evalsense.cli.main import app
+from evalsense.constants import APP_NAME
+app(prog_name=APP_NAME)

evalsense/cli/datasets.py ADDED Viewed

@@ -0,0 +1,18 @@
+import typer
+# Typer sub-application grouping the dataset-related commands. With
+# `no_args_is_help=True`, invoking it without arguments shows the help text.
+datasets_app = typer.Typer(
+    no_args_is_help=True,
+    help="Manage datasets for EvalSense.",
+)
+# NOTE: Typer uses a command's docstring as its CLI help text, so the docstring
+# below is user-facing output rather than internal documentation.
+@datasets_app.command(no_args_is_help=True)
+def get(name: str):
+    """
+    Download and prepare a dataset.
+    """
+    # Placeholder: as written, this command only prints a message; no download
+    # or preparation logic is called from here.
+    print(f"Downloading and preparing dataset {name}.")
+# Allow running this module directly as a script.
+if __name__ == "__main__":
+    datasets_app()

evalsense/cli/main.py ADDED Viewed

@@ -0,0 +1,25 @@
+import typer
+from typing_extensions import Annotated
+from evalsense.cli.datasets import datasets_app
+# Root Typer application. With `no_args_is_help=True`, invoking it without
+# arguments shows the help text.
+app = typer.Typer(
+    no_args_is_help=True,
+    help="EvalSense: A tool for evaluating LLM performance on healthcare tasks.",
+)
+# Mount the dataset commands under the `datasets` sub-command.
+app.add_typer(datasets_app, name="datasets")
+# NOTE: Typer uses a command's docstring as its CLI help text, so the docstring
+# below is user-facing output rather than internal documentation.
+@app.command(no_args_is_help=True)
+def run(
+    # Exposed as `--model` / `-m`.
+    model: Annotated[str, typer.Option("--model", "-m")],
+    # Exposed as `--dataset` / `-d`.
+    dataset: Annotated[str, typer.Option("--dataset", "-d")],
+):
+    """
+    Run a model on a dataset.
+    """
+    # Placeholder: as written, this command only prints a message; no model is
+    # loaded or executed from here.
+    print(f"Running model {model} on dataset {dataset}.")
+# Allow running this module directly as a script.
+if __name__ == "__main__":
+    app()

evalsense/constants.py ADDED Viewed

@@ -0,0 +1,33 @@
+import os
+from pathlib import Path
+from platformdirs import user_cache_dir
+# Application metadata
+APP_NAME = "evalsense"
+APP_AUTHOR = "NHS"
+USER_AGENT = "EvalSense/0.1.0"
+# Datasets
+DEFAULT_VERSION_NAME = "default"
+DEFAULT_HASH_TYPE = "sha256"
+# Optional OpenAI credential; None when the environment variable is not set.
+OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
+# Root storage directory: an explicit EVALSENSE_STORAGE_DIR wins, otherwise the
+# per-user cache directory reported by platformdirs is used.
+if "EVALSENSE_STORAGE_DIR" in os.environ:
+    STORAGE_PATH = Path(os.environ["EVALSENSE_STORAGE_DIR"])
+else:
+    STORAGE_PATH = Path(user_cache_dir(APP_NAME, APP_AUTHOR))
+DATA_PATH = STORAGE_PATH / "datasets"
+MODELS_PATH = STORAGE_PATH / "models"
+PROJECTS_PATH = STORAGE_PATH / "projects"
+# Set HF_HUB_CACHE to a folder inside the storage directory, unless the user
+# already set that variable themselves.
+os.environ.setdefault("HF_HUB_CACHE", str(STORAGE_PATH / "huggingface"))
+# Directories searched for dataset configuration files: the configs bundled
+# with the package first, then any directories listed in DATASET_CONFIG_PATH
+# (separated by os.pathsep).
+DATASET_CONFIG_PATHS = [Path(__file__).parent / "dataset_config"]
+# Empty entries (unset/empty variable, or stray leading/trailing/double
+# separators) are skipped: Path("") denotes the current working directory,
+# which would otherwise be searched for configs by accident.
+DATASET_CONFIG_PATHS.extend(
+    Path(directory)
+    for directory in os.environ.get("DATASET_CONFIG_PATH", "").split(os.pathsep)
+    if directory
+)

evalsense/dataset_config/ACI-BENCH.yml ADDED Viewed

@@ -0,0 +1,51 @@
+# Dataset configuration for ACI-BENCH.
+# `url_template` contains `{version}` and `{filename}` placeholders; presumably
+# they are filled with a version name and a file name from the `versions`
+# section of this file when a file is downloaded (confirm against the
+# dataset manager).
+# NOTE(review): the version name looks like a git commit hash of the upstream
+# repository, which would pin the download to an exact revision (confirm).
+name: ACI-BENCH
+description: "Dataset for benchmarking clinical note generation from doctor-patient dialogue."
+config_version: "v1"
+default_version: "5d3cd4d8a25b4ebb5b2b87c3923a7b2b7150e33d"
+source:
+  online: true
+  url_template: "https://raw.githubusercontent.com/wyim/aci-bench/{version}/data/challenge_data/{filename}"
+  requires_auth: false
+versions:
+  - name: 5d3cd4d8a25b4ebb5b2b87c3923a7b2b7150e33d
+    splits:
+      - name: train
+        files:
+          - name: train.csv
+            hash: "6c778d4ac5e6cc6f1964786f9286e8d765c210f22ed6b57f83aff8497409cea4"
+            hash_type: sha256
+          - name: train_metadata.csv
+            hash: "7da650e223f04ff6bf1666cb62d52ebd83ce71ecfc4b2311cbe64d9f3ab19d83"
+            hash_type: sha256
+      - name: valid
+        files:
+          - name: valid.csv
+            hash: "6629e89e3fb409d2b3eceab60dc7b32fe1d3fb8d4e07795039284965522aa4d0"
+            hash_type: sha256
+          - name: valid_metadata.csv
+            hash: "ae4c7eef6fc97e22f447c33e4546691715821e2a671ab53d80eb1ee5598e2914"
+            hash_type: sha256
+      - name: test1
+        files:
+          - name: clinicalnlp_taskB_test1.csv
+            hash: "5cc4008e68545f84913744a8e493a58bdf17ba7e1b7a0be46d6943d6bfca9471"
+            hash_type: sha256
+          - name: clinicalnlp_taskB_test1_metadata.csv
+            hash: "6960581701816c6dbe1aea8a53df6cff2f1ca92b24b036f6303242ca681cbafd"
+            hash_type: sha256
+      - name: test2
+        files:
+          - name: clinicalnlp_taskC_test2.csv
+            hash: "599e3330a14e25a0e056aee1365ffac7ebe50058f15821eae42a5513c2bb5a4f"
+            hash_type: sha256
+          - name: clinicalnlp_taskC_test2_metadata.csv
+            hash: "60e799ee5033767e9f5c2e9c3d84f64366628e2aae8637cd5d96ca29ba01b83c"
+            hash_type: sha256
+      - name: test3
+        files:
+          - name: clef_taskC_test3.csv
+            hash: "d3c18362a42124ea2bd1b2b4b66ba76a11bb123dfdb416471ae3b5924d1428ec"
+            hash_type: sha256
+          - name: clef_taskC_test3_metadata.csv
+            hash: "e1bec9323b2bed8e544ead77fae6251c4709bc9ffe2d0de346d843334760736b"
+            hash_type: sha256

evalsense/datasets/__init__.py ADDED Viewed

@@ -0,0 +1,23 @@
+from evalsense.datasets.dataset_config import (
+    OnlineSource,
+    LocalSource,
+    FileMetadata,
+    SplitMetadata,
+    VersionMetadata,
+    DatasetMetadata,
+    DatasetConfig,
+)
+from evalsense.datasets.dataset_manager import DatasetManager, DatasetRecord
+# Public API of the `evalsense.datasets` package: the dataset manager types
+# plus the configuration/metadata models re-exported from dataset_config.
+__all__ = [
+    "DatasetManager",
+    "DatasetRecord",
+    "DatasetConfig",
+    "OnlineSource",
+    "LocalSource",
+    "FileMetadata",
+    "SplitMetadata",
+    "VersionMetadata",
+    "DatasetMetadata",
+]

evalsense/datasets/dataset_config.py ADDED Viewed

@@ -0,0 +1,302 @@
+from pathlib import Path
+from typing import Literal, Optional
+from typing_extensions import override
+import warnings
+from pydantic import BaseModel, field_validator
+import yaml
+from evalsense.constants import (
+    DEFAULT_HASH_TYPE,
+    DATASET_CONFIG_PATHS,
+)
+from evalsense.utils.dict import deep_update
+from evalsense.utils.files import to_safe_filename
+# TODO: Handle folders
+class OnlineSource(BaseModel):
+    """The online source of the dataset file(s).
+    Attributes:
+        online (Literal[True]): Marker field; must be `True` for an online
+            source (the local counterpart requires `False`)
+        url_template (str): The URL template for the dataset file(s),
+            optionally taking a version and filename
+        requires_auth (bool, optional): Whether accessing the dataset file(s)
+            requires authentication; defaults to `False`
+    """
+    online: Literal[True]
+    url_template: str
+    requires_auth: bool = False
+class LocalSource(BaseModel):
+    """The local source of the dataset file(s).
+    Attributes:
+        online (Literal[False]): Marker field; must be `False` for a local
+            source (the online counterpart requires `True`)
+        path (Path): The path to the dataset file(s)
+    """
+    online: Literal[False]
+    path: Path
+class FileMetadata(BaseModel):
+    """Metadata describing a single dataset file.
+    Attributes:
+        name (str): File name
+        hash (str, optional): Expected hash of the file, if known
+        hash_type (str): Hash algorithm name; defaults to `DEFAULT_HASH_TYPE`
+        source (OnlineSource | LocalSource, optional): Source declared directly
+            on the file; read `effective_source` to also consider the source
+            inherited from the parent
+        parent (SplitMetadata, optional): Back-reference to the enclosing split
+            metadata, assigned by the parent after validation
+    """
+    name: str
+    hash: str | None = None
+    hash_type: str = DEFAULT_HASH_TYPE
+    source: OnlineSource | LocalSource | None = None
+    parent: Optional["SplitMetadata"] = None
+    @property
+    def effective_source(self) -> OnlineSource | LocalSource:
+        """Resolve the source that applies to this file.
+        Returns:
+            (OnlineSource | LocalSource): The file's own source when set,
+                otherwise the parent's effective source.
+        Raises:
+            RuntimeError: If the file has no own source and no parent.
+        """
+        own_source = self.source
+        if own_source is None:
+            # Nothing declared on the file itself: defer to the enclosing level.
+            enclosing = self.parent
+            if enclosing is None:
+                raise RuntimeError(
+                    "Parent metadata not filled. Please report this issue."
+                )
+            return enclosing.effective_source
+        return own_source
+class SplitMetadata(BaseModel):
+    """Metadata describing one split of a dataset version.
+    Attributes:
+        name (str): Split name
+        files (dict[str, FileMetadata]): Files of the split, keyed by file name
+            (a list of entries is accepted and re-keyed by each entry's name)
+        source (OnlineSource | LocalSource, optional): Source declared directly
+            on the split; read `effective_source` to also consider the source
+            inherited from the parent
+        parent (VersionMetadata, optional): Back-reference to the enclosing
+            version metadata, assigned by the parent after validation
+    """
+    name: str
+    files: dict[str, FileMetadata]
+    source: OnlineSource | LocalSource | None = None
+    parent: Optional["VersionMetadata"] = None
+    @field_validator("files", mode="before")
+    @classmethod
+    def convert_list_to_dict(cls, files):
+        # Runs before field validation: a list of entries is re-keyed by each
+        # entry's "name"; any other input is passed through untouched.
+        if not isinstance(files, list):
+            return files
+        keyed = {}
+        for entry in files:
+            keyed[entry["name"]] = entry
+        return keyed
+    @override
+    def model_post_init(self, _):
+        # Back-link every file to this split so it can inherit the split's source.
+        for file_meta in self.files.values():
+            file_meta.parent = self
+    @property
+    def effective_source(self) -> OnlineSource | LocalSource:
+        """Resolve the source that applies to this split.
+        Returns:
+            (OnlineSource | LocalSource): The split's own source when set,
+                otherwise the parent's effective source.
+        Raises:
+            RuntimeError: If the split has no own source and no parent.
+        """
+        own_source = self.source
+        if own_source is None:
+            # Nothing declared on the split itself: defer to the enclosing level.
+            enclosing = self.parent
+            if enclosing is None:
+                raise RuntimeError(
+                    "Parent metadata not filled. Please report this issue."
+                )
+            return enclosing.effective_source
+        return own_source
+class VersionMetadata(BaseModel):
+    """The metadata for a dataset version.
+    Attributes:
+        name (str): The name of the dataset version
+        splits (dict[str, SplitMetadata]): The dataset splits in the version,
+            keyed by split name (required; a list of entries is accepted and
+            re-keyed by each entry's name)
+        files (dict[str, FileMetadata], optional): Files attached directly to
+            the version rather than to a split, keyed by file name
+        source (OnlineSource | LocalSource, optional): The immediate source of
+            the dataset version (use `effective_source` to access the effective source,
+            which may be inherited)
+        parent (DatasetMetadata): The parent dataset metadata
+    """
+    name: str
+    splits: dict[str, SplitMetadata]
+    files: dict[str, FileMetadata] | None = None
+    source: OnlineSource | LocalSource | None = None
+    parent: Optional["DatasetMetadata"] = None
+    @field_validator("splits", "files", mode="before")
+    @classmethod
+    def convert_list_to_dict(cls, vs):
+        # Runs before field validation: a list of entries is re-keyed by each
+        # entry's "name"; any other input (dict, None) is passed through.
+        if isinstance(vs, list):
+            return {v["name"]: v for v in vs}
+        return vs
+    @override
+    def model_post_init(self, _):
+        # Back-link the children so they can inherit this version's source.
+        for split in self.splits.values():
+            split.parent = self
+        # Version-level files need the back-link as well: without it, a file
+        # that declares no source of its own raises RuntimeError from
+        # `effective_source` instead of inheriting the version's source.
+        # NOTE(review): FileMetadata.parent is annotated as SplitMetadata;
+        # plain attribute assignment is not validated by pydantic by default,
+        # but the annotation should be widened to cover this case.
+        for file in (self.files or {}).values():
+            file.parent = self
+    @property
+    def effective_source(self) -> OnlineSource | LocalSource:
+        """The effective source of the dataset version.
+        Returns:
+            (OnlineSource | LocalSource): The effective source.
+        Raises:
+            RuntimeError: If the version has no own source and no parent.
+        """
+        if self.source is not None:
+            return self.source
+        if self.parent is None:
+            raise RuntimeError("Parent metadata not filled. Please report this issue.")
+        return self.parent.effective_source
+    def get_files(self, splits: list[str]) -> dict[str, FileMetadata]:
+        """Gets the files for the specified splits.
+        Version-level files are always included; files of the requested splits
+        are added on top and replace earlier entries with the same name.
+        Args:
+            splits (list[str]): The names of the splits.
+        Returns:
+            (dict[str, FileMetadata]): The files for the splits.
+        Raises:
+            ValueError: If a requested split does not exist in this version.
+        """
+        files = {}
+        if self.files is not None:
+            files.update(self.files)
+        for split_name in splits:
+            if split_name not in self.splits:
+                raise ValueError(
+                    f"Split '{split_name}' not found for version {self.name}."
+                )
+            files.update(self.splits[split_name].files)
+        return files
+class DatasetMetadata(BaseModel):
+    """Top-level metadata describing a dataset and its versions.
+    Attributes:
+        name (str): Dataset name
+        versions (dict[str, VersionMetadata]): Dataset versions, keyed by
+            version name (a list of entries is accepted and re-keyed by each
+            entry's name)
+        source (OnlineSource | LocalSource, optional): Source declared directly
+            on the dataset; `effective_source` returns it or raises when absent
+    """
+    name: str
+    versions: dict[str, VersionMetadata]
+    source: OnlineSource | LocalSource | None = None
+    @field_validator("versions", mode="before")
+    @classmethod
+    def convert_list_to_dict(cls, versions):
+        # Runs before field validation: a list of entries is re-keyed by each
+        # entry's "name"; any other input is passed through untouched.
+        if not isinstance(versions, list):
+            return versions
+        keyed = {}
+        for entry in versions:
+            keyed[entry["name"]] = entry
+        return keyed
+    @override
+    def model_post_init(self, _):
+        # Back-link every version to this dataset so sources can be inherited.
+        for version_meta in self.versions.values():
+            version_meta.parent = self
+    @property
+    def effective_source(self) -> OnlineSource | LocalSource:
+        """Resolve the source declared on the dataset itself.
+        Returns:
+            (OnlineSource | LocalSource): The dataset's source.
+        Raises:
+            ValueError: If the dataset declares no source (there is no higher
+                level to inherit from).
+        """
+        declared = self.source
+        if declared is None:
+            raise ValueError("No effective source exists.")
+        return declared
+    def get_files(self, version: str, splits: list[str]) -> dict[str, FileMetadata]:
+        """Look up the files of the given splits within one version.
+        Args:
+            version (str): Name of the version to read.
+            splits (list[str]): Names of the splits to include.
+        Returns:
+            (dict[str, FileMetadata]): The files for the version and splits.
+        Raises:
+            ValueError: If the version is not defined for this dataset.
+        """
+        version_meta = self.versions.get(version)
+        if version_meta is None:
+            raise ValueError(f"Version '{version}' not found for dataset {self.name}.")
+        return version_meta.get_files(splits)
+    def get_splits(self, version: str) -> dict[str, SplitMetadata]:
+        """Look up the splits defined for one version.
+        Args:
+            version (str): Name of the version to read.
+        Returns:
+            (dict[str, SplitMetadata]): The splits for the version.
+        Raises:
+            ValueError: If the version is not defined for this dataset.
+        """
+        version_meta = self.versions.get(version)
+        if version_meta is None:
+            raise ValueError(f"Version '{version}' not found for dataset {self.name}.")
+        return version_meta.splits
+class DatasetConfig:
+    """Configuration for a dataset.
+    Attributes:
+        dataset_name (str): The name of the dataset.
+        dataset_metadata (DatasetMetadata): The metadata for the dataset.
+    """
+    def __init__(self, dataset_name: str):
+        """Initializes a new DatasetConfig.
+        Each directory in `DATASET_CONFIG_PATHS` is checked, in order, for a
+        `<safe dataset name>.yml` file; every file found is merged into the
+        accumulated configuration with `deep_update`. A file that fails to
+        load or merge is reported with a warning and skipped.
+        Args:
+            dataset_name (str): The name of the dataset.
+        Raises:
+            pydantic.ValidationError: If the merged configuration is not valid
+                dataset metadata, e.g. when no configuration file was found
+                and the required fields are therefore missing.
+        """
+        self.dataset_name = dataset_name
+        # The file name does not depend on the directory, so compute it once.
+        config_filename = to_safe_filename(dataset_name) + ".yml"
+        config = {}
+        for config_path in DATASET_CONFIG_PATHS:
+            config_file = config_path / config_filename
+            if config_file.exists():
+                try:
+                    # Decode as UTF-8 explicitly: the default text encoding is
+                    # locale-dependent and can mis-decode non-ASCII YAML.
+                    with open(config_file, "r", encoding="utf-8") as f:
+                        new_config = yaml.safe_load(f)
+                    config = deep_update(config, new_config)
+                except Exception as e:
+                    # Deliberately best-effort: one broken config file must not
+                    # prevent the remaining directories from being used.
+                    warnings.warn(
+                        f"Failed to load dataset config from {config_file}: {e}"
+                    )
+        self.dataset_metadata = DatasetMetadata(**config)
+    def get_files(self, version: str, splits: list[str]) -> dict[str, FileMetadata]:
+        """Gets the files for the specified version and splits.
+        Args:
+            version (str): The name of the version.
+            splits (list[str]): The names of the splits.
+        Returns:
+            (dict[str, FileMetadata]): The files for the version and splits.
+        """
+        return self.dataset_metadata.get_files(version, splits)
+    def get_splits(self, version: str) -> dict[str, SplitMetadata]:
+        """Gets the dataset splits for the specified version.
+        Args:
+            version (str): The name of the version.
+        Returns:
+            (dict[str, SplitMetadata]): The splits for the version.
+        """
+        return self.dataset_metadata.get_splits(version)