PyPI - morphlabs - Versions diffs - 0.1.0__tar.gz - Mend

morphlabs 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

morphlabs-0.1.0/LICENSE +21 -0
morphlabs-0.1.0/PKG-INFO +51 -0
morphlabs-0.1.0/README.md +29 -0
morphlabs-0.1.0/morphlabs/__init__.py +4 -0
morphlabs-0.1.0/morphlabs/io/__init__.py +3 -0
morphlabs-0.1.0/morphlabs/io/loading.py +112 -0
morphlabs-0.1.0/morphlabs/models/__init__.py +3 -0
morphlabs-0.1.0/morphlabs/models/scientia.py +120 -0
morphlabs-0.1.0/morphlabs.egg-info/PKG-INFO +51 -0
morphlabs-0.1.0/morphlabs.egg-info/SOURCES.txt +15 -0
morphlabs-0.1.0/morphlabs.egg-info/dependency_links.txt +1 -0
morphlabs-0.1.0/morphlabs.egg-info/requires.txt +10 -0
morphlabs-0.1.0/morphlabs.egg-info/top_level.txt +3 -0
morphlabs-0.1.0/pyproject.toml +38 -0
morphlabs-0.1.0/setup.cfg +4 -0
morphlabs-0.1.0/tests/test_loading.py +163 -0
morphlabs-0.1.0/tests/test_scientia.py +332 -0

morphlabs-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 Morphlabs
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

morphlabs-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,51 @@
+Metadata-Version: 2.4
+Name: morphlabs
+Version: 0.1.0
+Summary: Python SDK for Morphlabs biosignal processing API
+Author-email: Morphlabs <support@morphlabs.tech>
+Maintainer-email: Morphlabs <support@morphlabs.tech>
+License-Expression: MIT
+Project-URL: Homepage, https://morphlabs.tech
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: numpy>=1.20.0
+Requires-Dist: requests>=2.28.0
+Requires-Dist: mne>=1.0.0
+Requires-Dist: pandas>=1.3.0
+Requires-Dist: scipy>=1.7.0
+Requires-Dist: tenacity>=9.1.2
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
+Requires-Dist: pytest-cov; extra == "dev"
+Dynamic: license-file
+# morphlabs
+Python SDK for Morphlabs biosignal processing API.
+## Installation
+```bash
+pip install morphlabs
+```
+## Quick Start
+```python
+from morphlabs.models import Scientia
+# Set SCIENTIA_API_KEY environment variable or pass directly
+scientia = Scientia(api_key="your-api-key")
+# Clean EEG data
+cleaned_data = scientia.clean_data("path/to/eeg_file.csv")
+```
+## Documentation
+For full documentation, see [docs.morphlabs.tech](https://docs.morphlabs.tech)
+## License
+MIT

morphlabs-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,29 @@
+# morphlabs
+Python SDK for Morphlabs biosignal processing API.
+## Installation
+```bash
+pip install morphlabs
+```
+## Quick Start
+```python
+from morphlabs.models import Scientia
+# Set SCIENTIA_API_KEY environment variable or pass directly
+scientia = Scientia(api_key="your-api-key")
+# Clean EEG data
+cleaned_data = scientia.clean_data("path/to/eeg_file.csv")
+```
+## Documentation
+For full documentation, see [docs.morphlabs.tech](https://docs.morphlabs.tech)
+## License
+MIT

morphlabs-0.1.0/morphlabs/__init__.py ADDED Viewed

@@ -0,0 +1,4 @@
+from .models import Scientia
+__version__ = "0.1.0"
+__all__ = ["Scientia"]

morphlabs-0.1.0/morphlabs/io/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .loading import EEGData
+__all__ = ['EEGData']

morphlabs-0.1.0/morphlabs/io/loading.py ADDED Viewed

@@ -0,0 +1,112 @@
+import numpy as np
+import pandas as pd
+import mne
+from pathlib import Path
+from typing import Optional
+class EEGData:
+    def __init__(self, file_path: str):
+        self.file_path = file_path
+        self._data = None
+        self._channels = None
+        self._pad_amount = None
+        if file_path is not None:
+            if not self.load_data():
+                raise ValueError(f"Failed to load data from {file_path}")
+    def get_data(self) -> Optional[list[np.ndarray]]:
+        return self._data
+    def get_channels(self) -> Optional[int]:
+        return self._channels
+    def get_pad_amount(self) -> Optional[int]:
+        return self._pad_amount
+    def load_data(self) -> bool:
+        self._validate_file_path(self.file_path)
+        match Path(self.file_path).suffix:
+            case '.csv':
+                self.load_data_from_csv(self.file_path)
+                return True
+            case '.edf':
+                self.load_data_from_edf(self.file_path)
+                return True
+            case '.bdf':
+                self.load_data_from_bdf(self.file_path)
+                return True
+            case _:
+                raise ValueError(f"Unsupported file type: {str(self.file_path).split('.')[-1]}, please use .csv, .edf, or .bdf files.")
+    def load_data_from_csv(self, file_path: str):
+        try:
+            data = pd.read_csv(file_path)
+            if data.empty:
+                raise ValueError(f"File is empty: '{file_path}'. Please provide a file with EEG data.")
+            self._channels = len(data.columns)
+            self._data, self._pad_amount = self.segment_data(data.values.T.astype(np.float32))
+            self.verify_montage()
+        except UnicodeDecodeError as e:
+            raise ValueError(f"Failed to load CSV file '{file_path}': File contains invalid characters. Details: {e}")
+        except pd.errors.ParserError as e:
+            raise ValueError(f"Failed to parse CSV file '{file_path}': File may be corrupted or incorrectly formatted. Details: {e}")
+        except pd.errors.EmptyDataError:
+            raise ValueError(f"File is empty: '{file_path}'. Please provide a file with EEG data.")
+        except ValueError:
+            raise
+        except Exception as e:
+            raise ValueError(f"Failed to load CSV file '{file_path}': {type(e).__name__}: {e}")
+    def load_data_from_edf(self, file_path: str):
+        try:
+            raw = mne.io.read_raw_edf(file_path, preload=True, verbose=False)
+            self._channels = len(raw.ch_names)
+            self._data, self._pad_amount = self.segment_data(raw.get_data().astype(np.float32))
+            self.verify_montage()
+        except ValueError:
+            raise
+        except Exception as e:
+            raise ValueError(f"Failed to load EDF file '{file_path}': File may be corrupted or not a valid EDF format. Details: {type(e).__name__}: {e}")
+    def load_data_from_bdf(self, file_path: str):
+        try:
+            raw = mne.io.read_raw_bdf(file_path, preload=True, verbose=False)
+            self._channels = len(raw.ch_names)
+            self._data, self._pad_amount = self.segment_data(raw.get_data().astype(np.float32))
+            self.verify_montage()
+        except ValueError:
+            raise
+        except Exception as e:
+            raise ValueError(f"Failed to load BDF file '{file_path}': File may be corrupted or not a valid BDF format. Details: {type(e).__name__}: {e}")
+    def _validate_file_path(self, file_path: str) -> None:
+        path = Path(file_path)
+        if not path.exists():
+            raise ValueError(f"File not found: '{file_path}'. Please check the file path exists.")
+        if not path.is_file():
+            raise ValueError(f"Path is not a file: '{file_path}'. Please provide a path to a file, not a directory.")
+        if path.stat().st_size == 0:
+            raise ValueError(f"File is empty: '{file_path}'. Please provide a file with EEG data.")
+    def segment_data(self, data: np.ndarray) -> tuple[list[np.ndarray], int]:
+        n_samples = data.shape[1]
+        window_size = 1000
+        segments = []
+        pad_amount = 0
+        for i in range(0, n_samples, window_size):
+            segment = data[:, i:i+window_size]
+            if segment.shape[1] != window_size:
+                pad_amount = window_size - segment.shape[1]
+                pad_width = ((0, 0), (0, pad_amount))
+                segment = np.pad(segment, pad_width, 'constant')
+            segments.append(segment)
+        return segments, pad_amount
+    def verify_montage(self) -> None:
+        if self._channels != 19:
+            raise ValueError(f"Unsupported number of channels: {self._channels}, Scientia currently only supports the 19 channels in the 10-20 system.")

morphlabs-0.1.0/morphlabs/models/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .scientia import Scientia
+__all__ = ['Scientia']

morphlabs-0.1.0/morphlabs/models/scientia.py ADDED Viewed

@@ -0,0 +1,120 @@
+import requests
+import numpy as np
+import os
+import logging
+from typing import Optional
+from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
+from morphlabs.io.loading import EEGData
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+API_ERROR_MESSAGES = {
+    400: "Bad request: The data sent to the API was invalid. Please check your input data format.",
+    401: "Authentication failed: Invalid API key. Please check your SCIENTIA_API_KEY.",
+    403: "Access denied: Your API key does not have permission for this operation. Please contact support.",
+    404: "API endpoint not found: Please check the base_url configuration.",
+    429: "Rate limit exceeded: Too many requests. Please wait a moment and try again.",
+    500: "Server error: The Scientia API encountered an internal error. Please try again later.",
+    502: "Bad gateway: The Scientia API is temporarily unavailable. Please try again later.",
+    503: "Service unavailable: The Scientia API is temporarily down for maintenance. Please try again later.",
+    504: "Gateway timeout: The request took too long. Please try again with smaller data segments.",
+}
+RETRYABLE_STATUS_CODES = [429, 500, 502, 503, 504]
+RUNPOD_API_KEY = "rpa_FFBJYB2SODF8Z3MCXWY5W78L8KI0A4BGQ999JLUZcqkk9u"
+class RetryableAPIError(Exception):
+    """Raised for network failures and retryable HTTP statuses.
+    The retry decorator on ``Scientia._make_api_request`` retries only on this
+    exception type.
+    """
+    pass
+class Scientia:
+    def __init__(self, api_key: Optional[str] = None, base_url: Optional[str] = None):
+        api_key = api_key if api_key is not None else os.getenv("SCIENTIA_API_KEY")
+        if api_key is not None and not api_key.strip():
+            raise ValueError("API key cannot be empty or whitespace. Please provide a valid API key.")
+        self.api_key = api_key
+        if base_url is not None:
+            if not base_url.startswith(("http://", "https://")):
+                raise ValueError(f"Invalid base_url: '{base_url}'. URL must start with http:// or https://")
+            self.base_url = base_url.rstrip("/")
+        else:
+            self.base_url = "https://api.runpod.ai/v2/9ni9hifywn9z73/runsync"
+    def clean_data(self, data_path: str) -> np.ndarray:
+        if self.api_key is None:
+            raise ValueError("API key not found. Please pass the API key as an argument or set the SCIENTIA_API_KEY environment variable.")
+        data_obj = EEGData(data_path)
+        data_samples = data_obj.get_data()
+        pad_amount = data_obj.get_pad_amount()
+        cleaned_samples = []
+        logger.info(f"Cleaning {len(data_samples) * 1000  - pad_amount} samples")
+        for i, sample in enumerate(data_samples):
+            response = self._make_api_request(
+                url=self.base_url,
+                json={"input": {"api_key": self.api_key, "data": sample.tolist()}},
+                timeout=30
+            )
+            logger.info(f"Cleaned {(i + 1) * 1000} of {len(data_samples)*1000 - pad_amount} samples")
+            # Validate response structure
+            try:
+                response_data = response.json()
+            except requests.exceptions.JSONDecodeError:
+                raise ValueError("Invalid response from API: Expected JSON but received invalid data.")
+            # Handle RunPod response format
+            status = response_data.get("status")
+            if status != "COMPLETED":
+                if status in ("IN_QUEUE", "IN_PROGRESS"):
+                    raise ValueError(f"Scientia API request is still processing (status: {status}). Please try again.")
+                elif status == "FAILED":
+                    error_msg = response_data.get("error", "Unknown error")
+                    raise ValueError(f"Scientia API request failed: {error_msg}")
+                elif status == "CANCELLED":
+                    raise ValueError("Scientia API request was cancelled.")
+                else:
+                    raise ValueError(f"Unexpected response status from Scientia API: {status}")
+            output = response_data.get("output")
+            if output is None or "reconstructed" not in output:
+                raise ValueError("Invalid response from API: Missing 'reconstructed' field in response.")
+            cleaned_sample = np.array(output['reconstructed'])
+            if i == len(data_samples) - 1 and pad_amount > 0:
+                cleaned_sample = cleaned_sample[:,:-pad_amount]
+            cleaned_samples.append(cleaned_sample)
+        return self._reconstruct_data(cleaned_samples)
+    def _reconstruct_data(self, cleaned_samples: list[np.ndarray]) -> np.ndarray:
+        return np.concatenate(cleaned_samples, axis=1)
+    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=15), retry=retry_if_exception_type(RetryableAPIError))
+    def _make_api_request(self, url: str, json: dict, timeout: int) -> requests.Response:
+        headers = {"Authorization": f"Bearer {RUNPOD_API_KEY}"}
+        try:
+            response = requests.post(url, headers=headers, json=json, timeout=timeout)
+        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as e:
+            raise RetryableAPIError(f"Network error: {e}")
+        if response.status_code in RETRYABLE_STATUS_CODES:
+            raise RetryableAPIError(f"API request failed with status {response.status_code}: {response.text}")
+        if not response.ok:
+            error_msg = API_ERROR_MESSAGES.get(response.status_code, f"API request failed with status {response.status_code}: {response.text}")
+            raise ValueError(error_msg)
+        return response

morphlabs-0.1.0/morphlabs.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,51 @@
+Metadata-Version: 2.4
+Name: morphlabs
+Version: 0.1.0
+Summary: Python SDK for Morphlabs biosignal processing API
+Author-email: Morphlabs <support@morphlabs.tech>
+Maintainer-email: Morphlabs <support@morphlabs.tech>
+License-Expression: MIT
+Project-URL: Homepage, https://morphlabs.tech
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: numpy>=1.20.0
+Requires-Dist: requests>=2.28.0
+Requires-Dist: mne>=1.0.0
+Requires-Dist: pandas>=1.3.0
+Requires-Dist: scipy>=1.7.0
+Requires-Dist: tenacity>=9.1.2
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0; extra == "dev"
+Requires-Dist: pytest-cov; extra == "dev"
+Dynamic: license-file
+# morphlabs
+Python SDK for Morphlabs biosignal processing API.
+## Installation
+```bash
+pip install morphlabs
+```
+## Quick Start
+```python
+from morphlabs.models import Scientia
+# Set SCIENTIA_API_KEY environment variable or pass directly
+scientia = Scientia(api_key="your-api-key")
+# Clean EEG data
+cleaned_data = scientia.clean_data("path/to/eeg_file.csv")
+```
+## Documentation
+For full documentation, see [docs.morphlabs.tech](https://docs.morphlabs.tech)
+## License
+MIT

morphlabs-0.1.0/morphlabs.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,15 @@
+LICENSE
+README.md
+pyproject.toml
+morphlabs/__init__.py
+morphlabs.egg-info/PKG-INFO
+morphlabs.egg-info/SOURCES.txt
+morphlabs.egg-info/dependency_links.txt
+morphlabs.egg-info/requires.txt
+morphlabs.egg-info/top_level.txt
+morphlabs/io/__init__.py
+morphlabs/io/loading.py
+morphlabs/models/__init__.py
+morphlabs/models/scientia.py
+tests/test_loading.py
+tests/test_scientia.py

morphlabs-0.1.0/morphlabs.egg-info/dependency_links.txt ADDED Viewed

@@ -0,0 +1 @@
+

morphlabs-0.1.0/morphlabs.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,10 @@
+numpy>=1.20.0
+requests>=2.28.0
+mne>=1.0.0
+pandas>=1.3.0
+scipy>=1.7.0
+tenacity>=9.1.2
+[dev]
+pytest>=7.0
+pytest-cov

morphlabs-0.1.0/morphlabs.egg-info/top_level.txt ADDED Viewed

@@ -0,0 +1,3 @@
+dist
+morphlabs
+venv

morphlabs-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,38 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "morphlabs"
+version = "0.1.0"
+description = "Python SDK for Morphlabs biosignal processing API"
+readme = "README.md"
+license = "MIT"
+authors = [
+    {name = "Morphlabs", email = "support@morphlabs.tech"}
+]
+maintainers = [
+    {name = "Morphlabs", email = "support@morphlabs.tech"}
+]
+# morphlabs/io/loading.py uses `match` statements, which require Python 3.10.
+requires-python = ">=3.10"
+dependencies = [
+    "numpy>=1.20.0",
+    "requests>=2.28.0",
+    "mne>=1.0.0",
+    "pandas>=1.3.0",
+    "scipy>=1.7.0",
+    "tenacity>=9.1.2"
+]
+[project.urls]
+Homepage = "https://morphlabs.tech"
+[project.optional-dependencies]
+dev = ["pytest>=7.0", "pytest-cov"]
+[tool.setuptools.packages.find]
+where = ["."]
+# Only ship the SDK package; without an explicit include, stray directories
+# such as dist/ and venv/ are discovered as top-level packages.
+include = ["morphlabs", "morphlabs.*"]
+exclude = ["tests", "tests.*"]

morphlabs-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

morphlabs-0.1.0/tests/test_loading.py ADDED Viewed

@@ -0,0 +1,163 @@
+import pytest
+import numpy as np
+from pathlib import Path
+from morphlabs.io import EEGData
+@pytest.fixture
+def test_data_path():
+    """Return the ``test_data`` directory located next to this test module."""
+    return Path(__file__).parent / "test_data"
+@pytest.fixture
+def valid_edf_2500_samples(test_data_path):
+    """EEGData loaded from ``valid_19ch_2500samples.edf``."""
+    return EEGData(test_data_path / "valid_19ch_2500samples.edf")
+@pytest.fixture
+def valid_csv_1000_samples(test_data_path):
+    """EEGData loaded from ``valid_19ch_1000samples.csv``."""
+    return EEGData(test_data_path / "valid_19ch_1000samples.csv")
+@pytest.fixture
+def valid_csv_2500_samples(test_data_path):
+    """EEGData loaded from ``valid_19ch_2500samples.csv``."""
+    return EEGData(test_data_path / "valid_19ch_2500samples.csv")
+@pytest.fixture
+def valid_bdf_2500_samples(test_data_path):
+    """EEGData loaded from ``valid_19ch_2500samples.bdf``."""
+    return EEGData(test_data_path / "valid_19ch_2500samples.bdf")
+@pytest.fixture
+def valid_csv_500_samples(test_data_path):
+    """EEGData loaded from ``valid_19ch_500samples.csv``."""
+    return EEGData(test_data_path / "valid_19ch_500samples.csv")
+@pytest.mark.parametrize("fixture_name", [
+    "valid_edf_2500_samples",
+    "valid_csv_1000_samples",
+    "valid_bdf_2500_samples",
+])
+def test_load_file_success(fixture_name, request):
+    """Each supported format loads into ndarray windows with 19 channels."""
+    data = request.getfixturevalue(fixture_name)
+    assert data.get_data() is not None
+    assert data.get_channels() == 19
+    assert isinstance(data.get_data()[0], np.ndarray)
+def test_load_none_success():
+    """Constructing with None skips loading and leaves every getter at None."""
+    data = EEGData(None)
+    assert data.get_data() is None
+    assert data.get_channels() is None
+    assert data.get_pad_amount() is None
+def test_getter_dtype(valid_csv_1000_samples, valid_edf_2500_samples):
+    """Loaded samples are stored as float32."""
+    assert valid_edf_2500_samples.get_data()[0][0].dtype == np.float32
+    assert valid_csv_1000_samples.get_data()[0][0].dtype == np.float32
+def test_getter_pad_amount_no_padding(valid_csv_1000_samples):
+    """A recording that fills its window exactly reports zero padding."""
+    assert valid_csv_1000_samples.get_pad_amount() == 0
+def test_getter_pad_amount_with_padding(valid_csv_500_samples):
+    """A 500-sample recording is padded by 500 to fill one 1000-sample window."""
+    assert valid_csv_500_samples.get_pad_amount() == 500
+def test_getter_pad_amount_range(valid_edf_2500_samples):
+    """The EDF pad amount lies within one window width."""
+    # NOTE(review): only a range is asserted here, presumably because the exact
+    # sample count of the EDF fixture is not guaranteed — confirm.
+    assert 0 <= valid_edf_2500_samples.get_pad_amount() <= 1000
+@pytest.mark.parametrize("fixture_name", [
+    "valid_edf_2500_samples",
+    "valid_csv_2500_samples",
+    "valid_bdf_2500_samples",
+])
+def test_segment_data_success(fixture_name, request):
+    """A 2500-sample recording yields three float32 windows of shape (19, 1000)."""
+    eeg_data = request.getfixturevalue(fixture_name)
+    data = eeg_data.get_data()
+    assert len(data) == 3
+    for segment in data:
+        assert segment.shape == (19, 1000)
+        assert segment[0][0].dtype == np.float32
+    assert data[-1].shape[1] == 1000
+    assert 0 <= eeg_data.get_pad_amount() <= 1000
+def test_file_not_found(test_data_path):
+    """A missing path raises ValueError mentioning "File not found"."""
+    with pytest.raises(ValueError) as e:
+        EEGData(test_data_path / "nonexistent_file.csv")
+    assert "File not found" in str(e.value)
+def test_path_is_directory(test_data_path):
+    """Passing a directory raises ValueError mentioning "Path is not a file"."""
+    with pytest.raises(ValueError) as e:
+        EEGData(test_data_path)
+    assert "Path is not a file" in str(e.value)
+def test_empty_file(test_data_path):
+    """``empty_file.csv`` is rejected with a "File is empty" error."""
+    with pytest.raises(ValueError) as e:
+        EEGData(test_data_path / "empty_file.csv")
+    assert "File is empty" in str(e.value)
+def test_no_content_file(test_data_path):
+    """``no_content.csv`` is rejected with a "File is empty" error."""
+    with pytest.raises(ValueError) as e:
+        EEGData(test_data_path / "no_content.csv")
+    assert "File is empty" in str(e.value)
+def test_unsupported_file_type(test_data_path):
+    """A ``.txt`` file is rejected as an unsupported file type."""
+    with pytest.raises(ValueError) as e:
+        EEGData(test_data_path / "invalid_file_format.txt")
+    assert "Unsupported file type" in str(e.value)
+@pytest.mark.parametrize("filename,channel_count", [
+    ("invalid_10ch.csv", 10),
+    ("invalid_32ch.csv", 32),
+])
+def test_unsupported_channel_count(test_data_path, filename, channel_count):
+    """Recordings without exactly 19 channels are rejected."""
+    # NOTE(review): channel_count is parametrized but never asserted on.
+    with pytest.raises(ValueError) as e:
+        EEGData(test_data_path / filename)
+    assert "Unsupported number of channels" in str(e.value)
+def test_corrupted_csv(test_data_path):
+    """``corrupted.csv`` raises a ValueError whose message contains "Failed to"."""
+    with pytest.raises(ValueError) as e:
+        EEGData(test_data_path / "corrupted.csv")
+    assert "Failed to" in str(e.value)
+def test_malformed_csv(test_data_path):
+    """``malformed.csv`` surfaces the CSV parse-failure message."""
+    with pytest.raises(ValueError) as e:
+        EEGData(test_data_path / "malformed.csv")
+    assert "Failed to parse CSV file" in str(e.value)
+def test_corrupted_edf(test_data_path):
+    """``corrupted.edf`` raises ValueError mentioning "Bad EDF file provided"."""
+    # NOTE(review): this text is not produced by morphlabs itself; presumably it
+    # is a ValueError raised by the underlying reader and passed through — verify.
+    with pytest.raises(ValueError) as e:
+        EEGData(test_data_path / "corrupted.edf")
+    assert "Bad EDF file provided" in str(e.value)
+def test_corrupted_bdf(test_data_path):
+    """``corrupted.bdf`` raises ValueError mentioning "Bad BDF file provided"."""
+    # NOTE(review): as for EDF, the expected text presumably originates in the
+    # underlying reader rather than in morphlabs — verify.
+    with pytest.raises(ValueError) as e:
+        EEGData(test_data_path / "corrupted.bdf")
+    assert "Bad BDF file provided" in str(e.value)
+def test_invalid_encoding_csv(test_data_path):
+    """``invalid_encoding.csv`` reports invalid characters."""
+    with pytest.raises(ValueError) as e:
+        EEGData(test_data_path / "invalid_encoding.csv")
+    assert "File contains invalid characters" in str(e.value)

morphlabs-0.1.0/tests/test_scientia.py ADDED Viewed

@@ -0,0 +1,332 @@
+import pytest
+from unittest.mock import patch, MagicMock
+from pathlib import Path
+import requests
+from morphlabs.models import Scientia
+@pytest.fixture
+def test_data_path():
+    """Return the ``test_data`` directory located next to this test module."""
+    return Path(__file__).parent / "test_data"
+@pytest.fixture
+def api_key(monkeypatch):
+    """Set ``SCIENTIA_API_KEY`` to a dummy value and return that value."""
+    monkeypatch.setenv("SCIENTIA_API_KEY", "test_api_key")
+    return "test_api_key"
+@pytest.fixture
+def no_api_key(monkeypatch):
+    """Ensure ``SCIENTIA_API_KEY`` is absent from the environment."""
+    monkeypatch.delenv("SCIENTIA_API_KEY", raising=False)
+def mock_success_response(url, json, **kwargs):
+    """Stand-in for ``requests.post`` that echoes the submitted window back."""
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.ok = True
+    mock_response.json.return_value = {
+        "status": "COMPLETED",
+        "output": {"reconstructed": json["input"]["data"]}
+    }
+    return mock_response
+def mock_missing_data_response(url, json, **kwargs):
+    """Stand-in for ``requests.post`` whose output lacks ``reconstructed``."""
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.ok = True
+    mock_response.json.return_value = {"status": "COMPLETED", "output": {}}
+    return mock_response
+def test_init_defaults(no_api_key):
+    """Without arguments or env var the key is None and the default URL is used."""
+    scientia = Scientia()
+    assert scientia.api_key is None
+    assert scientia.base_url == "https://api.runpod.ai/v2/9ni9hifywn9z73/runsync"
+def test_init_with_api_key(no_api_key):
+    """An explicitly passed key is stored; the default URL is kept."""
+    scientia = Scientia(api_key="test_api_key1")
+    assert scientia.api_key == "test_api_key1"
+    assert scientia.base_url == "https://api.runpod.ai/v2/9ni9hifywn9z73/runsync"
+def test_init_from_env(api_key):
+    """The key is read from the environment and a custom base_url is honoured."""
+    scientia = Scientia(base_url="https://test.scientia.ai")
+    assert scientia.api_key == "test_api_key"
+    assert scientia.base_url == "https://test.scientia.ai"
+@pytest.mark.parametrize("base_url,expected", [
+    ("https://api.scientia.ai/", "https://api.scientia.ai"),
+    ("https://api.scientia.ai/v1", "https://api.scientia.ai/v1"),
+    ("https://api.scientia.ai/v1/", "https://api.scientia.ai/v1"),
+])
+def test_init_strips_trailing_slash(base_url, expected):
+    """Trailing slashes are removed from base_url."""
+    scientia = Scientia(api_key="test_api_key", base_url=base_url)
+    assert scientia.base_url == expected
+# =============================================================================
+# Validation Tests
+# =============================================================================
+def test_missing_api_key_error(no_api_key, test_data_path):
+    """clean_data raises when no API key was passed or set in the environment."""
+    scientia = Scientia()
+    with pytest.raises(ValueError) as e:
+        scientia.clean_data(test_data_path / "valid_19ch_2500samples.csv")
+    assert "API key not found" in str(e.value)
+def test_empty_api_key_error(monkeypatch, test_data_path):
+    """An empty SCIENTIA_API_KEY is rejected."""
+    monkeypatch.setenv("SCIENTIA_API_KEY", "")
+    with pytest.raises(ValueError) as e:
+        # The constructor raises for an empty key, so the clean_data call
+        # below is not expected to run.
+        scientia = Scientia()
+        scientia.clean_data(test_data_path / "valid_19ch_2500samples.csv")
+    assert "API key cannot be empty or whitespace" in str(e.value)
+def test_whitespace_api_key_error(monkeypatch, test_data_path):
+    """A whitespace-only SCIENTIA_API_KEY is rejected."""
+    monkeypatch.setenv("SCIENTIA_API_KEY", " ")
+    with pytest.raises(ValueError) as e:
+        # The constructor raises for a whitespace key, so the clean_data call
+        # below is not expected to run.
+        scientia = Scientia()
+        scientia.clean_data(test_data_path / "valid_19ch_2500samples.csv")
+    assert "API key cannot be empty or whitespace" in str(e.value)
+def test_invalid_base_url_error(test_data_path):
+    """A base_url without a scheme is rejected."""
+    with pytest.raises(ValueError) as e:
+        # The constructor raises for an invalid URL, so the clean_data call
+        # below is not expected to run.
+        scientia = Scientia(api_key="test_api_key", base_url="invalid_url")
+        scientia.clean_data(test_data_path / "valid_19ch_2500samples.csv")
+    assert "Invalid base_url" in str(e.value)
+def test_invalid_base_url_protocol_error(test_data_path):
+    """A base_url with a non-http(s) scheme is rejected."""
+    with pytest.raises(ValueError) as e:
+        # The constructor raises for an invalid URL, so the clean_data call
+        # below is not expected to run.
+        scientia = Scientia(api_key="test_api_key", base_url="ftp://invalid_url")
+        scientia.clean_data(test_data_path / "valid_19ch_2500samples.csv")
+    assert "URL must start with http:// or https://" in str(e.value)
+def test_clean_data_success(api_key, test_data_path):
+    """With an echoing API mock, the output has the original (19, 2500) shape."""
+    with patch("morphlabs.models.scientia.requests.post", side_effect=mock_success_response):
+        scientia = Scientia()
+        data = scientia.clean_data(test_data_path / "valid_19ch_2500samples.csv")
+        assert data is not None
+        assert data.shape == (19, 2500)
+def test_clean_data_removes_padding(api_key, test_data_path):
+    """Padding is stripped from the result, with and without a partial last window."""
+    with patch("morphlabs.models.scientia.requests.post", side_effect=mock_success_response):
+        scientia = Scientia()
+        data_padded = scientia.clean_data(test_data_path / "valid_19ch_2500samples.csv")
+        assert data_padded.shape == (19, 2500)
+        data_no_pad = scientia.clean_data(test_data_path / "valid_19ch_2000samples.csv")
+        assert data_no_pad.shape == (19, 2000)
+def test_json_missing_reconstructed_field(api_key, test_data_path):
+    """A COMPLETED response without ``reconstructed`` raises ValueError."""
+    with patch("morphlabs.models.scientia.requests.post", side_effect=mock_missing_data_response):
+        scientia = Scientia()
+        with pytest.raises(ValueError) as e:
+            scientia.clean_data(test_data_path / "valid_19ch_2500samples.csv")
+        assert "Invalid response from API: Missing 'reconstructed' field in response." in str(e.value)
+def test_json_decode_error(api_key, test_data_path):
+    """A response body that is not valid JSON raises ValueError."""
+    def mock_invalid_json(url, json, **kwargs):
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.ok = True
+        mock_response.json.side_effect = requests.exceptions.JSONDecodeError("", "", 0)
+        return mock_response
+    with patch("morphlabs.models.scientia.requests.post", side_effect=mock_invalid_json):
+        scientia = Scientia()
+        with pytest.raises(ValueError) as e:
+            scientia.clean_data(test_data_path / "valid_19ch_1000samples.csv")
+        assert "Expected JSON but received invalid data" in str(e.value)
+@pytest.mark.parametrize("status_code", [429, 500, 502, 503, 504])
+def test_retryable_status_codes(api_key, test_data_path, status_code):
+    """Retryable statuses end in an error whose last attempt names the status."""
+    # NOTE(review): the retry policy waits between attempts and nothing here
+    # patches that wait, so these cases presumably sleep for real — confirm.
+    def mock_retryable_error(url, json, **kwargs):
+        mock_response = MagicMock()
+        mock_response.status_code = status_code
+        mock_response.ok = False
+        mock_response.text = f"Error {status_code}"
+        return mock_response
+    with patch("morphlabs.models.scientia.requests.post", side_effect=mock_retryable_error):
+        scientia = Scientia()
+        with pytest.raises(Exception) as e:
+            scientia.clean_data(test_data_path / "valid_19ch_1000samples.csv")
+        # The raised error wraps the final attempt; inspect its inner exception.
+        last_exception = e.value.last_attempt.exception()
+        assert f"API request failed with status {status_code}" in str(last_exception)
+@pytest.mark.parametrize("status_code,expected_msg", [
+    (400, "Bad request"),
+    (401, "Authentication failed"),
+    (403, "Access denied"),
+    (404, "API endpoint not found"),
+])
+def test_non_retryable_status_codes(api_key, test_data_path, status_code, expected_msg):
+    """Non-retryable statuses raise ValueError with the mapped message."""
+    def mock_non_retryable_error(url, json, **kwargs):
+        mock_response = MagicMock()
+        mock_response.status_code = status_code
+        mock_response.ok = False
+        mock_response.text = f"Error {status_code}"
+        return mock_response
+    with patch("morphlabs.models.scientia.requests.post", side_effect=mock_non_retryable_error):
+        scientia = Scientia()
+        with pytest.raises(ValueError) as e:
+            scientia.clean_data(test_data_path / "valid_19ch_1000samples.csv")
+        assert expected_msg in str(e.value)
+def test_retry_attempts_count(api_key, test_data_path):
+    """A permanently failing endpoint is called exactly three times."""
+    call_count = 0
+    def mock_always_fails(url, json, **kwargs):
+        nonlocal call_count
+        call_count += 1
+        mock_response = MagicMock()
+        mock_response.status_code = 500
+        mock_response.ok = False
+        mock_response.text = "Server error"
+        return mock_response
+    with patch("morphlabs.models.scientia.requests.post", side_effect=mock_always_fails):
+        scientia = Scientia()
+        with pytest.raises(Exception):
+            scientia.clean_data(test_data_path / "valid_19ch_1000samples.csv")
+    assert call_count == 3
+def test_network_timeout(api_key, test_data_path):
+    """A request timeout is reported as a network error on the last attempt."""
+    with patch("morphlabs.models.scientia.requests.post", side_effect=requests.exceptions.Timeout("Connection timed out")):
+        scientia = Scientia()
+        with pytest.raises(Exception) as e:
+            scientia.clean_data(test_data_path / "valid_19ch_1000samples.csv")
+        last_exception = e.value.last_attempt.exception()
+        assert "Network error" in str(last_exception)
+def test_network_connection_error(api_key, test_data_path):
+    """A connection failure is reported as a network error on the last attempt."""
+    with patch("morphlabs.models.scientia.requests.post", side_effect=requests.exceptions.ConnectionError("Connection refused")):
+        scientia = Scientia()
+        with pytest.raises(Exception) as e:
+            scientia.clean_data(test_data_path / "valid_19ch_1000samples.csv")
+        last_exception = e.value.last_attempt.exception()
+        assert "Network error" in str(last_exception)
+# =============================================================================
+# RunPod-specific Status Tests
+# =============================================================================
+@pytest.mark.parametrize("status", ["IN_QUEUE", "IN_PROGRESS"])
+def test_runpod_processing_status(api_key, test_data_path, status):
+    def mock_processing_response(url, json, **kwargs):
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.ok = True
+        mock_response.json.return_value = {"status": status}
+        return mock_response
+    with patch("morphlabs.models.scientia.requests.post", side_effect=mock_processing_response):
+        scientia = Scientia()
+        with pytest.raises(ValueError) as e:
+            scientia.clean_data(test_data_path / "valid_19ch_1000samples.csv")
+        assert "still processing" in str(e.value)
+        assert status in str(e.value)
+def test_runpod_failed_status(api_key, test_data_path):
+    def mock_failed_response(url, json, **kwargs):
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.ok = True
+        mock_response.json.return_value = {"status": "FAILED", "error": "GPU out of memory"}
+        return mock_response
+    with patch("morphlabs.models.scientia.requests.post", side_effect=mock_failed_response):
+        scientia = Scientia()
+        with pytest.raises(ValueError) as e:
+            scientia.clean_data(test_data_path / "valid_19ch_1000samples.csv")
+        assert "Scientia API request failed" in str(e.value)
+        assert "GPU out of memory" in str(e.value)
+def test_runpod_cancelled_status(api_key, test_data_path):
+    def mock_cancelled_response(url, json, **kwargs):
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.ok = True
+        mock_response.json.return_value = {"status": "CANCELLED"}
+        return mock_response
+    with patch("morphlabs.models.scientia.requests.post", side_effect=mock_cancelled_response):
+        scientia = Scientia()
+        with pytest.raises(ValueError) as e:
+            scientia.clean_data(test_data_path / "valid_19ch_1000samples.csv")
+        assert "was cancelled" in str(e.value)
+def test_api_key_in_payload(api_key, test_data_path):
+    captured_payload = None
+    def mock_capture_payload(url, json, **kwargs):
+        nonlocal captured_payload
+        captured_payload = json
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.ok = True
+        mock_response.json.return_value = {
+            "status": "COMPLETED",
+            "output": {"reconstructed": json["input"]["data"]}
+        }
+        return mock_response
+    with patch("morphlabs.models.scientia.requests.post", side_effect=mock_capture_payload):
+        scientia = Scientia()
+        scientia.clean_data(test_data_path / "valid_19ch_1000samples.csv")
+    assert captured_payload is not None
+    assert "input" in captured_payload
+    assert "api_key" in captured_payload["input"]
+    assert captured_payload["input"]["api_key"] == "test_api_key"
+    assert "data" in captured_payload["input"]
+def test_runpod_api_key_in_header(api_key, test_data_path):
+    captured_headers = None
+    def mock_capture_headers(url, headers=None, json=None, **kwargs):
+        nonlocal captured_headers
+        captured_headers = headers
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.ok = True
+        mock_response.json.return_value = {
+            "status": "COMPLETED",
+            "output": {"reconstructed": json["input"]["data"]}
+        }
+        return mock_response
+    with patch("morphlabs.models.scientia.requests.post", side_effect=mock_capture_headers):
+        scientia = Scientia()
+        scientia.clean_data(test_data_path / "valid_19ch_1000samples.csv")
+    assert captured_headers is not None
+    assert "Authorization" in captured_headers
+    assert captured_headers["Authorization"].startswith("Bearer ")
+    assert "rpa_" in captured_headers["Authorization"]