PyPI - eegdash - Versions diffs - 0.4.0.dev153__py3-none-any.whl → 0.4.0.dev171__py3-none-any.whl - Mend

eegdash 0.4.0.dev153__py3-none-any.whl → 0.4.0.dev171__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of eegdash might be problematic. Click here for more details.

Files changed (29)

eegdash/__init__.py +1 -1
eegdash/api.py +180 -86
eegdash/bids_eeg_metadata.py +139 -39
eegdash/const.py +25 -0
eegdash/data_utils.py +239 -173
eegdash/dataset/dataset.py +35 -13
eegdash/dataset/dataset_summary.csv +255 -255
eegdash/dataset/registry.py +69 -4
eegdash/downloader.py +95 -9
eegdash/features/datasets.py +320 -136
eegdash/features/decorators.py +88 -3
eegdash/features/extractors.py +203 -55
eegdash/features/feature_bank/complexity.py +7 -3
eegdash/features/feature_bank/dimensionality.py +1 -1
eegdash/features/inspect.py +80 -5
eegdash/features/serialization.py +47 -19
eegdash/features/utils.py +75 -8
eegdash/hbn/preprocessing.py +50 -17
eegdash/hbn/windows.py +145 -32
eegdash/logging.py +19 -0
eegdash/mongodb.py +44 -27
eegdash/paths.py +14 -5
eegdash/utils.py +16 -1
{eegdash-0.4.0.dev153.dist-info → eegdash-0.4.0.dev171.dist-info}/METADATA +1 -1
eegdash-0.4.0.dev171.dist-info/RECORD +37 -0
eegdash-0.4.0.dev153.dist-info/RECORD +0 -37
{eegdash-0.4.0.dev153.dist-info → eegdash-0.4.0.dev171.dist-info}/WHEEL +0 -0
{eegdash-0.4.0.dev153.dist-info → eegdash-0.4.0.dev171.dist-info}/licenses/LICENSE +0 -0
{eegdash-0.4.0.dev153.dist-info → eegdash-0.4.0.dev171.dist-info}/top_level.txt +0 -0

eegdash/__init__.py CHANGED Viewed

@@ -18,4 +18,4 @@ _init_mongo_client()
 __all__ = ["EEGDash", "EEGDashDataset", "EEGChallengeDataset", "preprocessing"]
-__version__ = "0.4.0.dev153"
+__version__ = "0.4.0.dev171"

eegdash/api.py CHANGED Viewed

@@ -212,17 +212,22 @@ class EEGDash:
         return doc is not None
     def _validate_input(self, record: dict[str, Any]) -> dict[str, Any]:
-        """Internal method to validate the input record against the expected schema.
+        """Validate the input record against the expected schema.
         Parameters
         ----------
-        record: dict
+        record : dict
             A dictionary representing the EEG data record to be validated.
         Returns
         -------
-        dict:
-            Returns the record itself on success, or raises a ValueError if the record is invalid.
+        dict
+            The record itself on success.
+        Raises
+        ------
+        ValueError
+            If the record is missing required keys or has values of the wrong type.
         """
         input_types = {
@@ -252,20 +257,44 @@ class EEGDash:
         return record
     def _build_query_from_kwargs(self, **kwargs) -> dict[str, Any]:
-        """Internal helper to build a validated MongoDB query from keyword args.
+        """Build a validated MongoDB query from keyword arguments.
+        This delegates to the module-level builder used across the package.
+        Parameters
+        ----------
+        **kwargs
+            Keyword arguments to convert into a MongoDB query.
+        Returns
+        -------
+        dict
+            A MongoDB query dictionary.
-        This delegates to the module-level builder used across the package and
-        is exposed here for testing and convenience.
         """
         return build_query_from_kwargs(**kwargs)
-    # --- Query merging and conflict detection helpers ---
-    def _extract_simple_constraint(self, query: dict[str, Any], key: str):
+    def _extract_simple_constraint(
+        self, query: dict[str, Any], key: str
+    ) -> tuple[str, Any] | None:
         """Extract a simple constraint for a given key from a query dict.
-        Supports only top-level equality (key: value) and $in (key: {"$in": [...]})
-        constraints. Returns a tuple (kind, value) where kind is "eq" or "in". If the
-        key is not present or uses other operators, returns None.
+        Supports top-level equality (e.g., ``{'subject': '01'}``) and ``$in``
+        (e.g., ``{'subject': {'$in': ['01', '02']}}``) constraints.
+        Parameters
+        ----------
+        query : dict
+            The MongoDB query dictionary.
+        key : str
+            The key for which to extract the constraint.
+        Returns
+        -------
+        tuple or None
+            A tuple of (kind, value) where kind is "eq" or "in", or None if the
+            constraint is not present or unsupported.
         """
         if not isinstance(query, dict) or key not in query:
             return None
@@ -275,16 +304,28 @@ class EEGDash:
                 return ("in", list(val["$in"]))
             return None  # unsupported operator shape for conflict checking
         else:
-            return ("eq", val)
+            return "eq", val
     def _raise_if_conflicting_constraints(
         self, raw_query: dict[str, Any], kwargs_query: dict[str, Any]
     ) -> None:
-        """Raise ValueError if both query sources define incompatible constraints.
+        """Raise ValueError if query sources have incompatible constraints.
+        Checks for mutually exclusive constraints on the same field to avoid
+        silent empty results.
+        Parameters
+        ----------
+        raw_query : dict
+            The raw MongoDB query dictionary.
+        kwargs_query : dict
+            The query dictionary built from keyword arguments.
+        Raises
+        ------
+        ValueError
+            If conflicting constraints are found.
-        We conservatively check only top-level fields with simple equality or $in
-        constraints. If a field appears in both queries and constraints are mutually
-        exclusive, raise an explicit error to avoid silent empty result sets.
         """
         if not raw_query or not kwargs_query:
             return
@@ -388,12 +429,31 @@ class EEGDash:
             logger.info("Upserted: %s", result.upserted_count)
             logger.info("Errors: %s ", result.bulk_api_result.get("writeErrors", []))
-    def _add_request(self, record: dict):
-        """Internal helper method to create a MongoDB insertion request for a record."""
+    def _add_request(self, record: dict) -> InsertOne:
+        """Create a MongoDB insertion request for a record.
+        Parameters
+        ----------
+        record : dict
+            The record to insert.
+        Returns
+        -------
+        InsertOne
+            A PyMongo ``InsertOne`` object.
+        """
         return InsertOne(record)
-    def add(self, record: dict):
-        """Add a single record to the MongoDB collection."""
+    def add(self, record: dict) -> None:
+        """Add a single record to the MongoDB collection.
+        Parameters
+        ----------
+        record : dict
+            The record to add.
+        """
         try:
             self.__collection.insert_one(record)
         except ValueError as e:
@@ -405,11 +465,23 @@ class EEGDash:
             )
             logger.debug("Add operation failed", exc_info=exc)
-    def _update_request(self, record: dict):
-        """Internal helper method to create a MongoDB update request for a record."""
+    def _update_request(self, record: dict) -> UpdateOne:
+        """Create a MongoDB update request for a record.
+        Parameters
+        ----------
+        record : dict
+            The record to update.
+        Returns
+        -------
+        UpdateOne
+            A PyMongo ``UpdateOne`` object.
+        """
         return UpdateOne({"data_name": record["data_name"]}, {"$set": record})
-    def update(self, record: dict):
+    def update(self, record: dict) -> None:
         """Update a single record in the MongoDB collection.
         Parameters
@@ -429,58 +501,81 @@ class EEGDash:
             logger.debug("Update operation failed", exc_info=exc)
     def exists(self, query: dict[str, Any]) -> bool:
-        """Alias for :meth:`exist` provided for API clarity."""
+        """Check if at least one record matches the query.
+        This is an alias for :meth:`exist`.
+        Parameters
+        ----------
+        query : dict
+            MongoDB query to check for existence.
+        Returns
+        -------
+        bool
+            True if a matching record exists, False otherwise.
+        """
         return self.exist(query)
-    def remove_field(self, record, field):
-        """Remove a specific field from a record in the MongoDB collection.
+    def remove_field(self, record: dict, field: str) -> None:
+        """Remove a field from a specific record in the MongoDB collection.
         Parameters
         ----------
         record : dict
-            Record identifying object with ``data_name``.
+            Record-identifying object with a ``data_name`` key.
         field : str
-            Field name to remove.
+            The name of the field to remove.
         """
         self.__collection.update_one(
             {"data_name": record["data_name"]}, {"$unset": {field: 1}}
         )
-    def remove_field_from_db(self, field):
-        """Remove a field from all records (destructive).
+    def remove_field_from_db(self, field: str) -> None:
+        """Remove a field from all records in the database.
+        .. warning::
+            This is a destructive operation and cannot be undone.
         Parameters
         ----------
         field : str
-            Field name to remove from every document.
+            The name of the field to remove from all documents.
         """
         self.__collection.update_many({}, {"$unset": {field: 1}})
     @property
     def collection(self):
-        """Return the MongoDB collection object."""
-        return self.__collection
+        """The underlying PyMongo ``Collection`` object.
-    def close(self):
-        """Backward-compatibility no-op; connections are managed globally.
+        Returns
+        -------
+        pymongo.collection.Collection
+            The collection object used for database interactions.
-        Notes
-        -----
-        Connections are managed by :class:`MongoConnectionManager`. Use
-        :meth:`close_all_connections` to explicitly close all clients.
+        """
+        return self.__collection
+    def close(self) -> None:
+        """Close the MongoDB connection.
+        .. deprecated:: 0.1
+            Connections are now managed globally by :class:`MongoConnectionManager`.
+            This method is a no-op and will be removed in a future version.
+            Use :meth:`EEGDash.close_all_connections` to close all clients.
         """
         # Individual instances no longer close the shared client
         pass
     @classmethod
-    def close_all_connections(cls):
-        """Close all MongoDB client connections managed by the singleton."""
+    def close_all_connections(cls) -> None:
+        """Close all MongoDB client connections managed by the singleton manager."""
         MongoConnectionManager.close_all()
-    def __del__(self):
+    def __del__(self) -> None:
         """Destructor; no explicit action needed due to global connection manager."""
         # No longer needed since we're using singleton pattern
         pass
@@ -775,45 +870,30 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
     ) -> list[dict]:
         """Discover local BIDS EEG files and build minimal records.
-        This helper enumerates EEG recordings under ``dataset_root`` via
-        ``mne_bids.find_matching_paths`` and applies entity filters to produce a
-        list of records suitable for ``EEGDashBaseDataset``. No network access
-        is performed and files are not read.
+        Enumerates EEG recordings under ``dataset_root`` using
+        ``mne_bids.find_matching_paths`` and applies entity filters to produce
+        records suitable for :class:`EEGDashBaseDataset`. No network access is
+        performed, and files are not read.
         Parameters
         ----------
         dataset_root : Path
-            Local dataset directory. May be the plain dataset folder (e.g.,
-            ``ds005509``) or a suffixed cache variant (e.g.,
-            ``ds005509-bdf-mini``).
-        filters : dict of {str, Any}
-            Query filters. Must include ``'dataset'`` with the dataset id (without
-            local suffixes). May include BIDS entities ``'subject'``,
-            ``'session'``, ``'task'``, and ``'run'``. Each value can be a scalar
-            or a sequence of scalars.
+            Local dataset directory (e.g., ``/path/to/cache/ds005509``).
+        filters : dict
+            Query filters. Must include ``'dataset'`` and may include BIDS
+            entities like ``'subject'``, ``'session'``, etc.
         Returns
         -------
-        records : list of dict
-            One record per matched EEG file with at least:
-            - ``'data_name'``
-            - ``'dataset'`` (dataset id, without suffixes)
-            - ``'bidspath'`` (normalized to start with the dataset id)
-            - ``'subject'``, ``'session'``, ``'task'``, ``'run'`` (may be None)
-            - ``'bidsdependencies'`` (empty list)
-            - ``'modality'`` (``"eeg"``)
-            - ``'sampling_frequency'``, ``'nchans'``, ``'ntimes'`` (minimal
-              defaults for offline usage)
+        list of dict
+            A list of records, one for each matched EEG file. Each record
+            contains BIDS entities, paths, and minimal metadata for offline use.
         Notes
         -----
-        - Matching uses ``datatypes=['eeg']`` and ``suffixes=['eeg']``.
-        - ``bidspath`` is constructed as
-          ``<dataset_id> / <relative_path_from_dataset_root>`` to ensure the
-          first path component is the dataset id (without local cache suffixes).
-        - Minimal defaults are set for ``sampling_frequency``, ``nchans``, and
-          ``ntimes`` to satisfy dataset length requirements offline.
+        Matching is performed for ``datatypes=['eeg']`` and ``suffixes=['eeg']``.
+        The ``bidspath`` is normalized to ensure it starts with the dataset ID,
+        even for suffixed cache directories.
         """
         dataset_id = filters["dataset"]
@@ -875,10 +955,22 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
         return records_out
     def _find_key_in_nested_dict(self, data: Any, target_key: str) -> Any:
-        """Recursively search for target_key in nested dicts/lists with normalized matching.
+        """Recursively search for a key in nested dicts/lists.
+        Performs a case-insensitive and underscore/hyphen-agnostic search.
+        Parameters
+        ----------
+        data : Any
+            The nested data structure (dicts, lists) to search.
+        target_key : str
+            The key to search for.
+        Returns
+        -------
+        Any
+            The value of the first matching key, or None if not found.
-        This makes lookups tolerant to naming differences like "p-factor" vs "p_factor".
-        Returns the first match or None.
         """
         norm_target = normalize_key(target_key)
         if isinstance(data, dict):
@@ -901,23 +993,25 @@ class EEGDashDataset(BaseConcatDataset, metaclass=NumpyDocstringInheritanceInitM
         description_fields: list[str],
         base_dataset_kwargs: dict,
     ) -> list[EEGDashBaseDataset]:
-        """Helper method to find datasets in the MongoDB collection that satisfy the
-        given query and return them as a list of EEGDashBaseDataset objects.
+        """Find and construct datasets from a MongoDB query.
+        Queries the database, then creates a list of
+        :class:`EEGDashBaseDataset` objects from the results.
         Parameters
         ----------
-        query : dict
-            The query object, as in EEGDash.find().
-        description_fields : list[str]
-            A list of fields to be extracted from the dataset records and included in
-            the returned dataset description(s).
-        kwargs: additional keyword arguments to be passed to the EEGDashBaseDataset
-            constructor.
+        query : dict, optional
+            The MongoDB query to execute.
+        description_fields : list of str
+            Fields to extract from each record for the dataset description.
+        base_dataset_kwargs : dict
+            Additional keyword arguments to pass to the
+            :class:`EEGDashBaseDataset` constructor.
         Returns
         -------
-        list :
-            A list of EEGDashBaseDataset objects that match the query.
+        list of EEGDashBaseDataset
+            A list of dataset objects matching the query.
         """
         datasets: list[EEGDashBaseDataset] = []

eegdash/bids_eeg_metadata.py CHANGED Viewed

@@ -33,12 +33,30 @@ __all__ = [
 def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
-    """Build and validate a MongoDB query from user-friendly keyword arguments.
+    """Build and validate a MongoDB query from keyword arguments.
+    This function converts user-friendly keyword arguments into a valid
+    MongoDB query dictionary. It handles scalar values as exact matches and
+    list-like values as ``$in`` queries. It also performs validation to
+    reject unsupported fields and empty values.
+    Parameters
+    ----------
+    **kwargs
+        Keyword arguments representing query filters. Allowed keys are defined
+        in ``eegdash.const.ALLOWED_QUERY_FIELDS``.
+    Returns
+    -------
+    dict
+        A MongoDB query dictionary.
+    Raises
+    ------
+    ValueError
+        If an unsupported query field is provided, or if a value is None or
+        an empty string/list.
-    Improvements:
-    - Reject None values and empty/whitespace-only strings
-    - For list/tuple/set values: strip strings, drop None/empties, deduplicate, and use `$in`
-    - Preserve scalars as exact matches
     """
     # 1. Validate that all provided keys are allowed for querying
     unknown_fields = set(kwargs.keys()) - ALLOWED_QUERY_FIELDS
@@ -89,24 +107,29 @@ def build_query_from_kwargs(**kwargs) -> dict[str, Any]:
 def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any]:
-    """Build the metadata record for a given BIDS file (single recording) in a BIDS dataset.
+    """Build a metadata record for a BIDS file.
-    Attributes are at least the ones defined in data_config attributes (set to None if missing),
-    but are typically a superset, and include, among others, the paths to relevant
-    meta-data files needed to load and interpret the file in question.
+    Extracts metadata attributes from a single BIDS EEG file within a given
+    BIDS dataset. The extracted attributes include BIDS entities, file paths,
+    and technical metadata required for database indexing.
     Parameters
     ----------
     bids_dataset : EEGBIDSDataset
         The BIDS dataset object containing the file.
     bids_file : str
-        The path to the BIDS file within the dataset.
+        The path to the BIDS file to process.
     Returns
     -------
-    dict:
-        A dictionary representing the metadata record for the given file. This is the
-        same format as the records stored in the database.
+    dict
+        A dictionary of metadata attributes for the file, suitable for
+        insertion into the database.
+    Raises
+    ------
+    ValueError
+        If ``bids_file`` is not found in the ``bids_dataset``.
     """
     if bids_file not in bids_dataset.files:
@@ -198,11 +221,23 @@ def load_eeg_attrs_from_bids_file(bids_dataset, bids_file: str) -> dict[str, Any
 def normalize_key(key: str) -> str:
-    """Normalize a metadata key for robust matching.
+    """Normalize a string key for robust matching.
+    Converts the key to lowercase, replaces non-alphanumeric characters with
+    underscores, and removes leading/trailing underscores. This allows for
+    tolerant matching of keys that may have different capitalization or
+    separators (e.g., "p-factor" becomes "p_factor").
+    Parameters
+    ----------
+    key : str
+        The key to normalize.
+    Returns
+    -------
+    str
+        The normalized key.
-    Lowercase and replace non-alphanumeric characters with underscores, then strip
-    leading/trailing underscores. This allows tolerant matching such as
-    "p-factor" ≈ "p_factor" ≈ "P Factor".
     """
     return re.sub(r"[^a-z0-9]+", "_", str(key).lower()).strip("_")
@@ -212,27 +247,27 @@ def merge_participants_fields(
     participants_row: dict[str, Any] | None,
     description_fields: list[str] | None = None,
 ) -> dict[str, Any]:
-    """Merge participants.tsv fields into a dataset description dictionary.
+    """Merge fields from a participants.tsv row into a description dict.
-    - Preserves existing entries in ``description`` (no overwrites).
-    - Fills requested ``description_fields`` first, preserving their original names.
-    - Adds all remaining participants columns generically using normalized keys
-      unless a matching requested field already captured them.
+    Enriches a description dictionary with data from a subject's row in
+    ``participants.tsv``. It avoids overwriting existing keys in the
+    description.
     Parameters
     ----------
     description : dict
-        Current description to be enriched in-place and returned.
-    participants_row : dict | None
-        A mapping of participants.tsv columns for the current subject.
-    description_fields : list[str] | None
-        Optional list of requested description fields. When provided, matching is
-        performed by normalized names; the original requested field names are kept.
+        The description dictionary to enrich.
+    participants_row : dict or None
+        A dictionary representing a row from ``participants.tsv``. If None,
+        the original description is returned unchanged.
+    description_fields : list of str, optional
+        A list of specific fields to include in the description. Matching is
+        done using normalized keys.
     Returns
     -------
     dict
-        The enriched description (same object as input for convenience).
+        The enriched description dictionary.
     """
     if not isinstance(description, dict) or not isinstance(participants_row, dict):
@@ -272,10 +307,26 @@ def participants_row_for_subject(
     subject: str,
     id_columns: tuple[str, ...] = ("participant_id", "participant", "subject"),
 ) -> pd.Series | None:
-    """Load participants.tsv and return the row for a subject.
+    """Load participants.tsv and return the row for a specific subject.
+    Searches for a subject's data in the ``participants.tsv`` file within a
+    BIDS dataset. It can identify the subject with or without the "sub-"
+    prefix.
+    Parameters
+    ----------
+    bids_root : str or Path
+        The root directory of the BIDS dataset.
+    subject : str
+        The subject identifier (e.g., "01" or "sub-01").
+    id_columns : tuple of str, default ("participant_id", "participant", "subject")
+        A tuple of column names to search for the subject identifier.
+    Returns
+    -------
+    pandas.Series or None
+        A pandas Series containing the subject's data if found, otherwise None.
-    - Accepts either "01" or "sub-01" as the subject identifier.
-    - Returns a pandas Series for the first matching row, or None if not found.
     """
     try:
         participants_tsv = Path(bids_root) / "participants.tsv"
@@ -311,9 +362,28 @@ def participants_extras_from_tsv(
     id_columns: tuple[str, ...] = ("participant_id", "participant", "subject"),
     na_like: tuple[str, ...] = ("", "n/a", "na", "nan", "unknown", "none"),
 ) -> dict[str, Any]:
-    """Return non-identifier, non-empty participants.tsv fields for a subject.
+    """Extract additional participant information from participants.tsv.
+    Retrieves all non-identifier and non-empty fields for a subject from
+    the ``participants.tsv`` file.
+    Parameters
+    ----------
+    bids_root : str or Path
+        The root directory of the BIDS dataset.
+    subject : str
+        The subject identifier.
+    id_columns : tuple of str, default ("participant_id", "participant", "subject")
+        Column names to be treated as identifiers and excluded from the
+        output.
+    na_like : tuple of str, default ("", "n/a", "na", "nan", "unknown", "none")
+        Values to be considered as "Not Available" and excluded.
+    Returns
+    -------
+    dict
+        A dictionary of extra participant information.
-    Uses vectorized pandas operations to drop id columns and NA-like values.
     """
     row = participants_row_for_subject(bids_root, subject, id_columns=id_columns)
     if row is None:
@@ -331,10 +401,21 @@ def attach_participants_extras(
     description: Any,
     extras: dict[str, Any],
 ) -> None:
-    """Attach extras to Raw.info and dataset description without overwriting.
+    """Attach extra participant data to a raw object and its description.
+    Updates the ``raw.info['subject_info']`` and the description object
+    (dict or pandas Series) with extra data from ``participants.tsv``.
+    It does not overwrite existing keys.
+    Parameters
+    ----------
+    raw : mne.io.Raw
+        The MNE Raw object to be updated.
+    description : dict or pandas.Series
+        The description object to be updated.
+    extras : dict
+        A dictionary of extra participant information to attach.
-    - Adds to ``raw.info['subject_info']['participants_extras']``.
-    - Adds to ``description`` if dict or pandas Series (only missing keys).
     """
     if not extras:
         return
@@ -375,9 +456,28 @@ def enrich_from_participants(
     raw: Any,
     description: Any,
 ) -> dict[str, Any]:
-    """Convenience wrapper: read participants.tsv and attach extras for this subject.
+    """Read participants.tsv and attach extra info for the subject.
+    This is a convenience function that finds the subject from the
+    ``bidspath``, retrieves extra information from ``participants.tsv``,
+    and attaches it to the raw object and its description.
+    Parameters
+    ----------
+    bids_root : str or Path
+        The root directory of the BIDS dataset.
+    bidspath : mne_bids.BIDSPath
+        The BIDSPath object for the current data file.
+    raw : mne.io.Raw
+        The MNE Raw object to be updated.
+    description : dict or pandas.Series
+        The description object to be updated.
+    Returns
+    -------
+    dict
+        The dictionary of extras that were attached.
-    Returns the extras dictionary for further use if needed.
     """
     subject = getattr(bidspath, "subject", None)
     if not subject:

eegdash 0.4.0.dev153__py3-none-any.whl → 0.4.0.dev171__py3-none-any.whl

Potentially problematic release.

eegdash 0.4.0.dev153__py3-none-any.whl → 0.4.0.dev171__py3-none-any.whl