PyPI - cooplot - Versions diffs - 0.1.0__tar.gz - Mend

cooplot 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

cooplot-0.1.0/.gitattributes +1 -0
cooplot-0.1.0/.github/agberens.png +0 -0
cooplot-0.1.0/.github/excelclust.png +0 -0
cooplot-0.1.0/.github/workflows/python-publish.yml +70 -0
cooplot-0.1.0/.gitignore +31 -0
cooplot-0.1.0/PKG-INFO +121 -0
cooplot-0.1.0/README.md +102 -0
cooplot-0.1.0/cooplot/__init__.py +1 -0
cooplot-0.1.0/cooplot/aggregate.py +295 -0
cooplot-0.1.0/cooplot/api.py +190 -0
cooplot-0.1.0/cooplot/build.py +108 -0
cooplot-0.1.0/cooplot/io.py +27 -0
cooplot-0.1.0/cooplot/metrics.py +1459 -0
cooplot-0.1.0/cooplot/scrape.py +226 -0
cooplot-0.1.0/cooplot/viz.py +643 -0
cooplot-0.1.0/notebooks/agberens/Aleksejs Timčenko.json +22 -0
cooplot-0.1.0/notebooks/agberens/Camila Roa.json +1 -0
cooplot-0.1.0/notebooks/agberens/Christian Behrens.json +67 -0
cooplot-0.1.0/notebooks/agberens/Dmitry Kobak.json +272 -0
cooplot-0.1.0/notebooks/agberens/Fabio Seel.json +1 -0
cooplot-0.1.0/notebooks/agberens/Ifeoma Veronica Nwabufo.json +17 -0
cooplot-0.1.0/notebooks/agberens/Indu Ilanchezian.json +42 -0
cooplot-0.1.0/notebooks/agberens/Jan Lause.json +42 -0
cooplot-0.1.0/notebooks/agberens/Jan Niklas Böhm.json +52 -0
cooplot-0.1.0/notebooks/agberens/Jonas Beck.json +57 -0
cooplot-0.1.0/notebooks/agberens/Jonathan Oesterle.json +62 -0
cooplot-0.1.0/notebooks/agberens/Julius Gervelmeyer.json +22 -0
cooplot-0.1.0/notebooks/agberens/Kerol Djoumessi.json +47 -0
cooplot-0.1.0/notebooks/agberens/Kyra Kadhim.json +22 -0
cooplot-0.1.0/notebooks/agberens/Lisa Koch.json +232 -0
cooplot-0.1.0/notebooks/agberens/Lisa Schmors.json +52 -0
cooplot-0.1.0/notebooks/agberens/Luke Rogerson.json +62 -0
cooplot-0.1.0/notebooks/agberens/Murat Seçkin Ayhan.json +127 -0
cooplot-0.1.0/notebooks/agberens/Patrick Köhler.json +12 -0
cooplot-0.1.0/notebooks/agberens/Philipp Berens.json +782 -0
cooplot-0.1.0/notebooks/agberens/Rita González Márquez.json +62 -0
cooplot-0.1.0/notebooks/agberens/Sacha Sokoloski.json +82 -0
cooplot-0.1.0/notebooks/agberens/Samuel Ofosu Mensah.json +37 -0
cooplot-0.1.0/notebooks/agberens/Sarah Müller.json +72 -0
cooplot-0.1.0/notebooks/agberens/Sarah Strauß.json +17 -0
cooplot-0.1.0/notebooks/agberens/Sebastian Damrich.json +107 -0
cooplot-0.1.0/notebooks/agberens/Simone Ebert.json +42 -0
cooplot-0.1.0/notebooks/agberens/Sophie Laturnus.json +57 -0
cooplot-0.1.0/notebooks/agberens/Verena Jasmin Hallitschke.json +7 -0
cooplot-0.1.0/notebooks/agberens/Yves Bernaerts.json +47 -0
cooplot-0.1.0/notebooks/agberens/Ziwei Huang.json +47 -0
cooplot-0.1.0/notebooks/agberens-coop-ref.txt +179 -0
cooplot-0.1.0/notebooks/agberens-coop.csv +86 -0
cooplot-0.1.0/notebooks/agberens-grouped/All.json +1266 -0
cooplot-0.1.0/notebooks/agberens-grouped/Embedding.json +681 -0
cooplot-0.1.0/notebooks/agberens-grouped/MedML.json +772 -0
cooplot-0.1.0/notebooks/agberens-grouped/Neural.json +822 -0
cooplot-0.1.0/notebooks/agberens.csv +32 -0
cooplot-0.1.0/notebooks/agberens.ipynb +275 -0
cooplot-0.1.0/pyproject.toml +37 -0
cooplot-0.1.0/tests/__init__.py +0 -0
cooplot-0.1.0/tests/test_aggregate_metrics.py +874 -0
cooplot-0.1.0/tests/test_pubmed_live.py +30 -0
cooplot-0.1.0/uv.lock +2603 -0

cooplot-0.1.0/.gitattributes ADDED Viewed

	@@ -0,0 +1 @@
1	+ *.ipynb linguist-documentation

cooplot-0.1.0/.github/agberens.png ADDED Viewed

Binary file

cooplot-0.1.0/.github/excelclust.png ADDED Viewed

Binary file

cooplot-0.1.0/.github/workflows/python-publish.yml ADDED Viewed

@@ -0,0 +1,70 @@
+# This workflow will upload a Python Package to PyPI when a release is created
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+name: Upload Python Package
+on:
+  release:
+    types: [published]
+permissions:
+  contents: read
+jobs:
+  release-build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.x"
+      - name: Build release distributions
+        run: |
+          # NOTE: put your own distribution build steps here.
+          python -m pip install build
+          python -m build
+      - name: Upload distributions
+        uses: actions/upload-artifact@v4
+        with:
+          name: release-dists
+          path: dist/
+  pypi-publish:
+    runs-on: ubuntu-latest
+    needs:
+      - release-build
+    permissions:
+      # IMPORTANT: this permission is mandatory for trusted publishing
+      id-token: write
+    # Dedicated environments with protections for publishing are strongly recommended.
+    # For more information, see: https://docs.github.com/en/actions/deployment/targeting-different-environments/using-environments-for-deployment#deployment-protection-rules
+    environment:
+      name: pypi
+      # OPTIONAL: uncomment and update to include your PyPI project URL in the deployment status:
+      # url: https://pypi.org/p/cooplot
+      #
+      # ALTERNATIVE: if your GitHub Release name is the PyPI project version string
+      # ALTERNATIVE: exactly, uncomment the following line instead:
+      # url: https://pypi.org/project/YOURPROJECT/${{ github.event.release.name }}
+    steps:
+      - name: Retrieve release distributions
+        uses: actions/download-artifact@v4
+        with:
+          name: release-dists
+          path: dist/
+      - name: Publish release distributions to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          packages-dir: dist/

cooplot-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,31 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+.pytest_cache
+# Distribution / packaging
+build/
+dist/
+*.egg-info/
+*.egg
+# Environments
+.env
+.venv
+env/
+venv/
+# Jupyter Notebook
+.ipynb_checkpoints
+# macOS
+.DS_Store
+# Editor directories and files
+.idea/
+.vscode/
+*.swp
+*.swo
+dev/

cooplot-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,121 @@
+Metadata-Version: 2.4
+Name: cooplot
+Version: 0.1.0
+Summary: Co-op between members and subgroups.
+Requires-Python: >=3.10.0
+Requires-Dist: matplotlib>=3.10.6
+Requires-Dist: mne-connectivity>=0.7.0
+Requires-Dist: numpy>=2.2.6
+Requires-Dist: python-dotenv>=1.0
+Requires-Dist: requests>=2.32.5
+Requires-Dist: scholarly>=1.7.11
+Requires-Dist: tqdm>=4.67.1
+Provides-Extra: dev
+Requires-Dist: maturin; extra == 'dev'
+Requires-Dist: pytest; extra == 'dev'
+Requires-Dist: ruff; extra == 'dev'
+Requires-Dist: twine; extra == 'dev'
+Description-Content-Type: text/markdown
+# cooplot
+Analysis of co-op between members and subgroups.
+## Example Gallery
+## Cluster of Excellence – Machine Learning for Science
+```python
+import cooplot
+palette = {
+    "Life Science": "#61859e",
+    "Norms": "#e0aa41",
+    "Human Science": "#5fb4d0",
+    "ML": "#bc3b2f",
+    "Physical Science": "#608dd2",
+}
+_, people = cooplot.load_csv("excelclust.csv", delimiter=";")
+pubs = cooplot.scrape(
+    people,
+    name_col="name",
+    scholar_col="scholar_id",
+    semantic_col="semantic_id",
+    cache_dir=".cache/excelclust",
+)
+mats = cooplot.build(pubs, windows=["2014-2018", "2019-2023"], name_col="name", group_col="group")
+fig = cooplot.show(mats, group_col="group", style="circle", heatmap_counts=True, palette=palette)
+fig.savefig("../.github/excelclust.png", dpi=300)
+```
+![](.github/excelclust.png)
+## AG Berens
+```python
+import cooplot
+header, people = cooplot.load_csv("agberens.csv", delimiter=";")
+pubs = cooplot.scrape(
+    people,
+    name_col="name",
+    scholar_col="scholar_id",
+    semantic_col="semantic_id",
+    cache_dir=".cache/hai",
+    drop_subtitle=False,
+    fallback_semantic_if_empty=True,  # try Semantic if GS had 0 pubs
+)
+mats = cooplot.build(pubs, windows=["2016-2025"], name_col="name", group_col="group")
+fig = cooplot.show(mats, group_col="group", style="both", heatmap_counts=True, figsize=(18,10))
+```
+![](.github/agberens.png)
+You can also generate a list of references highlighting cross-group collaborations:
+```python
+groups = cooplot.aggregate(pubs, name_col="name", cache_dir=".cache/agberens-group")
+_ = cooplot.cross_group_publications(groups, year_from=2016, year_to=2025, out_path=".cache/agberens-coop.csv", enrich_crossref=True)
+cooplot.cross_group_report(".cache/agberens-coop.csv", out_path=".cache/agberens-coop-ref.txt", verbose=True)
+```
+which will give you
+```
+...
+Schmors, L., Kotkat, A. H., Bauer, Y., Huang, Z., Crombie, D., Meyerolbersleben, L. S., Sokoloski, S., Berens, P., & Busse, L. (2025). Effects of corticothalamic feedback depend on visual responsiveness and stimulus type. IScience, 28(6), 112481. https://doi.org/10.1016/j.isci.2025.112481
+Collaboration: All (Philipp Berens, Ziwei Huang) and Neural (Lisa Schmors, Sacha Sokoloski).
+Gervelmeyer, J., Müller, S., Huang, Z., & Berens, P. (2025). Fundus Image Toolbox: A Python package for fundus image processing. Journal of Open Source Software, 10(108), 7101. https://doi.org/10.21105/joss.07101
+Collaboration: All (Philipp Berens, Ziwei Huang) and MedML (Julius Gervelmeyer, Sarah Müller).
+Schmidt, G., Heidrich, H., Berens, P., & Müller, S. (2025). Learning Disease State from Noisy Ordinal Disease Progression Labels. Medical Image Computing and Computer Assisted Intervention – MICCAI 2025, 284–293. https://doi.org/10.1007/978-3-032-04971-1_27
+Collaboration: All (Philipp Berens) and MedML (Sarah Müller).
+Ofosu Mensah, S., Djoumessi, K., & Berens, P. (2025). Prototype-Guided and Lightweight Adapters for Inherent Interpretation and Generalisation in Federated Learning. Medical Image Computing and Computer Assisted Intervention – MICCAI 2025, 464–473. https://doi.org/10.1007/978-3-032-04981-0_44
+Collaboration: All (Philipp Berens) and MedML (Kerol Djoumessi, Samuel Ofosu Mensah).
+Oesterle, J., Ran, Y., Stahr, P., Kerr, J. N. D., Schubert, T., Berens, P., & Euler, T. (2025). Task-specific regional circuit adaptations in distinct mouse retinal ganglion cells. Science Advances, 11(17). https://doi.org/10.1126/sciadv.adp7075
+Collaboration: All (Philipp Berens) and Neural (Jonathan Oesterle).
+```
+## Installation
+```bash
+uv pip install cooplot
+```
+or
+```bash
+git clone git@github.com:berenslab/cooplot.git
+cd cooplot
+uv pip install -e ".[dev]"
+```
+## Usage
+See the [example notebook](https://github.com/berenslab/cooplot/blob/main/notebooks/agberens.ipynb) for a complete usage example.

cooplot-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,102 @@
+# cooplot
+Analysis of co-op between members and subgroups.
+## Example Gallery
+## Cluster of Excellence – Machine Learning for Science
+```python
+import cooplot
+palette = {
+    "Life Science": "#61859e",
+    "Norms": "#e0aa41",
+    "Human Science": "#5fb4d0",
+    "ML": "#bc3b2f",
+    "Physical Science": "#608dd2",
+}
+_, people = cooplot.load_csv("excelclust.csv", delimiter=";")
+pubs = cooplot.scrape(
+    people,
+    name_col="name",
+    scholar_col="scholar_id",
+    semantic_col="semantic_id",
+    cache_dir=".cache/excelclust",
+)
+mats = cooplot.build(pubs, windows=["2014-2018", "2019-2023"], name_col="name", group_col="group")
+fig = cooplot.show(mats, group_col="group", style="circle", heatmap_counts=True, palette=palette)
+fig.savefig("../.github/excelclust.png", dpi=300)
+```
+![](.github/excelclust.png)
+## AG Berens
+```python
+import cooplot
+header, people = cooplot.load_csv("agberens.csv", delimiter=";")
+pubs = cooplot.scrape(
+    people,
+    name_col="name",
+    scholar_col="scholar_id",
+    semantic_col="semantic_id",
+    cache_dir=".cache/hai",
+    drop_subtitle=False,
+    fallback_semantic_if_empty=True,  # try Semantic if GS had 0 pubs
+)
+mats = cooplot.build(pubs, windows=["2016-2025"], name_col="name", group_col="group")
+fig = cooplot.show(mats, group_col="group", style="both", heatmap_counts=True, figsize=(18,10))
+```
+![](.github/agberens.png)
+You can also generate a list of references highlighting cross-group collaborations:
+```python
+groups = cooplot.aggregate(pubs, name_col="name", cache_dir=".cache/agberens-group")
+_ = cooplot.cross_group_publications(groups, year_from=2016, year_to=2025, out_path=".cache/agberens-coop.csv", enrich_crossref=True)
+cooplot.cross_group_report(".cache/agberens-coop.csv", out_path=".cache/agberens-coop-ref.txt", verbose=True)
+```
+which will give you
+```
+...
+Schmors, L., Kotkat, A. H., Bauer, Y., Huang, Z., Crombie, D., Meyerolbersleben, L. S., Sokoloski, S., Berens, P., & Busse, L. (2025). Effects of corticothalamic feedback depend on visual responsiveness and stimulus type. IScience, 28(6), 112481. https://doi.org/10.1016/j.isci.2025.112481
+Collaboration: All (Philipp Berens, Ziwei Huang) and Neural (Lisa Schmors, Sacha Sokoloski).
+Gervelmeyer, J., Müller, S., Huang, Z., & Berens, P. (2025). Fundus Image Toolbox: A Python package for fundus image processing. Journal of Open Source Software, 10(108), 7101. https://doi.org/10.21105/joss.07101
+Collaboration: All (Philipp Berens, Ziwei Huang) and MedML (Julius Gervelmeyer, Sarah Müller).
+Schmidt, G., Heidrich, H., Berens, P., & Müller, S. (2025). Learning Disease State from Noisy Ordinal Disease Progression Labels. Medical Image Computing and Computer Assisted Intervention – MICCAI 2025, 284–293. https://doi.org/10.1007/978-3-032-04971-1_27
+Collaboration: All (Philipp Berens) and MedML (Sarah Müller).
+Ofosu Mensah, S., Djoumessi, K., & Berens, P. (2025). Prototype-Guided and Lightweight Adapters for Inherent Interpretation and Generalisation in Federated Learning. Medical Image Computing and Computer Assisted Intervention – MICCAI 2025, 464–473. https://doi.org/10.1007/978-3-032-04981-0_44
+Collaboration: All (Philipp Berens) and MedML (Kerol Djoumessi, Samuel Ofosu Mensah).
+Oesterle, J., Ran, Y., Stahr, P., Kerr, J. N. D., Schubert, T., Berens, P., & Euler, T. (2025). Task-specific regional circuit adaptations in distinct mouse retinal ganglion cells. Science Advances, 11(17). https://doi.org/10.1126/sciadv.adp7075
+Collaboration: All (Philipp Berens) and Neural (Jonathan Oesterle).
+```
+## Installation
+```bash
+uv pip install cooplot
+```
+or
+```bash
+git clone git@github.com:berenslab/cooplot.git
+cd cooplot
+uv pip install -e ".[dev]"
+```
+## Usage
+See the [example notebook](https://github.com/berenslab/cooplot/blob/main/notebooks/agberens.ipynb) for a complete usage example.

cooplot-0.1.0/cooplot/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ from .api import *

cooplot-0.1.0/cooplot/aggregate.py ADDED Viewed

@@ -0,0 +1,295 @@
+from __future__ import annotations
+import json
+import re
+from collections import defaultdict
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Dict, Iterable, List, Optional, Tuple
+DEFAULT_CACHE_DIR = Path(".cache/cooplot/groups")
+_UNLABELED = "Unlabeled"
+_slug_pattern = re.compile(r"[^A-Za-z0-9._-]+")
@dataclass(frozen=True)
class GroupedPublications:
    """Container for group-level publication data.

    Attributes
    ----------
    by_group
        Mapping of group label to that group's list of publication records.
    paths
        Mapping of group label to the file written for that group. May be
        empty when nothing was saved to disk.
    """

    by_group: Dict[str, List[dict]]
    paths: Dict[str, Path]

    def sorted_groups(self) -> List[str]:
        """Return the group labels sorted case-insensitively."""
        return sorted(self.by_group, key=str.lower)

    def to_author_list(self, name_field: str = "name") -> List[dict]:
        """Return a minimal people list that can be fed into ``build``.

        Each group becomes one record whose ``name_field`` and ``"group"``
        entries are both the group label, in :meth:`sorted_groups` order.
        """
        return [
            {name_field: group, "group": group} for group in self.sorted_groups()
        ]

    def exclude_groups(self, groups: Iterable[str]) -> "GroupedPublications":
        """Return a new instance without the specified groups.

        Parameters
        ----------
        groups
            Group labels to drop. ``None`` entries and labels that are empty
            after stripping whitespace are ignored. A single string is treated
            as one label rather than as an iterable of characters.

        Returns
        -------
        GroupedPublications
            ``self`` when there is nothing to remove, otherwise a new instance
            whose ``by_group`` and ``paths`` omit the excluded labels.
        """
        # A bare string is iterable, so without this guard "All" would be
        # interpreted as the three labels "A", "l" and "l".
        if isinstance(groups, str):
            groups = [groups]
        to_remove = {str(group).strip() for group in groups if group is not None}
        to_remove.discard("")
        if not to_remove:
            return self
        return GroupedPublications(
            by_group={
                group: pubs
                for group, pubs in self.by_group.items()
                if group not in to_remove
            },
            paths={
                group: path
                for group, path in self.paths.items()
                if group not in to_remove
            },
        )
+def _lastname(name: str) -> str:
+    return (name or "").strip().split()[-1].lower()
def _normalize_group(value: Optional[str]) -> str:
    """Return ``value`` as a stripped string, or the unlabeled marker.

    ``None`` and values that are empty after stripping whitespace both map to
    ``_UNLABELED``.
    """
    cleaned = "" if value is None else str(value).strip()
    return cleaned or _UNLABELED
def _slugify(label: str) -> str:
    """Turn ``label`` into a string usable as a file-name stem.

    Every run of characters matched by ``_slug_pattern`` is replaced with a
    single underscore, then leading and trailing dots and underscores are
    removed. Falls back to ``"group"`` when nothing is left.
    """
    return _slug_pattern.sub("_", label.strip()).strip("._") or "group"
+def _publication_key(publication: dict) -> Optional[Tuple[str, Optional[int]]]:
+    norm_title = (publication.get("norm_title") or "").strip()
+    if not norm_title:
+        return None
+    year = publication.get("year")
+    if isinstance(year, int):
+        return norm_title, year
+    return norm_title, None
def _format_record(record: dict) -> dict:
    """Return a copy of ``record`` with its authors as a sorted list.

    The result carries ``title``, ``norm_title`` and ``year`` unchanged and
    ``authors`` ordered by ``_lastname``.
    """
    # Sort first so a record without "authors" fails before anything else is read.
    ordered_authors = sorted(record["authors"], key=_lastname)
    formatted = {field: record[field] for field in ("title", "norm_title", "year")}
    formatted["authors"] = ordered_authors
    return formatted
+def _publication_sort_key(record: dict) -> Tuple[int, str]:
+    year = record.get("year")
+    norm_title = (record.get("norm_title") or record.get("title") or "").lower()
+    year_key = year if isinstance(year, int) else -1
+    return (year_key, norm_title)
def aggregate_publications(
    publications_by_author: Dict[str, List[dict]],
    people: Iterable[dict],
    *,
    name_col: str = "name",
    group_col: str = "group",
    cache_dir: Path | str = DEFAULT_CACHE_DIR,
    include_unlabeled: bool = True,
    save_json: bool = True,
    ensure_ascii: bool = False,
) -> GroupedPublications:
    """Group publications by ``group_col`` and deduplicate by normalized title.

    Within each group, publications sharing the same ``(norm_title, year)``
    key are merged into one record that lists every contributing author.

    Parameters
    ----------
    publications_by_author
        Mapping of author name to list of publication dicts, as returned by
        :func:`cooplot.scrape.scrape_all`.
    people
        Iterable of records describing each author. ``group_col`` is used to map
        authors onto the aggregation key.
    name_col
        Field name within ``people`` entries identifying each author.
    group_col
        Field name used to determine group membership.
    cache_dir
        Directory where group-level JSON files will be stored if ``save_json`` is
        ``True``. The directory is created if needed.
    include_unlabeled
        Whether authors without a ``group_col`` value should be collected under an
        ``"Unlabeled"`` bucket.
    save_json
        When ``True`` the grouped publication lists are written to individual
        JSON files under ``cache_dir``.
    ensure_ascii
        Passed through to :func:`json.dumps` so callers can enforce ASCII-only
        output if desired.

    Returns
    -------
    GroupedPublications
        Dataclass containing the grouped publication mapping and the optional
        cache file paths (empty when ``save_json`` is ``False``).

    Notes
    -----
    File names are derived from the group label with ``_slugify``. When two
    labels produce the same file name, the later ones get a numeric suffix
    (``_2``, ``_3``, ...) so that no group's file is overwritten.
    """
    # Author name -> normalized group label; entries without a usable name are skipped.
    name_to_group: Dict[str, str] = {}
    for person in people:
        name_value = person.get(name_col)
        if not isinstance(name_value, str) or not name_value.strip():
            continue
        name_to_group[name_value] = _normalize_group(person.get(group_col))

    # group label -> {(norm_title, year) -> merged record with a set of authors}
    grouped: Dict[str, Dict[Tuple[str, Optional[int]], dict]] = {}
    for author, publications in publications_by_author.items():
        group_label = name_to_group.get(author, _UNLABELED)
        if group_label == _UNLABELED and not include_unlabeled:
            continue
        bucket = grouped.setdefault(group_label, {})
        for publication in publications:
            key = _publication_key(publication)
            if key is None:
                continue
            norm_title, year = key
            entry = bucket.setdefault(
                key,
                {
                    "title": publication.get("title") or norm_title,
                    "norm_title": norm_title,
                    "year": year,
                    "authors": set(),
                },
            )
            # Upgrade a placeholder title (the normalized one) as soon as any
            # contributing record supplies a real title.
            if entry["title"] == entry["norm_title"] and publication.get("title"):
                entry["title"] = publication["title"]
            entry["authors"].add(author)

    grouped_lists: Dict[str, List[dict]] = {}
    for group_label, records in grouped.items():
        formatted_records = [_format_record(rec) for rec in records.values()]
        formatted_records.sort(key=_publication_sort_key)
        grouped_lists[group_label] = formatted_records

    paths: Dict[str, Path] = {}
    if save_json:
        cache_path = Path(cache_dir)
        cache_path.mkdir(parents=True, exist_ok=True)
        used_stems = set()
        for group_label, records in grouped_lists.items():
            base_stem = _slugify(group_label)
            stem = base_stem
            suffix = 2
            # Distinct labels such as "A/B" and "A B" slugify identically;
            # disambiguate so one group's file does not overwrite another's.
            while stem in used_stems:
                stem = f"{base_stem}_{suffix}"
                suffix += 1
            used_stems.add(stem)
            out_path = cache_path / f"{stem}.json"
            out_path.write_text(
                json.dumps(records, ensure_ascii=ensure_ascii, indent=2),
                encoding="utf-8",
            )
            paths[group_label] = out_path
    return GroupedPublications(by_group=grouped_lists, paths=paths)
def aggregate_cross_group_data(
    records: Iterable[dict],
    *,
    filter: Optional[str] = None,
) -> GroupedPublications:
    """Construct a :class:`GroupedPublications` from cross-group collaboration records.

    Parameters
    ----------
    records
        Iterable of dicts as returned by :func:`cooplot.metrics.cross_group_publications`
        (or compatible structure) where each record contains ``title``, ``norm_title``,
        ``year``, ``groups`` and ``authors`` entries. Entries that are not dicts,
        lack a non-empty ``norm_title``, or name no groups are skipped.
    filter
        Optional string selecting records that contain identifiers. Supported values are
        ``"doi"``, ``"pubmed"`` (or ``"pubmed_id"`` / ``"pmid"``), and ``"identifier"``
        (alias ``"any"``) which keeps entries having either DOI or PubMed identifiers.
        When ``None`` (default) no filtering is applied.

    Returns
    -------
    GroupedPublications
        One publication list per group, each sorted with
        ``_publication_sort_key``. ``paths`` is always empty because nothing is
        written to disk.

    Raises
    ------
    ValueError
        If ``filter`` is not one of the supported values.
    """
    filter_normalized = (filter or "").strip().lower()
    doi_filters = {"doi", "identifier", "any"}
    pubmed_filters = {"pubmed", "pubmed_id", "pmid", "identifier", "any"}
    if filter_normalized and filter_normalized not in doi_filters | pubmed_filters:
        raise ValueError(
            "filter must be one of None, 'doi', 'pubmed', 'pubmed_id', 'pmid', "
            "'identifier', or 'any'",
        )
    want_doi = filter_normalized in doi_filters
    want_pubmed = filter_normalized in pubmed_filters

    def _has_doi(record: dict) -> bool:
        doi = record.get("doi") or record.get("DOI")
        return isinstance(doi, str) and bool(doi.strip())

    def _has_pubmed(record: dict) -> bool:
        pmid = record.get("pubmed_id") or record.get("pmid")
        # PubMed IDs are numeric, so accept integers as well as strings.
        # bool is a subclass of int and is never a valid identifier.
        if isinstance(pmid, bool):
            return False
        if isinstance(pmid, int):
            return pmid > 0
        return isinstance(pmid, str) and bool(pmid.strip())

    def _passes_filter(record: dict) -> bool:
        if not filter_normalized:
            return True
        return (want_doi and _has_doi(record)) or (
            want_pubmed and _has_pubmed(record)
        )

    by_group: Dict[str, List[dict]] = defaultdict(list)
    for record in records:
        if not isinstance(record, dict):
            continue
        if not _passes_filter(record):
            continue
        title = record.get("title")
        norm_title = record.get("norm_title")
        if not isinstance(norm_title, str) or not norm_title.strip():
            continue
        year = record.get("year")
        authors_by_group = record.get("authors") or {}
        # Fall back to the keys of the authors mapping when "groups" is absent.
        groups = record.get("groups") or list(authors_by_group.keys())
        if not groups:
            continue
        base = {
            "title": title or norm_title,
            "norm_title": norm_title,
            "year": year if isinstance(year, int) else None,
        }
        for group in groups:
            if not isinstance(group, str):
                continue
            group_name = group.strip()
            if not group_name:
                continue
            # Try the label as given first, then its stripped form.
            authors = authors_by_group.get(group) or authors_by_group.get(
                group_name, []
            )
            if isinstance(authors, str):
                # A single author given as a plain string; list() would split
                # it into individual characters.
                authors = [authors]
            elif not isinstance(authors, list):
                authors = list(authors)  # tolerate iterables/sets
            filtered_authors = [
                author
                for author in authors
                if isinstance(author, str) and author.strip()
            ]
            formatted = dict(base)
            formatted["authors"] = sorted(filtered_authors, key=_lastname)
            by_group[group_name].append(formatted)

    grouped_lists = {
        group: sorted(pubs, key=_publication_sort_key)
        for group, pubs in by_group.items()
    }
    return GroupedPublications(by_group=grouped_lists, paths={})