PyPI - biocypher - Versions diffs - 0.9.6__py3-none-any.whl → 0.10.0__py3-none-any.whl - Mend

biocypher-0.9.6-py3-none-any.whl → biocypher-0.10.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of biocypher might be problematic. Click here for more details.

Files changed (12)

biocypher/_metadata.py +1 -1
biocypher/_translate.py +5 -8
biocypher/output/in_memory/_airr.py +491 -0
biocypher/output/in_memory/_get_in_memory_kg.py +8 -6
biocypher/output/in_memory/_in_memory_kg.py +57 -0
biocypher/output/in_memory/_pandas.py +1 -59
biocypher/output/write/_get_writer.py +2 -0
biocypher/output/write/graph/_airr.py +32 -0
{biocypher-0.9.6.dist-info → biocypher-0.10.0.dist-info}/METADATA +4 -2
{biocypher-0.9.6.dist-info → biocypher-0.10.0.dist-info}/RECORD +12 -10
{biocypher-0.9.6.dist-info → biocypher-0.10.0.dist-info}/LICENSE +0 -0
{biocypher-0.9.6.dist-info → biocypher-0.10.0.dist-info}/WHEEL +0 -0

biocypher/_metadata.py CHANGED Viewed

@@ -10,7 +10,7 @@ import pathlib
 import toml
-_VERSION = "0.9.6"
+_VERSION = "0.10.0"
 def get_metadata():

biocypher/_translate.py CHANGED Viewed

@@ -37,15 +37,12 @@ class Translator:
         Args:
         ----
-            leaves:
-                Dictionary detailing the leaves of the hierarchy
-                tree representing the structure of the graph; the leaves are
-                the entities that will be direct components of the graph,
-                while the intermediary nodes are additional labels for
-                filtering purposes.
+            ontology (Ontology): An Ontology object providing schema and mapping details.
             strict_mode:
-                If True, the translator will raise an error if input data do not
-                carry source, licence, and version information.
+                strict_mode (bool, optional): If True, enforces that every node and edge carries
+                the required 'source', 'licence', and 'version' properties. Raises ValueError
+                if these are missing. Defaults to False.
         """
         self.ontology = ontology

biocypher/output/in_memory/_airr.py ADDED Viewed

@@ -0,0 +1,491 @@
+from typing import Any, Optional
+from biocypher._create import BioCypherEdge, BioCypherNode
+from biocypher._deduplicate import Deduplicator
+from biocypher._logger import logger
+from biocypher.output.in_memory._in_memory_kg import _InMemoryKG
+try:
+    from scirpy.io import AirrCell
+    HAS_SCIRPY = True
+except ImportError:
+    HAS_SCIRPY = False
+class AirrKG(_InMemoryKG):
+    """Knowledge graph for AIRR (Adaptive Immune Receptor Repertoire) data.
+    This class implements the AIRR data model for representing immune receptor sequences
+    (antibodies and T cell receptors) and their annotations. To ensure proper conversion
+    to AIRR format, your schema file should define immune receptor entities with property
+    names that match the AIRR standards.
+    Key property names in your schema for immune receptor entities:
+    - locus: The gene locus (e.g., "TRA", "TRB", "IGH", "IGK", "IGL")
+    - junction_aa: The amino acid sequence of the junction region (CDR3)
+    - v_call: The V gene assignment
+    - j_call: The J gene assignment
+    - productive: Whether the sequence is productive
+    Pairing Strategies specified in get_kg method:
+    - Indirect pairings allowed:
+        Epitope is only matched with ONE of the paired receptors -> the "paired" AIRR cell will be created
+    - Indirect pairings not allowed:
+        Epitope is only matched with ONE of the paired receptors -> no "paired" AIRR cell will be created
+    For a complete list of available fields and their descriptions, see:
+    https://docs.airr-community.org/en/stable/datarep/rearrangements.html#fields
+    All properties from the biocypher schema defined by user will be preserved in the AIRR format.
+    """
+    # Constants for internal property filtering
+    _INTERNAL_PROPERTIES = frozenset(["node_id", "node_label", "id", "preferred_id"])
+    def __init__(
+        self,
+        deduplicator: Optional["Deduplicator"] = None,
+        metadata_entity_type: str = "epitope",
+    ) -> None:
+        """Initialize AirrKG with configurable metadata node type.
+        Args:
+        ----
+            deduplicator: Deduplicator instance
+            metadata_entity_type: String specifying the metadata node type (default: "epitope")
+        """
+        super().__init__()
+        self.deduplicator = deduplicator or Deduplicator()
+        self.metadata_entity_type = metadata_entity_type
+        # Initialize storage for processed cells
+        self.adjacency_list = {}
+        self.airr_cells = []
+        # These will be populated when nodes and edges are added
+        self.sequence_entity_types = {}
+        self.chain_relationship_types = []
+        self.chain_to_epitope_relationship_types = []
+    def _check_dependencies(self) -> None:
+        """Verify that scirpy is available."""
+        if not HAS_SCIRPY:
+            msg = (
+                "AirrCell module from scirpy not detected. "
+                "Install it with 'poetry add biocypher[scirpy]' or 'poetry add scirpy'."
+            )
+            raise ImportError(msg)
+    def get_kg(self, indirect_pairings: bool = True) -> list[AirrCell]:
+        """Convert directly to AIRR format using AirCell from scirpy.
+        Args:
+        ----
+            indirect_pairings: Boolean controlling pairing strategy (default: True)
+                - True:
+                    Epitope is only matched with ONE of the paired receptors -> the "paired" AIRR cell will be created
+                - False:
+                    Epitope is only matched with ONE of the paired receptors -> no "paired" AIRR cell will be created
+        Returns:
+        -------
+            list: List of generated AIRR cells
+        """
+        self._check_dependencies()
+        if not self.airr_cells:
+            self.airr_cells = self._to_airr_cells(self.adjacency_list, indirect_pairings)
+        return self.airr_cells
+    def add_nodes(self, nodes: list[BioCypherNode]) -> None:
+        """Add BioCypher nodes, organizing them by type."""
+        self._add_to_entities_by_type(nodes)
+    def add_edges(self, edges: list[BioCypherEdge]) -> None:
+        """Add BioCypher edges, organizing them by type."""
+        self._add_to_entities_by_type(edges)
+    def _add_to_entities_by_type(self, entities: dict[str, list[Any]]) -> None:
+        """Add all entities (both nodes and edges) to a common adj. list."""
+        lists = self._separate_entity_types(entities)
+        for _type, _entities in lists.items():
+            if _type not in self.adjacency_list:
+                self.adjacency_list[_type] = []
+            self.adjacency_list[_type].extend(_entities)
+    def _process_entities(self, entities: dict[str, list[Any]]) -> tuple[dict, dict, dict]:
+        """Process entities and organize them into sequence nodes, metadata nodes, and receptor-epitope mappings.
+        Args:
+        ----
+            entities: Dictionary mapping entity types to lists of BioCypherNode/BioCypherEdge objects
+        Returns:
+        -------
+            tuple: (sequence_nodes, metadata_nodes, receptor_epitope_mapping)
+        """
+        sequence_nodes = {}
+        metadata_nodes = {}
+        receptor_epitope_mapping = {}
+        # Determine entity types while processing
+        all_node_types = set()
+        all_edge_types = set()
+        for entity_type, entities_list in entities.items():
+            if not entities_list:  # Skip empty lists
+                continue
+            # Determine if this is a node or edge type
+            if isinstance(entities_list[0], BioCypherNode):
+                all_node_types.add(entity_type)
+                if entity_type == self.metadata_entity_type:
+                    metadata_nodes.update({node.get_id(): node for node in entities_list})
+                else:
+                    sequence_nodes.update({node.get_id(): node for node in entities_list})
+                    self.sequence_entity_types[entity_type] = entity_type.replace(" sequence", "").upper()
+            elif isinstance(entities_list[0], BioCypherEdge):
+                all_edge_types.add(entity_type)
+        # Update relationship types
+        self.chain_relationship_types = [
+            edge_type for edge_type in all_edge_types if self.metadata_entity_type not in edge_type.lower()
+        ]
+        self.chain_to_epitope_relationship_types = [
+            edge_type for edge_type in all_edge_types if self.metadata_entity_type in edge_type.lower()
+        ]
+        # Process chain-to-epitope relationships
+        for entity_type in self.chain_to_epitope_relationship_types:
+            self._update_receptor_epitope_mapping(entities[entity_type], receptor_epitope_mapping)
+        return sequence_nodes, metadata_nodes, receptor_epitope_mapping
+    def _update_receptor_epitope_mapping(self, edges: list[BioCypherEdge], mapping: dict[str, set]) -> None:
+        """Update receptor-epitope mapping with new edges.
+        Args:
+        ----
+            edges: List of edges to process
+            mapping: Dictionary to update with receptor-epitope mappings
+        """
+        for edge in edges:
+            source_id = edge.get_source_id()
+            if source_id not in mapping:
+                mapping[source_id] = set()
+            mapping[source_id].add(edge.get_target_id())
+    def _process_paired_chains(
+        self,
+        entities: dict[str, list[Any]],
+        sequence_nodes: dict[str, BioCypherNode],
+        metadata_nodes: dict[str, BioCypherNode],
+        receptor_epitope_mapping: dict[str, set],
+        indirect_pairings: bool = True,
+    ) -> tuple[list[AirrCell], set[str], int]:
+        """Process paired chains and generate AIRR cells.
+        Args:
+        ----
+            entities: Dictionary of all entities
+            sequence_nodes: Dictionary of sequence nodes
+            metadata_nodes: Dictionary of metadata nodes
+            receptor_epitope_mapping: Dictionary of receptor-epitope mappings
+            indirect_pairings: Boolean controlling pairing strategy
+        Returns:
+        -------
+            tuple: (list of generated cells, set of processed chain IDs, count of cells with multiple epitopes)
+        """
+        airr_cells = []
+        processed_chains = set()
+        n_metacells = 0
+        for entity_type, edges in entities.items():
+            if entity_type in self.chain_relationship_types:
+                for edge in edges:
+                    source_id, target_id = edge.get_source_id(), edge.get_target_id()
+                    processed_chains.update([source_id, target_id])
+                    # Use conditional logic for pairing strategy
+                    source_metadata = receptor_epitope_mapping.get(source_id, set())
+                    target_metadata = receptor_epitope_mapping.get(target_id, set())
+                    if indirect_pairings:
+                        # Union: create paired cell if either chain binds epitopes
+                        metadata_ids = source_metadata | target_metadata
+                        metadata_nodes_cell = self._get_metadata_nodes(metadata_ids, metadata_nodes)
+                        if metadata_nodes_cell:
+                            cell_s = self._generate_airr_cell(
+                                cell_id=edge.get_id(),
+                                source_node=sequence_nodes.get(source_id),
+                                target_node=sequence_nodes.get(target_id),
+                                metadata_nodes=metadata_nodes_cell,
+                                paired=True,
+                                receptor_epitope_mapping=receptor_epitope_mapping,
+                            )
+                            airr_cells.extend(cell_s)
+                            if len(cell_s) > 1:
+                                n_metacells += 1
+                    else:
+                        # Intersection: create paired cell only if both chains bind same epitopes
+                        shared_metadata_ids = source_metadata & target_metadata
+                        # Create paired cell if there are shared epitopes
+                        if shared_metadata_ids:
+                            shared_metadata_nodes = self._get_metadata_nodes(shared_metadata_ids, metadata_nodes)
+                            if shared_metadata_nodes:
+                                cell_s = self._generate_airr_cell(
+                                    cell_id=edge.get_id(),
+                                    source_node=sequence_nodes.get(source_id),
+                                    target_node=sequence_nodes.get(target_id),
+                                    metadata_nodes=shared_metadata_nodes,
+                                    paired=True,
+                                    receptor_epitope_mapping=receptor_epitope_mapping,
+                                )
+                                airr_cells.extend(cell_s)
+                                if len(cell_s) > 1:
+                                    n_metacells += 1
+                        # Create unpaired cells for chains with non-overlapping epitopes
+                        source_only_metadata = source_metadata - target_metadata
+                        target_only_metadata = target_metadata - source_metadata
+                        # Create unpaired cell for source chain if it has unique epitopes
+                        if source_only_metadata:
+                            source_only_nodes = self._get_metadata_nodes(source_only_metadata, metadata_nodes)
+                            if source_only_nodes:
+                                source_cells = self._generate_airr_cell(
+                                    cell_id=f"unpaired_{source_id}",
+                                    source_node=sequence_nodes.get(source_id),
+                                    target_node=None,
+                                    metadata_nodes=source_only_nodes,
+                                    paired=False,
+                                    receptor_epitope_mapping=receptor_epitope_mapping,
+                                )
+                                airr_cells.extend(source_cells)
+                                if len(source_cells) > 1:
+                                    n_metacells += 1
+                        # Create unpaired cell for target chain if it has unique epitopes
+                        if target_only_metadata:
+                            target_only_nodes = self._get_metadata_nodes(target_only_metadata, metadata_nodes)
+                            if target_only_nodes:
+                                target_cells = self._generate_airr_cell(
+                                    cell_id=f"unpaired_{target_id}",
+                                    source_node=sequence_nodes.get(target_id),
+                                    target_node=None,
+                                    metadata_nodes=target_only_nodes,
+                                    paired=False,
+                                    receptor_epitope_mapping=receptor_epitope_mapping,
+                                )
+                                airr_cells.extend(target_cells)
+                                if len(target_cells) > 1:
+                                    n_metacells += 1
+        return airr_cells, processed_chains, n_metacells
+    def _process_unpaired_chains(
+        self,
+        receptor_epitope_mapping: dict[str, set],
+        sequence_nodes: dict[str, BioCypherNode],
+        metadata_nodes: dict[str, BioCypherNode],
+        processed_chains: set[str],
+    ) -> tuple[list[AirrCell], int]:
+        """Process unpaired chains and generate AIRR cells.
+        Args:
+        ----
+            receptor_epitope_mapping: Dictionary of receptor-epitope mappings
+            sequence_nodes: Dictionary of sequence nodes
+            metadata_nodes: Dictionary of metadata nodes
+            processed_chains: Set of already processed chain IDs
+        Returns:
+        -------
+            tuple: (List of generated cells, count of cells with multiple epitopes)
+        """
+        airr_cells = []
+        n_metacells = 0
+        for chain_id in receptor_epitope_mapping:
+            if chain_id not in processed_chains:
+                # Get all metadata nodes for this unpaired chain
+                metadata_nodes_cell = self._get_metadata_nodes(receptor_epitope_mapping[chain_id], metadata_nodes)
+                if metadata_nodes_cell:
+                    cell_s = self._generate_airr_cell(
+                        cell_id=f"unpaired_{chain_id}",
+                        source_node=sequence_nodes.get(chain_id),
+                        target_node=None,
+                        metadata_nodes=metadata_nodes_cell,
+                        paired=False,
+                        receptor_epitope_mapping=receptor_epitope_mapping,
+                    )
+                    airr_cells.extend(cell_s)
+                    # Check if multiple cells were generated (indicating multiple epitopes)
+                    if len(cell_s) > 1:
+                        n_metacells += 1
+        return airr_cells, n_metacells
+    def _to_airr_cells(self, entities: dict[str, list[Any]], indirect_pairings: bool = True) -> list[AirrCell]:
+        """Convert BioCypher entities to AIRR cells using configurable mappings.
+        Args:
+        ----
+            entities: Dictionary mapping entity types to lists of BioCypherNode/BioCypherEdge objects
+            indirect_pairings: Boolean controlling pairing strategy (default: True)
+                - True:
+                    Epitope is only matched with ONE of the paired receptors -> the "paired" AIRR cell will be created
+                - False:
+                    Epitope is only matched with ONE of the paired receptors -> no "paired" AIRR cell will be created
+        Returns:
+        -------
+            list: List of generated AIRR cells
+        """
+        if not entities:
+            msg = "No entities provided for conversion."
+            raise ValueError(msg)
+        logger.info("Starting conversion to AIRR cells")
+        # Process all entities
+        sequence_nodes, metadata_nodes, receptor_epitope_mapping = self._process_entities(entities)
+        # Process paired chains
+        airr_cells, processed_chains, paired_metacells = self._process_paired_chains(
+            entities,
+            sequence_nodes,
+            metadata_nodes,
+            receptor_epitope_mapping,
+            indirect_pairings,
+        )
+        # Process unpaired chains
+        unpaired_cells, unpaired_metacells = self._process_unpaired_chains(
+            receptor_epitope_mapping,
+            sequence_nodes,
+            metadata_nodes,
+            processed_chains,
+        )
+        airr_cells.extend(unpaired_cells)
+        # Calculate total cells with multiple epitopes
+        total_metacells = paired_metacells + unpaired_metacells
+        # Log information about cells
+        logger.info(f"Generated total of {len(airr_cells)} AIRR cells")
+        if total_metacells > 0:
+            logger.info(f"{total_metacells} cells with more than 1 epitope were detected")
+        return airr_cells
+    def _get_metadata_nodes(
+        self,
+        metadata_ids: set[str],
+        metadata_nodes: dict[str, BioCypherNode],
+    ) -> list[BioCypherNode]:
+        """Get metadata nodes for a set of metadata IDs.
+        Args:
+        ----
+            metadata_ids: Set of metadata IDs
+            metadata_nodes: Dictionary of metadata nodes
+        Returns:
+        -------
+            list: List of metadata nodes
+        """
+        return [metadata_nodes[ep_id] for ep_id in metadata_ids if ep_id in metadata_nodes]
+    def _generate_airr_cell(
+        self,
+        cell_id: str,
+        source_node: BioCypherNode | None,
+        target_node: BioCypherNode | None,
+        metadata_nodes: list[BioCypherNode],
+        paired: bool,
+        receptor_epitope_mapping: dict[str, set] | None = None,
+    ) -> list[AirrCell]:
+        cell = AirrCell(cell_id=cell_id)
+        # Process both chains
+        for node in [source_node, target_node]:
+            if not node:  # Skip if node is None
+                continue
+            props = node.get_properties()
+            chain = AirrCell.empty_chain_dict()
+            # Add all properties except internal ones
+            for key, value in props.items():
+                if key not in self._INTERNAL_PROPERTIES:
+                    chain[key] = value
+            # Add locus based on node type
+            chain["locus"] = self.sequence_entity_types.get(node.get_label(), node.get_label())
+            chain["consensus_count"] = 0
+            chain["productive"] = True
+            # Add binds_epitope field based on receptor_epitope_mapping
+            if receptor_epitope_mapping and node.get_id() in receptor_epitope_mapping:
+                chain["validated_epitope"] = bool(receptor_epitope_mapping[node.get_id()])
+            else:
+                chain["validated_epitope"] = False
+            cell.add_chain(chain)
+        # Add metadata
+        return self.add_metadata(metadata_nodes, cell, paired)
+    def add_metadata(self, metadata_nodes: list[BioCypherNode], cell: AirrCell, paired: bool) -> list[AirrCell]:
+        """Add metadata from nodes to cell(s) and return a list of cells.
+        Args:
+        ----
+            metadata_nodes: List of metadata nodes to add
+            cell: Base cell to add metadata to
+            paired: Whether the cell is paired
+        Returns:
+        -------
+            List of cells with metadata added
+        """
+        cells = []
+        if not metadata_nodes:
+            cell["data_source"] = "BioCypher"
+            cell["is_paired"] = paired
+            cells.append(cell)
+        else:
+            for i, node in enumerate(metadata_nodes):
+                # Create a new AirrCell for each metadata node
+                if i > 0:
+                    cell_id_new = f"{cell.cell_id}_meta{i+1}"
+                    meta_cell = AirrCell(cell_id=cell_id_new)
+                    for chain in cell.chains:
+                        meta_cell.add_chain(chain)
+                else:
+                    meta_cell = cell
+                props = node.get_properties()
+                for key, value in props.items():
+                    if key not in self._INTERNAL_PROPERTIES:
+                        meta_cell[key] = value
+                meta_cell["data_source"] = "BioCypher"
+                meta_cell["is_paired"] = paired
+                cells.append(meta_cell)
+        return cells

biocypher/output/in_memory/_get_in_memory_kg.py CHANGED Viewed

@@ -8,6 +8,7 @@ from __future__ import annotations
 from typing import TYPE_CHECKING
 from biocypher._logger import logger
+from biocypher.output.in_memory._airr import AirrKG
 from biocypher.output.in_memory._networkx import NetworkxKG
 from biocypher.output.in_memory._pandas import PandasKG
@@ -19,7 +20,7 @@ logger.debug(f"Loading module {__name__}.")
 __all__ = ["get_in_memory_kg"]
-IN_MEMORY_DBMS = ["csv", "pandas", "tabular", "networkx"]
+IN_MEMORY_DBMS = ["csv", "pandas", "tabular", "networkx", "airr"]
 def get_in_memory_kg(
@@ -35,10 +36,11 @@ def get_in_memory_kg(
     """
     if dbms in ["csv", "pandas", "tabular"]:
         return PandasKG(deduplicator)
     if dbms == "networkx":
         return NetworkxKG(deduplicator)
-    msg = f"Getting the in memory BioCypher KG is not supported for the DBMS {dbms}. Supported: {IN_MEMORY_DBMS}."
-    logger.error(msg)
-    raise NotImplementedError(msg)
+    elif dbms == "airr":
+        return AirrKG(deduplicator)
+    else:
+        msg = f"Getting the in memory BioCypher KG is not supported for the DBMS {dbms}. Supported: {IN_MEMORY_DBMS}."
+        logger.error(msg)
+        raise NotImplementedError(msg)

biocypher/output/in_memory/_in_memory_kg.py CHANGED Viewed

@@ -1,5 +1,7 @@
 from abc import ABC, abstractmethod
+from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
 class _InMemoryKG(ABC):
     """Abstract class for handling the in-memory Knowledge Graph instance.
@@ -9,6 +11,7 @@ class _InMemoryKG(ABC):
     - add_nodes
     - add_edges
     - get_kg
+    - _separate_entity_types
     Raises:
         NotImplementedError: InMemoryKG implementation must override 'add_nodes'
@@ -38,3 +41,57 @@ class _InMemoryKG(ABC):
     def get_kg(self):
         """Return the in-memory knowledge graph."""
         raise NotImplementedError("InMemoryKG implementation must override 'get_kg'")
+    def _separate_entity_types(self, entities):
+        """
+        Given mixed iterable of BioCypher objects, separate them into lists by
+        type. Also deduplicates using the `Deduplicator` instance.
+        """
+        lists = {}
+        for entity in entities:
+            if (
+                not isinstance(entity, BioCypherNode)
+                and not isinstance(entity, BioCypherEdge)
+                and not isinstance(entity, BioCypherRelAsNode)
+            ):
+                raise TypeError(
+                    "Expected a BioCypherNode / BioCypherEdge / " f"BioCypherRelAsNode, got {type(entity)}."
+                )
+            if isinstance(entity, BioCypherNode):
+                seen = self.deduplicator.node_seen(entity)
+            elif isinstance(entity, BioCypherEdge):
+                seen = self.deduplicator.edge_seen(entity)
+            elif isinstance(entity, BioCypherRelAsNode):
+                seen = self.deduplicator.rel_as_node_seen(entity)
+            if seen:
+                continue
+            if isinstance(entity, BioCypherRelAsNode):
+                node = entity.get_node()
+                source_edge = entity.get_source_edge()
+                target_edge = entity.get_target_edge()
+                _type = node.get_type()
+                if _type not in lists:
+                    lists[_type] = []
+                lists[_type].append(node)
+                _source_type = source_edge.get_type()
+                if _source_type not in lists:
+                    lists[_source_type] = []
+                lists[_source_type].append(source_edge)
+                _target_type = target_edge.get_type()
+                if _target_type not in lists:
+                    lists[_target_type] = []
+                lists[_target_type].append(target_edge)
+                continue
+            _type = entity.get_type()
+            if _type not in lists:
+                lists[_type] = []
+            lists[_type].append(entity)
+        return lists

biocypher/output/in_memory/_pandas.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import pandas as pd
-from biocypher._create import BioCypherEdge, BioCypherNode, BioCypherRelAsNode
 from biocypher.output.in_memory._in_memory_kg import _InMemoryKG
@@ -20,65 +19,8 @@ class PandasKG(_InMemoryKG):
     def add_edges(self, edges):
         self.add_tables(edges)
-    def _separate_entity_types(self, entities):
-        """
-        Given mixed iterable of BioCypher objects, separate them into lists by
-        type. Also deduplicates using the `Deduplicator` instance.
-        """
-        lists = {}
-        for entity in entities:
-            if (
-                not isinstance(entity, BioCypherNode)
-                and not isinstance(entity, BioCypherEdge)
-                and not isinstance(entity, BioCypherRelAsNode)
-            ):
-                raise TypeError(
-                    "Expected a BioCypherNode / BioCypherEdge / " f"BioCypherRelAsNode, got {type(entity)}."
-                )
-            if isinstance(entity, BioCypherNode):
-                seen = self.deduplicator.node_seen(entity)
-            elif isinstance(entity, BioCypherEdge):
-                seen = self.deduplicator.edge_seen(entity)
-            elif isinstance(entity, BioCypherRelAsNode):
-                seen = self.deduplicator.rel_as_node_seen(entity)
-            if seen:
-                continue
-            if isinstance(entity, BioCypherRelAsNode):
-                node = entity.get_node()
-                source_edge = entity.get_source_edge()
-                target_edge = entity.get_target_edge()
-                _type = node.get_type()
-                if _type not in lists:
-                    lists[_type] = []
-                lists[_type].append(node)
-                _source_type = source_edge.get_type()
-                if _source_type not in lists:
-                    lists[_source_type] = []
-                lists[_source_type].append(source_edge)
-                _target_type = target_edge.get_type()
-                if _target_type not in lists:
-                    lists[_target_type] = []
-                lists[_target_type].append(target_edge)
-                continue
-            _type = entity.get_type()
-            if _type not in lists:
-                lists[_type] = []
-            lists[_type].append(entity)
-        return lists
     def add_tables(self, entities):
-        """
-        Add Pandas dataframes for each node and edge type in the input.
-        """
+        """Add Pandas dataframes for each node and edge type in the input."""
         lists = self._separate_entity_types(entities)
         for _type, _entities in lists.items():

biocypher/output/write/_get_writer.py CHANGED Viewed

@@ -9,6 +9,7 @@ from typing import TYPE_CHECKING
 from biocypher._config import config as _config
 from biocypher._logger import logger
 from biocypher.output.write._batch_writer import _BatchWriter
+from biocypher.output.write.graph._airr import _AirrWriter
 from biocypher.output.write.graph._arangodb import _ArangoDBBatchWriter
 from biocypher.output.write.graph._neo4j import _Neo4jBatchWriter
 from biocypher.output.write.graph._networkx import _NetworkXWriter
@@ -50,6 +51,7 @@ DBMS_TO_CLASS = {
     "Tabular": _PandasCSVWriter,
     "networkx": _NetworkXWriter,
     "NetworkX": _NetworkXWriter,
+    "airr": _AirrWriter,
 }

biocypher/output/write/graph/_airr.py ADDED Viewed

@@ -0,0 +1,32 @@
+"""Module to provide the AnnData writer class for BioCypher."""
+from biocypher._logger import logger
+from biocypher.output.write._writer import _Writer
+class _AirrWriter(_Writer):
+    """A minimal placeholder writer class that implements the required methods
+    but performs no actual writing operations, since there is an existing anndata native writer functionality
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        logger.info("Placeholder writer initialized")
+    def _write_node_data(self, nodes) -> bool:
+        """Required implementation that does nothing with nodes."""
+        logger.info("Placeholder: Node data received but not processed")
+        return True
+    def _write_edge_data(self, edges) -> bool:
+        """Required implementation that does nothing with edges."""
+        logger.info("Placeholder: Edge data received but not processed")
+        return True
+    def _construct_import_call(self) -> str:
+        """Return a placeholder import script."""
+        return "# This is a placeholder import script\nprint('No actual import functionality implemented')"
+    def _get_import_script_name(self) -> str:
+        """Return a placeholder script name."""
+        return "placeholder_import.py"

{biocypher-0.9.6.dist-info → biocypher-0.10.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: biocypher
-Version: 0.9.6
+Version: 0.10.0
 Summary: A unifying framework for biomedical research knowledge graphs
 Home-page: https://github.com/biocypher/biocypher
 License: MIT
@@ -19,6 +19,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
+Provides-Extra: scirpy
 Requires-Dist: PyYAML (>=5.0)
 Requires-Dist: appdirs
 Requires-Dist: more_itertools
@@ -27,6 +28,7 @@ Requires-Dist: networkx (>=3.0,<4.0)
 Requires-Dist: pandas (>=2.0.1,<3.0.0)
 Requires-Dist: pooch (>=1.7.0,<2.0.0)
 Requires-Dist: rdflib (>=6.2.0,<7.0.0)
+Requires-Dist: scirpy (>=0.22.0,<0.23.0) ; extra == "scirpy"
 Requires-Dist: tqdm (>=4.65.0,<5.0.0)
 Requires-Dist: treelib (==1.6.4)
 Project-URL: Bug Tracker, https://github.com/biocypher/biocypher/issues
@@ -62,7 +64,7 @@ the docs [here](https://biocypher.org).
            margin-left: auto;
            margin-right: auto;
            width: 70%;"
-    src="docs/graphical_abstract.png"
+    src="docs/assets/img/graphical-abstract-biocypher.png"
     alt="Graphical Abstract">
 </img>

{biocypher-0.9.6.dist-info → biocypher-0.10.0.dist-info}/RECORD RENAMED Viewed

@@ -11,24 +11,26 @@ biocypher/_deduplicate.py,sha256=rtglcaLRaVzNjLtaPwTGP8VvCM4PHYQ5CZ-cm32CrKQ,484
 biocypher/_get.py,sha256=_wUjhRjH2J6Qhq0Ndy3kdfaWhHDTT-dxyCvtuH36My4,14868
 biocypher/_logger.py,sha256=y9dh3SPJOCWXnkFSYSK7aj_-pB7zlAkNCf43Dp1lt74,2941
 biocypher/_mapping.py,sha256=ntspG2C_NaQODhWTBFk0CDvolkOCjtqlQ9E-NkJAuTg,9030
-biocypher/_metadata.py,sha256=Jb4Uva2PzrPbxzio7DMQnX0WuIFrl_pgyDW89L1R1oQ,1415
+biocypher/_metadata.py,sha256=m8xeGsUl8MT9Tdlh_KKcPCa8Pf8Tn84yUlsWkJoxi2M,1416
 biocypher/_misc.py,sha256=YzlY7zwa0mim9QFg9HwXErkJFIH3cvLrbgjF8tKOIT8,6353
 biocypher/_ontology.py,sha256=lipZxU3aj6zrTbBrJZmCW6IRCuz-KQG3AfbYCVq6aFE,33133
-biocypher/_translate.py,sha256=9E19eLRL0VnxxDuiNhZ5vu54XyKXnfLuBhCgNcL9yAE,17000
+biocypher/_translate.py,sha256=NKSM9lxNjNNbgQrK_24eWYh3B41TS7kjnSwjySnK3s0,16851
 biocypher/output/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 biocypher/output/connect/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 biocypher/output/connect/_get_connector.py,sha256=Qimv3kTkXYkhJZRT6nq8mwIM2wORCnyqqHqF2IByuuc,1152
 biocypher/output/connect/_neo4j_driver.py,sha256=kXjOXW12wZFfEp7plAuo40bPSvOfd-i9m4YaXoMq-p0,12357
 biocypher/output/in_memory/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-biocypher/output/in_memory/_get_in_memory_kg.py,sha256=29DvmOdRUuzEblyMxy5001R4zjEXW3eHNv0htko7c4Y,1115
-biocypher/output/in_memory/_in_memory_kg.py,sha256=g1TPN8PkeAyXbrRuTAjshqC8voI6EmLqR8S_otmviwU,1423
+biocypher/output/in_memory/_airr.py,sha256=tOYww8eHaH4EOElRKFxwShlVW83tXx4xbv7ZfrInphg,20461
+biocypher/output/in_memory/_get_in_memory_kg.py,sha256=NQ-AT8jgAvb-aTK7zsYrItvlDEow_Mfni5tKEJjwjx0,1256
+biocypher/output/in_memory/_in_memory_kg.py,sha256=BTBtqb2ZC_zxXdOJ59BQKUoGXZSpiaRG6VecjZGWCm0,3526
 biocypher/output/in_memory/_networkx.py,sha256=cSOSAreP7S3oeGT6noZ1kAIvSnkVnU3NUp1OY4yqzn0,1515
-biocypher/output/in_memory/_pandas.py,sha256=Ot2jbK5t_YLHqw0BUv9Z_qWNy9r6IX1LYEyejOSJzos,3288
+biocypher/output/in_memory/_pandas.py,sha256=ndZcAAdsw38qZW3nWehcSxhpBGM8pXsn3DPoCcppI0U,1196
 biocypher/output/write/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 biocypher/output/write/_batch_writer.py,sha256=_Dao7z4KN0Uhr86oOOWYEDrIUikR7T0v1SJC2Btd8Y4,38745
-biocypher/output/write/_get_writer.py,sha256=JozRWCMhvh65aQAlcGiiD5x3Nl1HSW8mK1Zf2nTSOzI,4385
+biocypher/output/write/_get_writer.py,sha256=fXputhpt6K4mF8Ti6LFgwZIMU0GrK3aHkWIj1g4liwI,4469
 biocypher/output/write/_writer.py,sha256=y0dWI-RyQdrBLr9Fs91Y9KcCMjnlCaKJT0eWsIS2hG4,7158
 biocypher/output/write/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+biocypher/output/write/graph/_airr.py,sha256=XWzcqwoMZKZ8n6f2Y77hv1ZSRyg2tOVqlzX133-biG8,1274
 biocypher/output/write/graph/_arangodb.py,sha256=xue3hm_DVB5pMR5qqfGXlXll3RpILA0tXos2J-as1-E,7906
 biocypher/output/write/graph/_neo4j.py,sha256=tBPhxn8JAmSS6KmiePofwr9LpGjHQH9BTnpHVK2ellM,12042
 biocypher/output/write/graph/_networkx.py,sha256=2WYkw5ZM3Bp236iwAxEAp3A1DxHKT4_hEPNMUKvPHp4,2320
@@ -38,7 +40,7 @@ biocypher/output/write/relational/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
 biocypher/output/write/relational/_csv.py,sha256=m0BSQXts88Qu5AEvoIgnwRz54ia38g4VN3PaA3LCYM8,2807
 biocypher/output/write/relational/_postgresql.py,sha256=75iJvv-0ewSsQXhVeoYoGnmYQnKY_B4iItZV7DpEBto,12190
 biocypher/output/write/relational/_sqlite.py,sha256=BuGWOeeNA83lbUvjpkzqcR9_baWLsbfmLXBKe4O1EPE,2105
-biocypher-0.9.6.dist-info/LICENSE,sha256=oejgxuxyjSnyPw3YPloz6-dCBB_nYizJ4jDQnr-xZUU,1082
-biocypher-0.9.6.dist-info/METADATA,sha256=8zvMLLWli78WBCjcBMhkqdwQb2rEqIinHz4aePh8RZw,10600
-biocypher-0.9.6.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
-biocypher-0.9.6.dist-info/RECORD,,
+biocypher-0.10.0.dist-info/LICENSE,sha256=oejgxuxyjSnyPw3YPloz6-dCBB_nYizJ4jDQnr-xZUU,1082
+biocypher-0.10.0.dist-info/METADATA,sha256=TAd1YjHL94Sdp0_aW0ag7Qkbr5cJOCm7zvc6DqvG_ko,10706
+biocypher-0.10.0.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+biocypher-0.10.0.dist-info/RECORD,,

{biocypher-0.9.6.dist-info → biocypher-0.10.0.dist-info}/LICENSE RENAMED Viewed

File without changes

{biocypher-0.9.6.dist-info → biocypher-0.10.0.dist-info}/WHEEL RENAMED Viewed

File without changes

biocypher 0.9.6__py3-none-any.whl → 0.10.0__py3-none-any.whl

Potentially problematic release.

biocypher 0.9.6py3-none-any.whl → 0.10.0py3-none-any.whl