PyPI - pyg-nightly - Versions diffs - 2.7.0.dev20250825__py3-none-any.whl → 2.7.0.dev20250827__py3-none-any.whl - Mend

pyg-nightly 2.7.0.dev20250825__py3-none-any.whl → 2.7.0.dev20250827__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

torch_geometric/sampler/__init__.py CHANGED Viewed

@@ -3,7 +3,7 @@ r"""Graph sampler package."""
 from .base import (BaseSampler, NodeSamplerInput, EdgeSamplerInput,
                    SamplerOutput, HeteroSamplerOutput, NegativeSampling,
                    NumNeighbors)
-from .neighbor_sampler import NeighborSampler
+from .neighbor_sampler import NeighborSampler, BidirectionalNeighborSampler
 from .hgt_sampler import HGTSampler
 __all__ = classes = [
@@ -15,5 +15,6 @@ __all__ = classes = [
     'NumNeighbors',
     'NegativeSampling',
     'NeighborSampler',
+    'BidirectionalNeighborSampler',
     'HGTSampler',
 ]

torch_geometric/sampler/base.py CHANGED Viewed

@@ -3,7 +3,7 @@ import math
 import warnings
 from abc import ABC, abstractmethod
 from collections import defaultdict
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from enum import Enum
 from typing import Any, Dict, List, Literal, Optional, Union
@@ -12,8 +12,10 @@ from torch import Tensor
 from torch_geometric.data import Data, FeatureStore, GraphStore, HeteroData
 from torch_geometric.sampler.utils import (
+    global_to_local_node_idx,
     local_to_global_node_idx,
     to_bidirectional,
+    unique_unsorted,
 )
 from torch_geometric.typing import EdgeType, EdgeTypeStr, NodeType, OptTensor
 from torch_geometric.utils.mixin import CastMixin
@@ -209,6 +211,7 @@ class SamplerOutput(CastMixin):
     # TODO(manan): refine this further; it does not currently define a proper
     # API for the expected output of a sampler.
     metadata: Optional[Any] = None
+    _seed_node: OptTensor = field(repr=False, default=None)
     @property
     def global_row(self) -> Tensor:
@@ -220,8 +223,17 @@ class SamplerOutput(CastMixin):
     @property
     def seed_node(self) -> Tensor:
-        return local_to_global_node_idx(
-            self.node, self.batch) if self.batch is not None else None
+        # can be set manually if the seed nodes are not contained in the
+        # sampled nodes
+        if self._seed_node is None:
+            # Lazily derive the value from `node` and `batch` and cache it in
+            # `_seed_node`; the result stays `None` when `batch` is `None`,
+            # in which case it is recomputed on every access.
+            self._seed_node = local_to_global_node_idx(
+                self.node, self.batch) if self.batch is not None else None
+        return self._seed_node
+    @seed_node.setter
+    def seed_node(self, value: Tensor):
+        # Overrides the cached seed nodes; exactly one entry per entry of
+        # `node` is required.
+        # NOTE(review): this check is an `assert`, so it is skipped when
+        # Python runs with `-O`.
+        assert len(value) == len(self.node)
+        self._seed_node = value
     @property
     def global_orig_row(self) -> Tensor:
@@ -263,6 +275,230 @@ class SamplerOutput(CastMixin):
         return out
+    @classmethod
+    def collate(cls, outputs: List['SamplerOutput'],
+                replace: bool = True) -> 'SamplerOutput':
+        r"""Collate a list of :class:`~torch_geometric.sampler.SamplerOutput`
+        objects into a single :class:`~torch_geometric.sampler.SamplerOutput`
+        object. Requires that they all have the same fields.
+
+        Args:
+            outputs: The sampler outputs to collate. Must be non-empty.
+            replace: Forwarded to :meth:`merge_with` for every pairwise
+                merge.
+
+        Returns:
+            The first output merged successively with each following output.
+            If :obj:`outputs` holds a single element, that element itself is
+            returned.
+
+        Raises:
+            ValueError: If :obj:`outputs` is empty, or if any output differs
+                from the first one in which optional fields are set.
+        """
+        if len(outputs) == 0:
+            raise ValueError("Cannot collate an empty list of SamplerOutputs")
+        # Optional fields whose presence (not None) must agree across all
+        # outputs.
+        optional_fields = ('edge', 'orig_row', 'orig_col', 'batch',
+                           'num_sampled_nodes', 'num_sampled_edges')
+        out = outputs[0]
+        expected = [getattr(out, name) is not None for name in optional_fields]
+        # Explicit comparison instead of `assert`, so the validation is not
+        # stripped when Python runs with `-O`.
+        for i, sample_output in enumerate(outputs):
+            present = [
+                getattr(sample_output, name) is not None
+                for name in optional_fields
+            ]
+            if present != expected:
+                error_str = f"Output {i+1} has a different field than the first output"  # noqa
+                raise ValueError(error_str)
+        for other in outputs[1:]:
+            out = out.merge_with(other, replace=replace)
+        return out
+    def merge_with(self, other: 'SamplerOutput',
+                   replace: bool = True) -> 'SamplerOutput':
+        """Merges two SamplerOutputs.
+        If replace is True, self's nodes and edges take precedence.
+
+        Args:
+            other: The output to merge into this one.
+            replace: If :obj:`False`, the two outputs are concatenated;
+                :obj:`other`'s ``row``, ``col``, ``batch``, ``orig_row`` and
+                ``orig_col`` are shifted by ``len(self.node)``. If
+                :obj:`True`, nodes and edges are de-duplicated via
+                ``unique_unsorted`` before being re-indexed.
+
+        Returns:
+            A new :class:`SamplerOutput`. An optional field is only set if it
+            is set on both inputs. ``metadata`` is the two-element list
+            ``[self.metadata, other.metadata]``.
+        """
+        if not replace:
+            return SamplerOutput(
+                node=torch.cat([self.node, other.node], dim=0),
+                row=torch.cat([self.row, len(self.node) + other.row], dim=0),
+                col=torch.cat([self.col, len(self.node) + other.col], dim=0),
+                edge=torch.cat([self.edge, other.edge], dim=0)
+                if self.edge is not None and other.edge is not None else None,
+                batch=torch.cat(
+                    [self.batch, len(self.node) + other.batch], dim=0) if
+                self.batch is not None and other.batch is not None else None,
+                num_sampled_nodes=self.num_sampled_nodes +
+                other.num_sampled_nodes if self.num_sampled_nodes is not None
+                and other.num_sampled_nodes is not None else None,
+                num_sampled_edges=self.num_sampled_edges +
+                other.num_sampled_edges if self.num_sampled_edges is not None
+                and other.num_sampled_edges is not None else None,
+                orig_row=torch.cat(
+                    [self.orig_row,
+                     len(self.node) +
+                     other.orig_row], dim=0) if self.orig_row is not None
+                and other.orig_row is not None else None,
+                orig_col=torch.cat(
+                    [self.orig_col,
+                     len(self.node) +
+                     other.orig_col], dim=0) if self.orig_col is not None
+                and other.orig_col is not None else None,
+                metadata=[self.metadata, other.metadata],
+            )
+        else:
+            # NODES
+            # A node "uid" is one row of a 2-D tensor: column 0 is the node
+            # id and, when both outputs carry `batch`, column 1 is the seed
+            # node it was sampled for.
+            old_nodes, new_nodes = self.node, other.node
+            old_node_uid, new_node_uid = [old_nodes], [new_nodes]
+            # batch tracks disjoint subgraph samplings
+            if self.batch is not None and other.batch is not None:
+                # Transform the batch indices to be global node ids
+                old_batch_nodes = self.seed_node
+                new_batch_nodes = other.seed_node
+                old_node_uid.append(old_batch_nodes)
+                new_node_uid.append(new_batch_nodes)
+            # NOTE: if any new node fields are added,
+            # they need to be merged here
+            old_node_uid = torch.stack(old_node_uid, dim=1)
+            new_node_uid = torch.stack(new_node_uid, dim=1)
+            # NOTE(review): presumably `unique_unsorted` keeps first
+            # occurrences in order, which is what gives `self` precedence;
+            # confirm against torch_geometric.sampler.utils.
+            merged_node_uid = unique_unsorted(
+                torch.cat([old_node_uid, new_node_uid], dim=0))
+            num_old_nodes = old_node_uid.shape[0]
+            # Recompute num sampled nodes for second output,
+            # subtracting out nodes already seen in first output
+            merged_node_num_sampled_nodes = None
+            if (self.num_sampled_nodes is not None
+                    and other.num_sampled_nodes is not None):
+                merged_node_num_sampled_nodes = copy.copy(
+                    self.num_sampled_nodes)
+                curr_index = 0
+                # NOTE: There's an assumption here that no two nodes will be
+                # sampled twice in the same SampleOutput object
+                for minibatch in other.num_sampled_nodes:
+                    # Despite its name, this is the number of uids in the
+                    # current slice of `other` that are NOT already in
+                    # `self`: (unique rows of old + slice) - (old rows).
+                    size_of_intersect = torch.cat([
+                        old_node_uid,
+                        new_node_uid[curr_index:curr_index + minibatch]
+                    ]).unique(dim=0, sorted=False).shape[0] - num_old_nodes
+                    merged_node_num_sampled_nodes.append(size_of_intersect)
+                    curr_index += minibatch
+            merged_nodes = merged_node_uid[:, 0]
+            merged_batch = None
+            if self.batch is not None and other.batch is not None:
+                # Restore the batch indices to be relative to the nodes field
+                # Each seed node is looked up as the uid (seed, seed).
+                ref_merged_batch_nodes = merged_node_uid[:, 1].unsqueeze(
+                    -1).expand(-1, 2)  # num_nodes x 2
+                merged_batch = global_to_local_node_idx(
+                    merged_node_uid, ref_merged_batch_nodes)
+            # EDGES
+            # When both outputs carry orig_row/orig_col, merge on those and
+            # rebuild the bidirectional edges at the very end.
+            is_bidirectional = self.orig_row is not None \
+                and self.orig_col is not None \
+                and other.orig_row is not None \
+                and other.orig_col is not None
+            if is_bidirectional:
+                old_row, old_col = self.orig_row, self.orig_col
+                new_row, new_col = other.orig_row, other.orig_col
+            else:
+                old_row, old_col = self.row, self.col
+                new_row, new_col = other.row, other.col
+            # Transform the row and col indices to be global node ids
+            # instead of relative indices to nodes field
+            # Edge uids build off of node uids
+            old_row_idx, old_col_idx = local_to_global_node_idx(
+                old_node_uid,
+                old_row), local_to_global_node_idx(old_node_uid, old_col)
+            new_row_idx, new_col_idx = local_to_global_node_idx(
+                new_node_uid,
+                new_row), local_to_global_node_idx(new_node_uid, new_col)
+            old_edge_uid, new_edge_uid = [old_row_idx, old_col_idx
+                                          ], [new_row_idx, new_col_idx]
+            # Column offsets of the row-uid, col-uid and edge-id parts
+            # inside the concatenated edge uid tensor.
+            row_idx = 0
+            col_idx = old_row_idx.shape[1]
+            edge_idx = old_row_idx.shape[1] + old_col_idx.shape[1]
+            if self.edge is not None and other.edge is not None:
+                if is_bidirectional:
+                    # bidirectional duplicates edge ids
+                    # Select the edge ids that belong to the original
+                    # (orig_row, orig_col) pairs.
+                    old_edge_uid_ref = torch.stack([self.row, self.col],
+                                                   dim=1)  # num_edges x 2
+                    old_orig_edge_uid_ref = torch.stack(
+                        [self.orig_row, self.orig_col],
+                        dim=1)  # num_orig_edges x 2
+                    old_edge_idx = global_to_local_node_idx(
+                        old_edge_uid_ref, old_orig_edge_uid_ref)
+                    old_edge = self.edge[old_edge_idx]
+                    new_edge_uid_ref = torch.stack([other.row, other.col],
+                                                   dim=1)  # num_edges x 2
+                    new_orig_edge_uid_ref = torch.stack(
+                        [other.orig_row, other.orig_col],
+                        dim=1)  # num_orig_edges x 2
+                    new_edge_idx = global_to_local_node_idx(
+                        new_edge_uid_ref, new_orig_edge_uid_ref)
+                    new_edge = other.edge[new_edge_idx]
+                else:
+                    old_edge, new_edge = self.edge, other.edge
+                old_edge_uid.append(old_edge.unsqueeze(-1))
+                new_edge_uid.append(new_edge.unsqueeze(-1))
+            old_edge_uid = torch.cat(old_edge_uid, dim=1)
+            new_edge_uid = torch.cat(new_edge_uid, dim=1)
+            merged_edge_uid = unique_unsorted(
+                torch.cat([old_edge_uid, new_edge_uid], dim=0))
+            num_old_edges = old_edge_uid.shape[0]
+            merged_edge_num_sampled_edges = None
+            if (self.num_sampled_edges is not None
+                    and other.num_sampled_edges is not None):
+                merged_edge_num_sampled_edges = copy.copy(
+                    self.num_sampled_edges)
+                curr_index = 0
+                # NOTE: There's an assumption here that no two edges will be
+                # sampled twice in the same SampleOutput object
+                for minibatch in other.num_sampled_edges:
+                    # Number of edge uids in this slice of `other` that are
+                    # not already present in `self` (see the node loop).
+                    size_of_intersect = torch.cat([
+                        old_edge_uid,
+                        new_edge_uid[curr_index:curr_index + minibatch]
+                    ]).unique(dim=0, sorted=False).shape[0] - num_old_edges
+                    merged_edge_num_sampled_edges.append(size_of_intersect)
+                    curr_index += minibatch
+            merged_row = merged_edge_uid[:, row_idx:col_idx]
+            merged_col = merged_edge_uid[:, col_idx:edge_idx]
+            # Drop only the trailing singleton column: a bare `.squeeze()`
+            # would also collapse the edge dimension when exactly one edge
+            # remains, yielding a 0-dim tensor instead of shape [1].
+            merged_edge = merged_edge_uid[:, edge_idx:].squeeze(-1) \
+                if self.edge is not None and other.edge is not None else None
+            # restore to row and col indices relative to nodes field
+            merged_row = global_to_local_node_idx(merged_node_uid, merged_row)
+            merged_col = global_to_local_node_idx(merged_node_uid, merged_col)
+            out = SamplerOutput(
+                node=merged_nodes,
+                row=merged_row,
+                col=merged_col,
+                edge=merged_edge,
+                batch=merged_batch,
+                num_sampled_nodes=merged_node_num_sampled_nodes,
+                num_sampled_edges=merged_edge_num_sampled_edges,
+                metadata=[self.metadata, other.metadata],
+            )
+            # Restores orig_row and orig_col if they existed before merging
+            if is_bidirectional:
+                out = out.to_bidirectional(keep_orig_edges=True)
+            return out
 @dataclass
 class HeteroSamplerOutput(CastMixin):
@@ -439,6 +675,25 @@ class HeteroSamplerOutput(CastMixin):
         return out
+    @classmethod
+    def collate(cls, outputs: List['HeteroSamplerOutput'],
+                replace: bool = True) -> 'HeteroSamplerOutput':
+        r"""Collate a list of
+        :class:`~torch_geometric.sampler.HeteroSamplerOutput` objects into a
+        single :class:`~torch_geometric.sampler.HeteroSamplerOutput` object.
+        Requires that they all have the same fields.
+
+        Not implemented yet: always raises :class:`NotImplementedError`.
+        """
+        # TODO(zaristei)
+        raise NotImplementedError
+    def merge_with(self, other: 'HeteroSamplerOutput',
+                   replace: bool = True) -> 'HeteroSamplerOutput':
+        """Merges two HeteroSamplerOutputs.
+        If replace is True, self's nodes and edges take precedence.
+
+        Not implemented yet: always raises :class:`NotImplementedError`.
+        """
+        # TODO(zaristei)
+        raise NotImplementedError
 @dataclass(frozen=True)
 class NumNeighbors:

pyg-nightly 2.7.0.dev20250825__py3-none-any.whl → 2.7.0.dev20250827__py3-none-any.whl

pyg-nightly 2.7.0.dev20250825__py3-none-any.whl → 2.7.0.dev20250827__py3-none-any.whl