PyPI - pyg-nightly - Versions diffs - 2.6.0.dev20240704__py3-none-any.whl → 2.8.0.dev20251207__py3-none-any.whl - Mend

pyg-nightly 2.6.0.dev20240704-py3-none-any.whl → 2.8.0.dev20251207-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pyg-nightly might be problematic. Click here for more details.

Files changed (268) hide show

{pyg_nightly-2.6.0.dev20240704.dist-info → pyg_nightly-2.8.0.dev20251207.dist-info}/METADATA +81 -58
{pyg_nightly-2.6.0.dev20240704.dist-info → pyg_nightly-2.8.0.dev20251207.dist-info}/RECORD +265 -221
{pyg_nightly-2.6.0.dev20240704.dist-info → pyg_nightly-2.8.0.dev20251207.dist-info}/WHEEL +1 -1
pyg_nightly-2.8.0.dev20251207.dist-info/licenses/LICENSE +19 -0
torch_geometric/__init__.py +34 -1
torch_geometric/_compile.py +11 -3
torch_geometric/_onnx.py +228 -0
torch_geometric/config_mixin.py +8 -3
torch_geometric/config_store.py +1 -1
torch_geometric/contrib/__init__.py +1 -1
torch_geometric/contrib/explain/pgm_explainer.py +1 -1
torch_geometric/data/__init__.py +19 -1
torch_geometric/data/batch.py +2 -2
torch_geometric/data/collate.py +1 -3
torch_geometric/data/data.py +110 -6
torch_geometric/data/database.py +19 -5
torch_geometric/data/dataset.py +14 -9
torch_geometric/data/extract.py +1 -1
torch_geometric/data/feature_store.py +17 -22
torch_geometric/data/graph_store.py +3 -2
torch_geometric/data/hetero_data.py +139 -7
torch_geometric/data/hypergraph_data.py +2 -2
torch_geometric/data/in_memory_dataset.py +2 -2
torch_geometric/data/lightning/datamodule.py +42 -28
torch_geometric/data/storage.py +9 -1
torch_geometric/datasets/__init__.py +20 -1
torch_geometric/datasets/actor.py +7 -9
torch_geometric/datasets/airfrans.py +17 -20
torch_geometric/datasets/airports.py +8 -10
torch_geometric/datasets/amazon.py +8 -11
torch_geometric/datasets/amazon_book.py +8 -9
torch_geometric/datasets/amazon_products.py +7 -9
torch_geometric/datasets/aminer.py +8 -9
torch_geometric/datasets/aqsol.py +10 -13
torch_geometric/datasets/attributed_graph_dataset.py +8 -10
torch_geometric/datasets/ba_multi_shapes.py +10 -12
torch_geometric/datasets/ba_shapes.py +5 -6
torch_geometric/datasets/brca_tgca.py +1 -1
torch_geometric/datasets/city.py +157 -0
torch_geometric/datasets/dbp15k.py +1 -1
torch_geometric/datasets/gdelt_lite.py +3 -2
torch_geometric/datasets/ged_dataset.py +3 -2
torch_geometric/datasets/git_mol_dataset.py +263 -0
torch_geometric/datasets/gnn_benchmark_dataset.py +3 -2
torch_geometric/datasets/hgb_dataset.py +2 -2
torch_geometric/datasets/hm.py +1 -1
torch_geometric/datasets/instruct_mol_dataset.py +134 -0
torch_geometric/datasets/linkx_dataset.py +4 -3
torch_geometric/datasets/lrgb.py +3 -5
torch_geometric/datasets/malnet_tiny.py +2 -1
torch_geometric/datasets/md17.py +3 -3
torch_geometric/datasets/medshapenet.py +145 -0
torch_geometric/datasets/mnist_superpixels.py +2 -3
torch_geometric/datasets/modelnet.py +1 -1
torch_geometric/datasets/molecule_gpt_dataset.py +492 -0
torch_geometric/datasets/molecule_net.py +3 -2
torch_geometric/datasets/neurograph.py +1 -3
torch_geometric/datasets/ogb_mag.py +1 -1
torch_geometric/datasets/opf.py +19 -5
torch_geometric/datasets/pascal_pf.py +1 -1
torch_geometric/datasets/pcqm4m.py +2 -1
torch_geometric/datasets/ppi.py +2 -1
torch_geometric/datasets/protein_mpnn_dataset.py +451 -0
torch_geometric/datasets/qm7.py +1 -1
torch_geometric/datasets/qm9.py +3 -2
torch_geometric/datasets/shrec2016.py +2 -2
torch_geometric/datasets/snap_dataset.py +8 -4
torch_geometric/datasets/tag_dataset.py +462 -0
torch_geometric/datasets/teeth3ds.py +269 -0
torch_geometric/datasets/web_qsp_dataset.py +342 -0
torch_geometric/datasets/wikics.py +2 -1
torch_geometric/datasets/wikidata.py +2 -1
torch_geometric/deprecation.py +1 -1
torch_geometric/distributed/__init__.py +13 -0
torch_geometric/distributed/dist_loader.py +2 -2
torch_geometric/distributed/local_feature_store.py +3 -2
torch_geometric/distributed/local_graph_store.py +2 -1
torch_geometric/distributed/partition.py +9 -8
torch_geometric/distributed/rpc.py +3 -3
torch_geometric/edge_index.py +35 -22
torch_geometric/explain/algorithm/attention_explainer.py +219 -29
torch_geometric/explain/algorithm/base.py +2 -2
torch_geometric/explain/algorithm/captum.py +1 -1
torch_geometric/explain/algorithm/captum_explainer.py +2 -1
torch_geometric/explain/algorithm/gnn_explainer.py +406 -69
torch_geometric/explain/algorithm/graphmask_explainer.py +8 -8
torch_geometric/explain/algorithm/pg_explainer.py +305 -47
torch_geometric/explain/explainer.py +2 -2
torch_geometric/explain/explanation.py +89 -5
torch_geometric/explain/metric/faithfulness.py +1 -1
torch_geometric/graphgym/checkpoint.py +2 -1
torch_geometric/graphgym/config.py +3 -2
torch_geometric/graphgym/imports.py +15 -4
torch_geometric/graphgym/logger.py +1 -1
torch_geometric/graphgym/loss.py +1 -1
torch_geometric/graphgym/models/encoder.py +2 -2
torch_geometric/graphgym/models/layer.py +1 -1
torch_geometric/graphgym/utils/comp_budget.py +4 -3
torch_geometric/hash_tensor.py +798 -0
torch_geometric/index.py +16 -7
torch_geometric/inspector.py +6 -2
torch_geometric/io/fs.py +27 -0
torch_geometric/io/tu.py +2 -3
torch_geometric/llm/__init__.py +9 -0
torch_geometric/llm/large_graph_indexer.py +741 -0
torch_geometric/llm/models/__init__.py +23 -0
torch_geometric/llm/models/g_retriever.py +251 -0
torch_geometric/llm/models/git_mol.py +336 -0
torch_geometric/llm/models/glem.py +397 -0
torch_geometric/llm/models/llm.py +470 -0
torch_geometric/llm/models/llm_judge.py +158 -0
torch_geometric/llm/models/molecule_gpt.py +222 -0
torch_geometric/llm/models/protein_mpnn.py +333 -0
torch_geometric/llm/models/sentence_transformer.py +188 -0
torch_geometric/llm/models/txt2kg.py +353 -0
torch_geometric/llm/models/vision_transformer.py +38 -0
torch_geometric/llm/rag_loader.py +154 -0
torch_geometric/llm/utils/__init__.py +10 -0
torch_geometric/llm/utils/backend_utils.py +443 -0
torch_geometric/llm/utils/feature_store.py +169 -0
torch_geometric/llm/utils/graph_store.py +199 -0
torch_geometric/llm/utils/vectorrag.py +125 -0
torch_geometric/loader/cluster.py +6 -5
torch_geometric/loader/graph_saint.py +2 -1
torch_geometric/loader/ibmb_loader.py +4 -4
torch_geometric/loader/link_loader.py +1 -1
torch_geometric/loader/link_neighbor_loader.py +2 -1
torch_geometric/loader/mixin.py +6 -5
torch_geometric/loader/neighbor_loader.py +1 -1
torch_geometric/loader/neighbor_sampler.py +2 -2
torch_geometric/loader/prefetch.py +4 -3
torch_geometric/loader/temporal_dataloader.py +2 -2
torch_geometric/loader/utils.py +10 -10
torch_geometric/metrics/__init__.py +23 -2
torch_geometric/metrics/link_pred.py +755 -85
torch_geometric/nn/__init__.py +1 -0
torch_geometric/nn/aggr/__init__.py +2 -0
torch_geometric/nn/aggr/base.py +1 -1
torch_geometric/nn/aggr/equilibrium.py +1 -1
torch_geometric/nn/aggr/fused.py +1 -1
torch_geometric/nn/aggr/patch_transformer.py +149 -0
torch_geometric/nn/aggr/set_transformer.py +1 -1
torch_geometric/nn/aggr/utils.py +9 -4
torch_geometric/nn/attention/__init__.py +9 -1
torch_geometric/nn/attention/polynormer.py +107 -0
torch_geometric/nn/attention/qformer.py +71 -0
torch_geometric/nn/attention/sgformer.py +99 -0
torch_geometric/nn/conv/__init__.py +2 -0
torch_geometric/nn/conv/appnp.py +1 -1
torch_geometric/nn/conv/collect.jinja +6 -3
torch_geometric/nn/conv/cugraph/gat_conv.py +8 -2
torch_geometric/nn/conv/cugraph/rgcn_conv.py +3 -0
torch_geometric/nn/conv/cugraph/sage_conv.py +3 -0
torch_geometric/nn/conv/dna_conv.py +1 -1
torch_geometric/nn/conv/eg_conv.py +7 -7
torch_geometric/nn/conv/gat_conv.py +33 -4
torch_geometric/nn/conv/gatv2_conv.py +35 -4
torch_geometric/nn/conv/gen_conv.py +1 -1
torch_geometric/nn/conv/general_conv.py +1 -1
torch_geometric/nn/conv/gravnet_conv.py +2 -1
torch_geometric/nn/conv/hetero_conv.py +3 -2
torch_geometric/nn/conv/meshcnn_conv.py +487 -0
torch_geometric/nn/conv/message_passing.py +6 -5
torch_geometric/nn/conv/mixhop_conv.py +1 -1
torch_geometric/nn/conv/rgcn_conv.py +2 -1
torch_geometric/nn/conv/sg_conv.py +1 -1
torch_geometric/nn/conv/spline_conv.py +2 -1
torch_geometric/nn/conv/ssg_conv.py +1 -1
torch_geometric/nn/conv/transformer_conv.py +5 -3
torch_geometric/nn/data_parallel.py +5 -4
torch_geometric/nn/dense/linear.py +5 -24
torch_geometric/nn/encoding.py +17 -3
torch_geometric/nn/fx.py +17 -15
torch_geometric/nn/model_hub.py +5 -16
torch_geometric/nn/models/__init__.py +11 -0
torch_geometric/nn/models/attentive_fp.py +1 -1
torch_geometric/nn/models/attract_repel.py +148 -0
torch_geometric/nn/models/basic_gnn.py +2 -1
torch_geometric/nn/models/captum.py +1 -1
torch_geometric/nn/models/deep_graph_infomax.py +1 -1
torch_geometric/nn/models/dimenet.py +2 -2
torch_geometric/nn/models/dimenet_utils.py +4 -2
torch_geometric/nn/models/gpse.py +1083 -0
torch_geometric/nn/models/graph_unet.py +13 -4
torch_geometric/nn/models/lpformer.py +783 -0
torch_geometric/nn/models/metapath2vec.py +1 -1
torch_geometric/nn/models/mlp.py +4 -2
torch_geometric/nn/models/node2vec.py +1 -1
torch_geometric/nn/models/polynormer.py +206 -0
torch_geometric/nn/models/rev_gnn.py +3 -3
torch_geometric/nn/models/schnet.py +2 -1
torch_geometric/nn/models/sgformer.py +219 -0
torch_geometric/nn/models/signed_gcn.py +1 -1
torch_geometric/nn/models/visnet.py +2 -2
torch_geometric/nn/norm/batch_norm.py +17 -7
torch_geometric/nn/norm/diff_group_norm.py +7 -2
torch_geometric/nn/norm/graph_norm.py +9 -4
torch_geometric/nn/norm/instance_norm.py +5 -1
torch_geometric/nn/norm/layer_norm.py +15 -7
torch_geometric/nn/norm/msg_norm.py +8 -2
torch_geometric/nn/pool/__init__.py +15 -9
torch_geometric/nn/pool/cluster_pool.py +144 -0
torch_geometric/nn/pool/connect/base.py +1 -3
torch_geometric/nn/pool/edge_pool.py +1 -1
torch_geometric/nn/pool/knn.py +13 -10
torch_geometric/nn/pool/select/base.py +1 -4
torch_geometric/nn/summary.py +1 -1
torch_geometric/nn/to_hetero_module.py +4 -3
torch_geometric/nn/to_hetero_transformer.py +3 -3
torch_geometric/nn/to_hetero_with_bases_transformer.py +5 -5
torch_geometric/profile/__init__.py +2 -0
torch_geometric/profile/nvtx.py +66 -0
torch_geometric/profile/profiler.py +18 -9
torch_geometric/profile/utils.py +20 -5
torch_geometric/sampler/__init__.py +2 -1
torch_geometric/sampler/base.py +337 -8
torch_geometric/sampler/hgt_sampler.py +11 -1
torch_geometric/sampler/neighbor_sampler.py +298 -25
torch_geometric/sampler/utils.py +93 -5
torch_geometric/testing/__init__.py +4 -0
torch_geometric/testing/decorators.py +35 -5
torch_geometric/testing/distributed.py +1 -1
torch_geometric/transforms/__init__.py +4 -0
torch_geometric/transforms/add_gpse.py +49 -0
torch_geometric/transforms/add_metapaths.py +10 -8
torch_geometric/transforms/add_positional_encoding.py +2 -2
torch_geometric/transforms/base_transform.py +2 -1
torch_geometric/transforms/delaunay.py +65 -15
torch_geometric/transforms/face_to_edge.py +32 -3
torch_geometric/transforms/gdc.py +8 -9
torch_geometric/transforms/largest_connected_components.py +1 -1
torch_geometric/transforms/mask.py +5 -1
torch_geometric/transforms/node_property_split.py +1 -1
torch_geometric/transforms/normalize_features.py +3 -3
torch_geometric/transforms/pad.py +1 -1
torch_geometric/transforms/random_link_split.py +1 -1
torch_geometric/transforms/remove_duplicated_edges.py +4 -2
torch_geometric/transforms/remove_self_loops.py +36 -0
torch_geometric/transforms/rooted_subgraph.py +1 -1
torch_geometric/transforms/svd_feature_reduction.py +1 -1
torch_geometric/transforms/virtual_node.py +2 -1
torch_geometric/typing.py +82 -17
torch_geometric/utils/__init__.py +6 -1
torch_geometric/utils/_lexsort.py +0 -9
torch_geometric/utils/_negative_sampling.py +28 -13
torch_geometric/utils/_normalize_edge_index.py +46 -0
torch_geometric/utils/_scatter.py +126 -164
torch_geometric/utils/_sort_edge_index.py +0 -2
torch_geometric/utils/_spmm.py +16 -14
torch_geometric/utils/_subgraph.py +4 -0
torch_geometric/utils/_tree_decomposition.py +1 -1
torch_geometric/utils/_trim_to_layer.py +2 -2
torch_geometric/utils/augmentation.py +1 -1
torch_geometric/utils/convert.py +17 -10
torch_geometric/utils/cross_entropy.py +34 -13
torch_geometric/utils/embedding.py +91 -2
torch_geometric/utils/geodesic.py +28 -25
torch_geometric/utils/influence.py +279 -0
torch_geometric/utils/map.py +14 -10
torch_geometric/utils/nested.py +1 -1
torch_geometric/utils/smiles.py +3 -3
torch_geometric/utils/sparse.py +32 -24
torch_geometric/visualization/__init__.py +2 -1
torch_geometric/visualization/graph.py +250 -5
torch_geometric/warnings.py +11 -2
torch_geometric/nn/nlp/__init__.py +0 -7
torch_geometric/nn/nlp/llm.py +0 -283
torch_geometric/nn/nlp/sentence_transformer.py +0 -94

torch_geometric/llm/utils/feature_store.py ADDED Viewed

@@ -0,0 +1,169 @@
+import gc
+from collections.abc import Iterable, Iterator
+from typing import Any, Dict, List, Tuple, Union
+import torch
+from torch import Tensor
+from torch_geometric.data import Data, HeteroData
+from torch_geometric.distributed import LocalFeatureStore
+from torch_geometric.llm.utils.backend_utils import batch_knn
+from torch_geometric.sampler import HeteroSamplerOutput, SamplerOutput
+from torch_geometric.typing import InputNodes
+# NOTE: Only compatible with Homogeneous graphs for now
+class KNNRAGFeatureStore(LocalFeatureStore):
+    """A feature store that retrieves seed nodes with a KNN search.
+    Queries are encoded with ``encoder_model`` and handed, together with the
+    stored node features, to ``batch_knn``. Both ``encoder_model`` and
+    ``k_nodes`` must be provided through the :attr:`config` setter before
+    any retrieval call. Only homogeneous graphs are supported for now.
+    """
+    def __init__(self) -> None:
+        """Creates an unconfigured feature store."""
+        # Both attributes are filled in by the ``config`` setter.
+        self.encoder_model = None
+        self.k_nodes = None
+        self._config: Dict[str, Any] = {}
+        super().__init__()
+    @property
+    def config(self) -> Dict[str, Any]:
+        """Returns the config dictionary last assigned to the store."""
+        return self._config
+    def _set_from_config(self, config: Dict[str, Any], attr_name: str) -> None:
+        """Copies a required entry of ``config`` onto the instance.
+        Args:
+            config (Dict[str, Any]): Config dictionary
+            attr_name (str): Key to read and name of the attribute to set
+        Raises:
+            ValueError: If ``attr_name`` is not a key of ``config``
+        """
+        if attr_name not in config:
+            raise ValueError(
+                f"Required config parameter '{attr_name}' not found")
+        setattr(self, attr_name, config[attr_name])
+    @config.setter  # type: ignore
+    def config(self, config: Dict[str, Any]) -> None:
+        """Configures the feature store from a dictionary.
+        Args:
+            config (Dict[str, Any]): Config dictionary that must contain
+                the keys ``"k_nodes"`` and ``"encoder_model"``
+        Raises:
+            ValueError: If a required key is missing from ``config``
+        """
+        for attr_name in ("k_nodes", "encoder_model"):
+            self._set_from_config(config, attr_name)
+        assert self.encoder_model is not None, \
+            "Need to define encoder model from config"
+        self.encoder_model.eval()
+        self._config = config
+    @property
+    def x(self) -> Tensor:
+        """Returns the node features stored under attribute ``'x'``."""
+        return Tensor(self.get_tensor(group_name=None, attr_name='x'))
+    @property
+    def edge_attr(self) -> Tensor:
+        """Returns the edge attributes stored under ``'edge_attr'``."""
+        return Tensor(
+            self.get_tensor(group_name=(None, None), attr_name='edge_attr'))
+    def retrieve_seed_nodes(  # noqa: D417
+        self, query: Union[str, List[str], Tuple[str]]
+    ) -> Union[Tuple[InputNodes, Tensor], Dict[str, Tuple[InputNodes,
+                                                           Tensor]]]:
+        """Retrieves the ``k_nodes`` most similar nodes to the given query.
+        Args:
+            query (Union[str, List[str], Tuple[str]]): The query
+                or list of queries to search for.
+        Returns:
+            For a single query, the ``(node indices, encoded query)`` pair.
+            For several queries, a dictionary that maps each query to its
+            own pair; a query that appears more than once keeps only the
+            result of its last occurrence.
+        """
+        if not isinstance(query, (list, tuple)):
+            query = [query]
+        assert self.k_nodes is not None, "please set k_nodes via config"
+        results = self._retrieve_seed_nodes_batch(query, self.k_nodes)
+        out: Union[Tuple[InputNodes, Tensor], Dict[str, Tuple[InputNodes,
+                                                               Tensor]]]
+        if len(query) == 1:
+            result, query_enc = next(results)
+            out = (result, query_enc)
+        else:
+            out = {}
+            for i, pair in enumerate(results):
+                out[query[i]] = pair
+        # Release intermediate buffers created while encoding the queries.
+        gc.collect()
+        torch.cuda.empty_cache()
+        return out
+    def _retrieve_seed_nodes_batch(  # noqa: D417
+            self, query: Iterable[Any],
+            k_nodes: int) -> Iterator[Tuple[InputNodes, Tensor]]:
+        """Retrieves the k_nodes most similar nodes to each query in the batch.
+        Args:
+            query (Iterable[Any]): The batch of queries to search for.
+            k_nodes (int): The number of nodes to retrieve.
+        Returns:
+            The iterator produced by ``batch_knn`` for the encoded queries.
+        Raises:
+            NotImplementedError: If the store metadata marks the graph as
+                heterogeneous.
+        """
+        if isinstance(self.meta, dict) and self.meta.get("is_hetero", False):
+            raise NotImplementedError
+        assert self.encoder_model is not None, \
+            "Need to define encoder model from config"
+        query_enc = self.encoder_model.encode(query)
+        return batch_knn(query_enc, self.x, k_nodes)
+    def load_subgraph(  # noqa
+        self,
+        sample: Union[SamplerOutput, HeteroSamplerOutput],
+        induced: bool = True,
+    ) -> Union[Data, HeteroData]:
+        """Loads a subgraph from the given sample.
+        Args:
+            sample: The sample to load the subgraph from.
+            induced: If ``True``, build the edge index from ``sample.row``
+                and ``sample.col``; otherwise from ``sample.global_row``
+                and ``sample.global_col``.
+        Returns:
+            The loaded subgraph, with the sampled node and edge ids attached
+            as ``node_idx`` and ``edge_idx``.
+        Raises:
+            NotImplementedError: If ``sample`` is a ``HeteroSamplerOutput``.
+        """
+        if isinstance(sample, HeteroSamplerOutput):
+            raise NotImplementedError
+        # NOTE: torch_geometric.loader.utils.filter_custom_store
+        # can be used here if it supported edge features.
+        edge_id = sample.edge
+        x = self.x[sample.node]
+        edge_attr = self.edge_attr[edge_id]
+        if induced:
+            edge_idx = torch.stack([sample.row, sample.col], dim=0)
+        else:
+            edge_idx = torch.stack([sample.global_row, sample.global_col],
+                                   dim=0)
+        result = Data(x=x, edge_attr=edge_attr, edge_index=edge_idx)
+        # useful for tracking what subset of the graph was sampled
+        result.node_idx = sample.node
+        result.edge_idx = edge_id
+        return result
+"""
+TODO: Add a class CuVSKNNRAGFeatureStore(KNNRAGFeatureStore)
+that includes an approximate KNN flag for cuVS.
+Connect it with a CuGraphGraphStore
+to enable an "accelerated" boolean flag for RAGQueryLoader.
+The flag should be on by default if cuGraph and cuVS are available.
+If they are not, emit a note mentioning the speedup they provide.
+"""

torch_geometric/llm/utils/graph_store.py ADDED Viewed

@@ -0,0 +1,199 @@
+from typing import Any, Dict, Optional, Tuple, Union
+import torch
+from torch import Tensor
+from torch_geometric.data import FeatureStore
+from torch_geometric.distributed import LocalGraphStore
+from torch_geometric.sampler import (
+    BidirectionalNeighborSampler,
+    NodeSamplerInput,
+    SamplerOutput,
+)
+from torch_geometric.utils import index_sort
+# A representation of an edge index, following the possible formats:
+#    * default: Tensor, size = [2, num_edges], where
+#      Tensor[0, :] == row and Tensor[1, :] == col
+#    * COO: (row, col)
+#    * CSC: (row, colptr)
+#    * CSR: (rowptr, col)
+_EdgeTensorType = Union[Tensor, Tuple[Tensor, Tensor]]
+class NeighborSamplingRAGGraphStore(LocalGraphStore):
+    """Neighbor sampling based graph-store to store & retrieve graph data.
+    The sampler is built lazily on the first call to
+    :meth:`sample_subgraph` and is rebuilt whenever the feature store, the
+    edge ids or the edge index change.
+    """
+    def __init__(  # type: ignore[no-untyped-def]
+        self,
+        feature_store: Optional[FeatureStore] = None,
+        **kwargs,
+    ):
+        """Initializes the graph store.
+        Optional feature store and neighbor sampling settings.
+        Args:
+            feature_store (optional): The feature store to use.
+                None if not yet registered.
+            **kwargs (optional):
+                Additional keyword arguments for neighbor sampling.
+        """
+        self.feature_store = feature_store
+        self.sample_kwargs = kwargs
+        self._sampler_is_initialized = False
+        self._config: Dict[str, Any] = {}
+        # to be set by the config
+        self.num_neighbors = None
+        # Sorting permutation recorded by the ``edge_index`` setter; stays
+        # ``None`` when edges are only stored through ``put_edge_index``.
+        self.perm: Optional[Tensor] = None
+        super().__init__()
+    @property
+    def config(self) -> Dict[str, Any]:
+        """Get the config for the graph store."""
+        return self._config
+    def _set_from_config(self, config: Dict[str, Any], attr_name: str) -> None:
+        """Set an attribute from the config.
+        Args:
+            config (Dict[str, Any]): Config dictionary
+            attr_name (str): Name of attribute to set
+        Raises:
+            ValueError: If required attribute not found in config
+        """
+        if attr_name not in config:
+            raise ValueError(
+                f"Required config parameter '{attr_name}' not found")
+        setattr(self, attr_name, config[attr_name])
+    @config.setter  # type: ignore
+    def config(self, config: Dict[str, Any]) -> None:
+        """Set the config for the graph store.
+        Args:
+            config (Dict[str, Any]):
+                Config dictionary that must contain ``"num_neighbors"``
+        Raises:
+            ValueError: If required parameters missing from config
+        """
+        self._set_from_config(config, "num_neighbors")
+        # Keep an already constructed sampler in sync with the new value.
+        if hasattr(self, 'sampler'):
+            self.sampler.num_neighbors = (  # type: ignore[has-type]
+                self.num_neighbors)
+        self._config = config
+    def _init_sampler(self) -> None:
+        """Initializes neighbor sampler with the registered feature store.
+        Raises:
+            AttributeError: If no feature store has been registered.
+        """
+        if self.feature_store is None:
+            raise AttributeError("Feature store not registered yet.")
+        assert self.num_neighbors is not None, \
+            "Please set num_neighbors through config"
+        self.sampler = BidirectionalNeighborSampler(
+            data=(self.feature_store, self), num_neighbors=self.num_neighbors,
+            **self.sample_kwargs)
+        self._sampler_is_initialized = True
+    def register_feature_store(self, feature_store: FeatureStore) -> None:
+        """Registers a feature store with the graph store.
+        :param feature_store: The feature store to register.
+        """
+        self.feature_store = feature_store
+        # Force the sampler to be rebuilt against the new feature store.
+        self._sampler_is_initialized = False
+    def put_edge_id(  # type: ignore[no-untyped-def]
+            self, edge_id: Tensor, *args, **kwargs) -> bool:
+        """Stores an edge ID in the graph store.
+        :param edge_id: The edge ID to store.
+        :return: Whether the operation was successful.
+        """
+        ret = super().put_edge_id(edge_id.contiguous(), *args, **kwargs)
+        self._sampler_is_initialized = False
+        return ret
+    @property
+    def edge_index(self) -> _EdgeTensorType:
+        """Gets the edge index of the graph.
+        The lookup reuses the arguments of the most recent
+        :meth:`put_edge_index` call.
+        :return: The edge index as a tensor.
+        """
+        return self.get_edge_index(*self.edge_idx_args, **self.edge_idx_kwargs)
+    def put_edge_index(  # type: ignore[no-untyped-def]
+            self, edge_index: _EdgeTensorType, *args, **kwargs) -> bool:
+        """Stores an edge index in the graph store.
+        :param edge_index: The edge index to store.
+        :return: Whether the operation was successful.
+        """
+        ret = super().put_edge_index(edge_index, *args, **kwargs)
+        # HACK: remember the attributes so the ``edge_index`` property can
+        # look the edge index up again.
+        self.edge_idx_args = args
+        self.edge_idx_kwargs = kwargs
+        self._sampler_is_initialized = False
+        return ret
+    # HACKY
+    @edge_index.setter  # type: ignore
+    def edge_index(self, edge_index: _EdgeTensorType) -> None:
+        """Sets the edge index of the graph.
+        The edges are sorted by column before being stored and the sorting
+        permutation is kept in ``self.perm``.
+        :param edge_index: The edge index to set, either a Tensor of
+            [2, num_edges] or a ``(row, col)`` tuple of Tensors.
+        """
+        # correct since we make node list from triples
+        if isinstance(edge_index, Tensor):
+            num_nodes = int(edge_index.max()) + 1
+        else:
+            assert isinstance(edge_index, tuple) \
+                and isinstance(edge_index[0], Tensor) \
+                and isinstance(edge_index[1], Tensor), (
+                    "edge_index must be a Tensor of [2, num_edges] "
+                    "or a tuple of Tensors, (row, col).")
+            # Consider both endpoints: the largest node id may only occur
+            # as a column entry.
+            num_nodes = max(int(edge_index[0].max()),
+                            int(edge_index[1].max())) + 1
+        attr = dict(
+            edge_type=None,
+            layout='coo',
+            size=(num_nodes, num_nodes),
+            is_sorted=False,
+        )
+        # edge index needs to be sorted here and the perm saved for later
+        col_sorted, self.perm = index_sort(edge_index[1], num_nodes,
+                                           stable=True)
+        row_sorted = edge_index[0][self.perm]
+        edge_index_sorted = torch.stack([row_sorted, col_sorted], dim=0)
+        self.put_edge_index(edge_index_sorted, **attr)
+    def sample_subgraph(
+        self,
+        seed_nodes: Tensor,
+    ) -> SamplerOutput:
+        """Sample the graph starting from the given nodes using the
+        in-built neighbor sampler.
+        Args:
+            seed_nodes (Tensor): Seed nodes to start sampling from.
+                Duplicates are removed before sampling.
+        Returns:
+            SamplerOutput: The sampler output for the input, with edge ids
+                mapped back through ``self.perm`` when it is set.
+        """
+        # TODO add support for Hetero
+        if not self._sampler_is_initialized:
+            self._init_sampler()
+        seed_nodes = seed_nodes.unique().contiguous()
+        node_sample_input = NodeSamplerInput(input_id=None, node=seed_nodes)
+        out = self.sampler.sample_from_nodes(  # type: ignore[has-type]
+            node_sample_input)
+        # edge ids need to be remapped to the original indices; without a
+        # recorded permutation there is nothing to undo
+        if self.perm is not None:
+            out.edge = self.perm[out.edge]
+        return out

torch_geometric/llm/utils/vectorrag.py ADDED Viewed

@@ -0,0 +1,125 @@
+# mypy: ignore-errors
+import os
+from abc import abstractmethod
+from typing import Any, Callable, Dict, List, Optional, Protocol, Union
+import torch
+from torch import Tensor
+from torch_geometric.data import Data
+from torch_geometric.llm.models import SentenceTransformer
+from torch_geometric.llm.utils.backend_utils import batch_knn
+class VectorRetriever(Protocol):
+    """Protocol for VectorRAG retrievers.
+    Implementations expose a single :meth:`query` method.
+    """
+    @abstractmethod
+    def query(self, query: Any, **kwargs: Any) -> Data:
+        """Retrieve a context for a given query.
+        Args:
+            query (Any): The query to retrieve a context for.
+            **kwargs (Any): Implementation-specific keyword arguments.
+        Returns:
+            The retrieved context.
+        """
+        ...
+class DocumentRetriever(VectorRetriever):
+    """Retrieve documents from a vector database."""
+    def __init__(self, raw_docs: List[str],
+                 embedded_docs: Optional[Tensor] = None, k_for_docs: int = 2,
+                 model: Optional[Union[SentenceTransformer, torch.nn.Module,
+                                       Callable]] = None,
+                 model_kwargs: Optional[Dict[str, Any]] = None):
+        """Retrieve documents from a vector database.
+        Args:
+            raw_docs: List[str]: List of raw documents.
+            embedded_docs: Optional[Tensor]: Embedded documents. If
+                omitted, they are computed by calling ``model`` on
+                ``raw_docs``.
+            k_for_docs: int: Number of documents to retrieve.
+            model: Optional[Union[SentenceTransformer, torch.nn.Module]]:
+                Model to use for encoding. Required when ``embedded_docs``
+                is omitted or when string queries are issued.
+            model_kwargs: Optional[Dict[str, Any]]:
+                Keyword arguments to pass to the model. The dictionary is
+                copied, so the caller's object is never modified.
+        """
+        self.raw_docs = raw_docs
+        self.embedded_docs = embedded_docs
+        self.k_for_docs = k_for_docs
+        self.model = model
+        # Always define both attributes so that `query` never hits a missing
+        # attribute or tries to unpack `None` as keyword arguments.
+        self.encoder = model
+        self.model_kwargs: Dict[str, Any] = dict(model_kwargs or {})
+        if self.embedded_docs is None:
+            assert self.model is not None, \
+                "Model must be provided if embedded_docs is not provided"
+            self.embedded_docs = self.encoder(self.raw_docs,
+                                              **self.model_kwargs)
+            # we don't want to print the verbose output in `query`
+            self.model_kwargs.pop("verbose", None)
+    def query(self, query: Union[str, Tensor]) -> List[str]:
+        """Retrieve documents from the vector database.
+        Args:
+            query: Union[str, Tensor]: Query to retrieve documents for.
+                A string is encoded with the model first; anything else is
+                used as the query encoding directly.
+        Returns:
+            List[str]: Documents retrieved from the vector database.
+        Raises:
+            ValueError: If ``query`` is a string but no model was provided.
+        """
+        if isinstance(query, str):
+            if self.encoder is None:
+                raise ValueError(
+                    "A model must be provided to encode string queries")
+            with torch.no_grad():
+                query_enc = self.encoder(query, **self.model_kwargs)
+        else:
+            query_enc = query
+        selected_doc_idxs, _ = next(
+            batch_knn(query_enc, self.embedded_docs, self.k_for_docs))
+        return [self.raw_docs[i] for i in selected_doc_idxs]
+    def save(self, path: str) -> None:
+        """Save the DocumentRetriever instance to disk.
+        Only the raw documents, their embeddings and ``k_for_docs`` are
+        written; the model is not serialized.
+        Args:
+            path: str: Path where to save the retriever.
+        """
+        os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
+        # Prepare data to save
+        save_dict = {
+            'raw_docs': self.raw_docs,
+            'embedded_docs': self.embedded_docs,
+            'k_for_docs': self.k_for_docs,
+        }
+        # We do not serialize the model
+        torch.save(save_dict, path)
+    @classmethod
+    def load(cls, path: str, model: Union[SentenceTransformer, torch.nn.Module,
+                                          Callable],
+             model_kwargs: Optional[Dict[str, Any]] = None) -> VectorRetriever:
+        """Load a DocumentRetriever instance from disk.
+        Args:
+            path: str: Path to the saved retriever.
+            model: Union[SentenceTransformer, torch.nn.Module, Callable]:
+                Model to use for encoding. The model is never stored by
+                :meth:`save`, so it has to be supplied here.
+            model_kwargs: Optional[Dict[str, Any]]
+                Key word args to be passed to model. The caller's
+                dictionary is not modified.
+        Returns:
+            DocumentRetriever: The loaded retriever.
+        Raises:
+            FileNotFoundError: If ``path`` does not exist.
+        """
+        if not os.path.exists(path):
+            raise FileNotFoundError(
+                f"No saved document retriever found at {path}")
+        # NOTE(security): ``weights_only=False`` unpickles arbitrary Python
+        # objects; only load files that come from a trusted source.
+        save_dict = torch.load(path, weights_only=False)
+        if model_kwargs is not None \
+                and isinstance(save_dict['embedded_docs'], Tensor):
+            # The embeddings already exist, so drop `verbose` on a copy
+            # instead of popping it from the caller's dictionary.
+            model_kwargs = {
+                key: value
+                for key, value in model_kwargs.items() if key != "verbose"
+            }
+        # Create a new DocumentRetriever with the loaded data
+        return cls(raw_docs=save_dict['raw_docs'],
+                   embedded_docs=save_dict['embedded_docs'],
+                   k_for_docs=save_dict['k_for_docs'], model=model,
+                   model_kwargs=model_kwargs)

torch_geometric/loader/cluster.py CHANGED Viewed

@@ -12,6 +12,7 @@ from torch import Tensor
 import torch_geometric.typing
 from torch_geometric.data import Data
 from torch_geometric.index import index2ptr, ptr2index
+from torch_geometric.io import fs
 from torch_geometric.typing import pyg_lib
 from torch_geometric.utils import index_sort, narrow, select, sort_edge_index
 from torch_geometric.utils.map import map_index
@@ -77,7 +78,7 @@ class ClusterData(torch.utils.data.Dataset):
         path = osp.join(root_dir, filename or 'metis.pt')
         if save_dir is not None and osp.exists(path):
-            self.partition = torch.load(path)
+            self.partition = fs.torch_load(path)
         else:
             if log:  # pragma: no cover
                 print('Computing METIS partitioning...', file=sys.stderr)
@@ -234,9 +235,9 @@ class ClusterData(torch.utils.data.Dataset):
 class ClusterLoader(torch.utils.data.DataLoader):
     r"""The data loader scheme from the `"Cluster-GCN: An Efficient Algorithm
     for Training Deep and Large Graph Convolutional Networks"
-    <https://arxiv.org/abs/1905.07953>`_ paper which merges partioned subgraphs
-    and their between-cluster links from a large-scale graph data object to
-    form a mini-batch.
+    <https://arxiv.org/abs/1905.07953>`_ paper which merges partitioned
+    subgraphs and their between-cluster links from a large-scale graph data
+    object to form a mini-batch.
     .. note::
@@ -251,7 +252,7 @@ class ClusterLoader(torch.utils.data.DataLoader):
     Args:
         cluster_data (torch_geometric.loader.ClusterData): The already
-            partioned data object.
+            partitioned data object.
         **kwargs (optional): Additional arguments of
             :class:`torch.utils.data.DataLoader`, such as :obj:`batch_size`,
             :obj:`shuffle`, :obj:`drop_last` or :obj:`num_workers`.

torch_geometric/loader/graph_saint.py CHANGED Viewed

@@ -4,6 +4,7 @@ from typing import Optional
 import torch
 from tqdm import tqdm
+from torch_geometric.io import fs
 from torch_geometric.typing import SparseTensor
@@ -77,7 +78,7 @@ class GraphSAINTSampler(torch.utils.data.DataLoader):
         if self.sample_coverage > 0:
             path = osp.join(save_dir or '', self._filename)
             if save_dir is not None and osp.exists(path):  # pragma: no cover
-                self.node_norm, self.edge_norm = torch.load(path)
+                self.node_norm, self.edge_norm = fs.torch_load(path)
             else:
                 self.node_norm, self.edge_norm = self._compute_norm()
                 if save_dir is not None:  # pragma: no cover

torch_geometric/loader/ibmb_loader.py CHANGED Viewed

@@ -148,7 +148,7 @@ def indices_complete_check(
         if isinstance(aux, Tensor):
             aux = aux.cpu().numpy()
-        assert np.all(np.in1d(out,
+        assert np.all(np.isin(out,
                               aux)), "Not all output nodes are in aux nodes!"
         outs.append(out)
@@ -236,7 +236,7 @@ def create_batchwise_out_aux_pairs(
             logits[tele_set, i] = 1. / len(tele_set)
         new_logits = logits.clone()
-        for i in range(num_iter):
+        for _ in range(num_iter):
             new_logits = adj @ new_logits * (1 - alpha) + alpha * logits
         inds = new_logits.argsort(0)
@@ -498,7 +498,7 @@ class IBMBBaseLoader(torch.utils.data.DataLoader):
             assert adj is not None
         for out, aux in pbar:
-            mask = torch.from_numpy(np.in1d(aux, out))
+            mask = torch.from_numpy(np.isin(aux, out))
             if isinstance(aux, np.ndarray):
                 aux = torch.from_numpy(aux)
             subg = get_subgraph(aux, graph, return_edge_index_type, adj,
@@ -541,7 +541,7 @@ class IBMBBaseLoader(torch.utils.data.DataLoader):
         out, aux = zip(*data_list)
         out = np.concatenate(out)
         aux = np.unique(np.concatenate(aux))
-        mask = torch.from_numpy(np.in1d(aux, out))
+        mask = torch.from_numpy(np.isin(aux, out))
         aux = torch.from_numpy(aux)
         subg = get_subgraph(aux, self.graph, self.return_edge_index_type,

torch_geometric/loader/link_loader.py CHANGED Viewed

@@ -70,7 +70,7 @@ class LinkLoader(
             :obj:`edge_label_index`. If set, temporal sampling will be
             used such that neighbors are guaranteed to fulfill temporal
             constraints, *i.e.*, neighbors have an earlier timestamp than
-            the ouput edge. The :obj:`time_attr` needs to be set for this
+            the output edge. The :obj:`time_attr` needs to be set for this
             to work. (default: :obj:`None`)
         neg_sampling (NegativeSampling, optional): The negative sampling
             configuration.

torch_geometric/loader/link_neighbor_loader.py CHANGED Viewed

@@ -117,7 +117,7 @@ class LinkNeighborLoader(LinkLoader):
             :obj:`edge_label_index`. If set, temporal sampling will be
             used such that neighbors are guaranteed to fulfill temporal
             constraints, *i.e.*, neighbors have an earlier timestamp than
-            the ouput edge. The :obj:`time_attr` needs to be set for this
+            the output edge. The :obj:`time_attr` needs to be set for this
             to work. (default: :obj:`None`)
         replace (bool, optional): If set to :obj:`True`, will sample with
             replacement. (default: :obj:`False`)
@@ -170,6 +170,7 @@ class LinkNeighborLoader(LinkLoader):
             negative sampling mode.
             If set to :obj:`None`, no negative sampling strategy is applied.
             (default: :obj:`None`)
+            For example use :obj:`neg_sampling=dict(mode='binary', amount=0.5)`
         neg_sampling_ratio (int or float, optional): The ratio of sampled
             negative edges to the number of positive edges.
             Deprecated in favor of the :obj:`neg_sampling` argument.

torch_geometric/loader/mixin.py CHANGED Viewed

@@ -106,9 +106,9 @@ class MultithreadingMixin:
     def _mt_init_fn(self, worker_id: int) -> None:
         try:
             torch.set_num_threads(int(self._worker_threads))
-        except IndexError:
+        except IndexError as e:
             raise ValueError(f"Cannot set {self.worker_threads} threads "
-                             f"in worker {worker_id}")
+                             f"in worker {worker_id}") from e
         # Chain worker init functions:
         self._old_worker_init_fn(worker_id)
@@ -213,9 +213,9 @@ class AffinityMixin:
             psutil.Process().cpu_affinity(worker_cores)
-        except IndexError:
+        except IndexError as e:
             raise ValueError(f"Cannot use CPU affinity for worker ID "
-                             f"{worker_id} on CPU {self.loader_cores}")
+                             f"{worker_id} on CPU {self.loader_cores}") from e
         # Chain worker init functions:
         self._old_worker_init_fn(worker_id)
@@ -248,7 +248,8 @@ class AffinityMixin:
             warnings.warn(
                 "Due to conflicting parallelization methods it is not advised "
                 "to use affinitization with 'HeteroData' datasets. "
-                "Use `enable_multithreading` for better performance.")
+                "Use `enable_multithreading` for better performance.",
+                stacklevel=2)
         self.loader_cores = loader_cores[:] if loader_cores else None
         if self.loader_cores is None:

torch_geometric/loader/neighbor_loader.py CHANGED Viewed

@@ -14,7 +14,7 @@ class NeighborLoader(NodeLoader):
     This loader allows for mini-batch training of GNNs on large-scale graphs
     where full-batch training is not feasible.
-    More specifically, :obj:`num_neighbors` denotes how much neighbors are
+    More specifically, :obj:`num_neighbors` denotes how many neighbors are
     sampled for each node in each iteration.
     :class:`~torch_geometric.loader.NeighborLoader` takes in this list of
     :obj:`num_neighbors` and iteratively samples :obj:`num_neighbors[i]` for

torch_geometric/loader/neighbor_sampler.py CHANGED Viewed

@@ -72,9 +72,9 @@ class NeighborSampler(torch.utils.data.DataLoader):
         `examples/reddit.py
         <https://github.com/pyg-team/pytorch_geometric/blob/master/examples/
         reddit.py>`_ or
-        `examples/ogbn_products_sage.py
+        `examples/ogbn_train.py
         <https://github.com/pyg-team/pytorch_geometric/blob/master/examples/
-        ogbn_products_sage.py>`_.
+        ogbn_train.py>`_.
     Args:
         edge_index (Tensor or SparseTensor): A :obj:`torch.LongTensor` or a

pyg-nightly 2.6.0.dev20240704__py3-none-any.whl → 2.8.0.dev20251207__py3-none-any.whl

Potentially problematic release.

pyg-nightly 2.6.0.dev20240704__py3-none-any.whl → 2.8.0.dev20251207__py3-none-any.whl