pyg-nightly 2.7.0.dev20250905__py3-none-any.whl → 2.7.0.dev20250907__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pyg_nightly-2.7.0.dev20250905.dist-info → pyg_nightly-2.7.0.dev20250907.dist-info}/METADATA +2 -1
- {pyg_nightly-2.7.0.dev20250905.dist-info → pyg_nightly-2.7.0.dev20250907.dist-info}/RECORD +32 -25
- torch_geometric/__init__.py +1 -1
- torch_geometric/data/__init__.py +0 -5
- torch_geometric/data/lightning/datamodule.py +2 -2
- torch_geometric/datasets/molecule_gpt_dataset.py +1 -1
- torch_geometric/datasets/web_qsp_dataset.py +262 -210
- torch_geometric/graphgym/imports.py +2 -2
- torch_geometric/llm/__init__.py +9 -0
- torch_geometric/{data → llm}/large_graph_indexer.py +124 -61
- torch_geometric/llm/models/__init__.py +23 -0
- torch_geometric/{nn → llm}/models/g_retriever.py +68 -49
- torch_geometric/{nn → llm}/models/git_mol.py +1 -1
- torch_geometric/{nn/nlp → llm/models}/llm.py +167 -33
- torch_geometric/llm/models/llm_judge.py +158 -0
- torch_geometric/{nn → llm}/models/molecule_gpt.py +1 -1
- torch_geometric/{nn/nlp → llm/models}/sentence_transformer.py +42 -8
- torch_geometric/llm/models/txt2kg.py +353 -0
- torch_geometric/llm/rag_loader.py +154 -0
- torch_geometric/llm/utils/backend_utils.py +442 -0
- torch_geometric/llm/utils/feature_store.py +169 -0
- torch_geometric/llm/utils/graph_store.py +199 -0
- torch_geometric/llm/utils/vectorrag.py +124 -0
- torch_geometric/loader/__init__.py +0 -4
- torch_geometric/nn/__init__.py +0 -1
- torch_geometric/nn/models/__init__.py +0 -10
- torch_geometric/nn/models/sgformer.py +2 -0
- torch_geometric/loader/rag_loader.py +0 -107
- torch_geometric/nn/nlp/__init__.py +0 -9
- {pyg_nightly-2.7.0.dev20250905.dist-info → pyg_nightly-2.7.0.dev20250907.dist-info}/WHEEL +0 -0
- {pyg_nightly-2.7.0.dev20250905.dist-info → pyg_nightly-2.7.0.dev20250907.dist-info}/licenses/LICENSE +0 -0
- /torch_geometric/{nn → llm}/models/glem.py +0 -0
- /torch_geometric/{nn → llm}/models/protein_mpnn.py +0 -0
- /torch_geometric/{nn/nlp → llm/models}/vision_transformer.py +0 -0
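The listing above moves the LLM-related code out of torch_geometric.nn.nlp and torch_geometric.nn.models into a new torch_geometric.llm package. As a rough orientation, and assuming the relocated classes are re-exported from the new subpackage __init__ files (the +9/+23 additions to the llm __init__ files suggest this, but the exact exports are not shown in this diff), the import change looks like:

# Hypothetical before/after for the relocated modules:
# 2.7.0.dev20250905:
#     from torch_geometric.nn.nlp import SentenceTransformer, LLM
#     from torch_geometric.nn.models import GRetriever
# 2.7.0.dev20250907:
from torch_geometric.llm.models import SentenceTransformer, LLM, GRetriever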
torch_geometric/llm/utils/graph_store.py
ADDED
@@ -0,0 +1,199 @@
+from typing import Any, Dict, Optional, Tuple, Union
+
+import torch
+from torch import Tensor
+
+from torch_geometric.data import FeatureStore
+from torch_geometric.distributed import LocalGraphStore
+from torch_geometric.sampler import (
+    BidirectionalNeighborSampler,
+    NodeSamplerInput,
+    SamplerOutput,
+)
+from torch_geometric.utils import index_sort
+
+# A representation of an edge index, following the possible formats:
+# * default: Tensor, size = [2, num_edges]
+#   Tensor[0, :] == row, Tensor[1, :] == col
+# * COO: (row, col)
+# * CSC: (row, colptr)
+# * CSR: (rowptr, col)
+_EdgeTensorType = Union[Tensor, Tuple[Tensor, Tensor]]
+
+
+class NeighborSamplingRAGGraphStore(LocalGraphStore):
+    """Neighbor sampling based graph-store to store & retrieve graph data."""
+    def __init__(  # type: ignore[no-untyped-def]
+        self,
+        feature_store: Optional[FeatureStore] = None,
+        **kwargs,
+    ):
+        """Initializes the graph store.
+        Optional feature store and neighbor sampling settings.
+
+        Args:
+            feature_store (optional): The feature store to use.
+                None if not yet registered.
+            **kwargs (optional):
+                Additional keyword arguments for neighbor sampling.
+        """
+        self.feature_store = feature_store
+        self.sample_kwargs = kwargs
+        self._sampler_is_initialized = False
+        self._config: Dict[str, Any] = {}
+
+        # to be set by the config
+        self.num_neighbors = None
+        super().__init__()
+
+    @property
+    def config(self) -> Dict[str, Any]:
+        """Get the config for the feature store."""
+        return self._config
+
+    def _set_from_config(self, config: Dict[str, Any], attr_name: str) -> None:
+        """Set an attribute from the config.
+
+        Args:
+            config (Dict[str, Any]): Config dictionary
+            attr_name (str): Name of attribute to set
+
+        Raises:
+            ValueError: If required attribute not found in config
+        """
+        if attr_name not in config:
+            raise ValueError(
+                f"Required config parameter '{attr_name}' not found")
+        setattr(self, attr_name, config[attr_name])
+
+    @config.setter  # type: ignore
+    def config(self, config: Dict[str, Any]) -> None:
+        """Set the config for the feature store.
+
+        Args:
+            config (Dict[str, Any]):
+                Config dictionary containing required parameters
+
+        Raises:
+            ValueError: If required parameters missing from config
+        """
+        self._set_from_config(config, "num_neighbors")
+        if hasattr(self, 'sampler'):
+            self.sampler.num_neighbors = (  # type: ignore[has-type]
+                self.num_neighbors)
+
+        self._config = config
+
+    def _init_sampler(self) -> None:
+        """Initializes neighbor sampler with the registered feature store."""
+        if self.feature_store is None:
+            raise AttributeError("Feature store not registered yet.")
+        assert self.num_neighbors is not None, \
+            "Please set num_neighbors through config"
+        self.sampler = BidirectionalNeighborSampler(
+            data=(self.feature_store, self), num_neighbors=self.num_neighbors,
+            **self.sample_kwargs)
+        self._sampler_is_initialized = True
+
+    def register_feature_store(self, feature_store: FeatureStore) -> None:
+        """Registers a feature store with the graph store.
+
+        :param feature_store: The feature store to register.
+        """
+        self.feature_store = feature_store
+        self._sampler_is_initialized = False
+
+    def put_edge_id(  # type: ignore[no-untyped-def]
+            self, edge_id: Tensor, *args, **kwargs) -> bool:
+        """Stores an edge ID in the graph store.
+
+        :param edge_id: The edge ID to store.
+        :return: Whether the operation was successful.
+        """
+        ret = super().put_edge_id(edge_id.contiguous(), *args, **kwargs)
+        self._sampler_is_initialized = False
+        return ret
+
+    @property
+    def edge_index(self) -> _EdgeTensorType:
+        """Gets the edge index of the graph.
+
+        :return: The edge index as a tensor.
+        """
+        return self.get_edge_index(*self.edge_idx_args, **self.edge_idx_kwargs)
+
+    def put_edge_index(  # type: ignore[no-untyped-def]
+            self, edge_index: _EdgeTensorType, *args, **kwargs) -> bool:
+        """Stores an edge index in the graph store.
+
+        :param edge_index: The edge index to store.
+        :return: Whether the operation was successful.
+        """
+        ret = super().put_edge_index(edge_index, *args, **kwargs)
+        # HACK
+        self.edge_idx_args = args
+        self.edge_idx_kwargs = kwargs
+        self._sampler_is_initialized = False
+        return ret
+
+    # HACKY
+    @edge_index.setter  # type: ignore
+    def edge_index(self, edge_index: _EdgeTensorType) -> None:
+        """Sets the edge index of the graph.
+
+        :param edge_index: The edge index to set.
+        """
+        # correct since we make node list from triples
+        if isinstance(edge_index, Tensor):
+            num_nodes = int(edge_index.max()) + 1
+        else:
+            assert isinstance(edge_index, tuple) \
+                and isinstance(edge_index[0], Tensor) \
+                and isinstance(edge_index[1], Tensor), \
+                "edge_index must be a Tensor of [2, num_edges] \
+                or a tuple of Tensors, (row, col)."
+
+            num_nodes = int(edge_index[0].max()) + 1
+        attr = dict(
+            edge_type=None,
+            layout='coo',
+            size=(num_nodes, num_nodes),
+            is_sorted=False,
+        )
+        # edge index needs to be sorted here and the perm saved for later
+        col_sorted, self.perm = index_sort(edge_index[1], num_nodes,
+                                           stable=True)
+        row_sorted = edge_index[0][self.perm]
+        edge_index_sorted = torch.stack([row_sorted, col_sorted], dim=0)
+        self.put_edge_index(edge_index_sorted, **attr)
+
+    def sample_subgraph(
+        self,
+        seed_nodes: Tensor,
+    ) -> SamplerOutput:
+        """Sample the graph starting from the given nodes using the
+        in-built NeighborSampler.
+
+        Args:
+            seed_nodes (InputNodes): Seed nodes to start sampling from.
+            num_neighbors (Optional[NumNeighborsType], optional): Parameters
+                to determine how many hops and number of neighbors per hop.
+                Defaults to None.
+
+        Returns:
+            Union[SamplerOutput, HeteroSamplerOutput]: NeighborSamplerOutput
+                for the input.
+        """
+        # TODO add support for Hetero
+        if not self._sampler_is_initialized:
+            self._init_sampler()
+
+        seed_nodes = seed_nodes.unique().contiguous()
+        node_sample_input = NodeSamplerInput(input_id=None, node=seed_nodes)
+        out = self.sampler.sample_from_nodes(  # type: ignore[has-type]
+            node_sample_input)
+
+        # edge ids need to be remapped to the original indices
+        out.edge = self.perm[out.edge]
+
+        return out
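A minimal usage sketch of the graph store added above, based only on the methods visible in this hunk; the feature store object and the num_neighbors value are placeholders, not something this diff prescribes:

import torch
from torch_geometric.llm.utils.graph_store import NeighborSamplingRAGGraphStore

store = NeighborSamplingRAGGraphStore()

# The edge_index setter sorts the edges by column, remembers the permutation
# in store.perm, and registers the result via put_edge_index().
store.edge_index = torch.tensor([[0, 1, 2, 3],
                                 [1, 2, 3, 0]])

# num_neighbors must be supplied through the config before sampling;
# [10, 10] is just an arbitrary two-hop example.
store.config = {"num_neighbors": [10, 10]}

# A compatible FeatureStore must be registered before sampling, since the
# BidirectionalNeighborSampler is built lazily from (feature_store, store).
store.register_feature_store(feature_store)  # placeholder object, not shown here

out = store.sample_subgraph(seed_nodes=torch.tensor([0, 2]))
# `out` is a SamplerOutput; out.edge has been mapped back through store.perm.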
torch_geometric/llm/utils/vectorrag.py
ADDED
@@ -0,0 +1,124 @@
+# mypy: ignore-errors
+import os
+from abc import abstractmethod
+from typing import Any, Callable, Dict, List, Optional, Protocol, Union
+
+import torch
+from torch import Tensor
+
+from torch_geometric.data import Data
+from torch_geometric.llm.models import SentenceTransformer
+from torch_geometric.llm.utils.backend_utils import batch_knn
+
+
+class VectorRetriever(Protocol):
+    """Protocol for VectorRAG."""
+    @abstractmethod
+    def query(self, query: Any, **kwargs: Optional[Dict[str, Any]]) -> Data:
+        """Retrieve a context for a given query."""
+        ...
+
+
+class DocumentRetriever(VectorRetriever):
+    """Retrieve documents from a vector database."""
+    def __init__(self, raw_docs: List[str],
+                 embedded_docs: Optional[Tensor] = None, k_for_docs: int = 2,
+                 model: Optional[Union[SentenceTransformer, torch.nn.Module,
+                                       Callable]] = None,
+                 model_kwargs: Optional[Dict[str, Any]] = None):
+        """Retrieve documents from a vector database.
+
+        Args:
+            raw_docs: List[str]: List of raw documents.
+            embedded_docs: Optional[Tensor]: Embedded documents.
+            k_for_docs: int: Number of documents to retrieve.
+            model: Optional[Union[SentenceTransformer, torch.nn.Module]]:
+                Model to use for encoding.
+            model_kwargs: Optional[Dict[str, Any]]:
+                Keyword arguments to pass to the model.
+        """
+        self.raw_docs = raw_docs
+        self.embedded_docs = embedded_docs
+        self.k_for_docs = k_for_docs
+        self.model = model
+
+        if self.model is not None:
+            self.encoder = self.model
+            self.model_kwargs = model_kwargs
+
+        if self.embedded_docs is None:
+            assert self.model is not None, \
+                "Model must be provided if embedded_docs is not provided"
+            self.model_kwargs = model_kwargs or {}
+            self.embedded_docs = self.encoder(self.raw_docs,
+                                              **self.model_kwargs)
+            # we don't want to print the verbose output in `query`
+            self.model_kwargs.pop("verbose", None)
+
+    def query(self, query: Union[str, Tensor]) -> List[str]:
+        """Retrieve documents from the vector database.
+
+        Args:
+            query: Union[str, Tensor]: Query to retrieve documents for.
+
+        Returns:
+            List[str]: Documents retrieved from the vector database.
+        """
+        if isinstance(query, str):
+            query_enc = self.encoder(query, **self.model_kwargs)
+        else:
+            query_enc = query
+
+        selected_doc_idxs, _ = next(
+            batch_knn(query_enc, self.embedded_docs, self.k_for_docs))
+        return [self.raw_docs[i] for i in selected_doc_idxs]
+
+    def save(self, path: str) -> None:
+        """Save the DocumentRetriever instance to disk.
+
+        Args:
+            path: str: Path where to save the retriever.
+        """
+        os.makedirs(os.path.dirname(os.path.abspath(path)), exist_ok=True)
+
+        # Prepare data to save
+        save_dict = {
+            'raw_docs': self.raw_docs,
+            'embedded_docs': self.embedded_docs,
+            'k_for_docs': self.k_for_docs,
+        }
+
+        # We do not serialize the model
+        torch.save(save_dict, path)
+
+    @classmethod
+    def load(cls, path: str, model: Union[SentenceTransformer, torch.nn.Module,
+                                          Callable],
+             model_kwargs: Optional[Dict[str, Any]] = None) -> VectorRetriever:
+        """Load a DocumentRetriever instance from disk.
+
+        Args:
+            path: str: Path to the saved retriever.
+            model: Union[SentenceTransformer, torch.nn.Module, Callable]:
+                Model to use for encoding.
+                If None, the saved model will be used if available.
+            model_kwargs: Optional[Dict[str, Any]]
+                Key word args to be passed to model
+
+        Returns:
+            DocumentRetriever: The loaded retriever.
+        """
+        if not os.path.exists(path):
+            raise FileNotFoundError(
+                f"No saved document retriever found at {path}")
+
+        save_dict = torch.load(path, weights_only=False)
+        if save_dict['embedded_docs'] is not None \
+                and isinstance(save_dict['embedded_docs'], Tensor)\
+                and model_kwargs is not None:
+            model_kwargs.pop("verbose", None)
+        # Create a new DocumentRetriever with the loaded data
+        return cls(raw_docs=save_dict['raw_docs'],
+                   embedded_docs=save_dict['embedded_docs'],
+                   k_for_docs=save_dict['k_for_docs'], model=model,
+                   model_kwargs=model_kwargs)
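A short, self-contained sketch of the DocumentRetriever added above. The toy encoder is purely illustrative; DocumentRetriever accepts any callable that maps text to an embedding Tensor (a SentenceTransformer from torch_geometric.llm.models being the intended choice), and the exact shape expectations of batch_knn are not shown in this diff:

import torch
from torch_geometric.llm.utils.vectorrag import DocumentRetriever

docs = ["PyG supports heterogeneous graphs.", "GNNs operate on graph data."]

def toy_encode(text, **kwargs):
    # Character-histogram embedding; a stand-in for a real text encoder.
    items = [text] if isinstance(text, str) else list(text)
    out = torch.zeros(len(items), 64)
    for i, s in enumerate(items):
        for ch in s.lower():
            out[i, ord(ch) % 64] += 1.0
    return out

retriever = DocumentRetriever(raw_docs=docs, model=toy_encode, k_for_docs=1)
print(retriever.query("Which inputs do GNNs operate on?"))

# save() persists raw_docs / embedded_docs / k_for_docs only; the encoder is
# not serialized and must be passed back in on load().
retriever.save("retriever.pt")
restored = DocumentRetriever.load("retriever.pt", model=toy_encode)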
torch_geometric/loader/__init__.py
CHANGED
@@ -22,7 +22,6 @@ from .dynamic_batch_sampler import DynamicBatchSampler
 from .prefetch import PrefetchLoader
 from .cache import CachedLoader
 from .mixin import AffinityMixin
-from .rag_loader import RAGQueryLoader, RAGFeatureStore, RAGGraphStore
 
 __all__ = classes = [
     'DataLoader',
@@ -51,9 +50,6 @@ __all__ = classes = [
     'PrefetchLoader',
     'CachedLoader',
     'AffinityMixin',
-    'RAGQueryLoader',
-    'RAGFeatureStore',
-    'RAGGraphStore'
 ]
 
 RandomNodeSampler = deprecated(
torch_geometric/nn/__init__.py
CHANGED
torch_geometric/nn/models/__init__.py
CHANGED
@@ -29,11 +29,6 @@ from .gnnff import GNNFF
 from .pmlp import PMLP
 from .neural_fingerprint import NeuralFingerprint
 from .visnet import ViSNet
-from .g_retriever import GRetriever
-from .git_mol import GITMol
-from .molecule_gpt import MoleculeGPT
-from .protein_mpnn import ProteinMPNN
-from .glem import GLEM
 from .lpformer import LPFormer
 from .sgformer import SGFormer
 
@@ -87,11 +82,6 @@ __all__ = classes = [
     'PMLP',
     'NeuralFingerprint',
     'ViSNet',
-    'GRetriever',
-    'GITMol',
-    'MoleculeGPT',
-    'ProteinMPNN',
-    'GLEM',
     'LPFormer',
     'SGFormer',
     'Polynormer',
torch_geometric/nn/models/sgformer.py
CHANGED
@@ -187,6 +187,8 @@ class SGFormer(torch.nn.Module):
         self.params2 = list(self.graph_conv.parameters())
         self.params2.extend(list(self.fc.parameters()))
 
+        self.out_channels = out_channels
+
     def reset_parameters(self) -> None:
         self.trans_conv.reset_parameters()
         self.graph_conv.reset_parameters()
torch_geometric/loader/rag_loader.py
DELETED
@@ -1,107 +0,0 @@
-from abc import abstractmethod
-from typing import Any, Callable, Dict, Optional, Protocol, Tuple, Union
-
-from torch_geometric.data import Data, FeatureStore, HeteroData
-from torch_geometric.sampler import HeteroSamplerOutput, SamplerOutput
-from torch_geometric.typing import InputEdges, InputNodes
-
-
-class RAGFeatureStore(Protocol):
-    """Feature store template for remote GNN RAG backend."""
-    @abstractmethod
-    def retrieve_seed_nodes(self, query: Any, **kwargs) -> InputNodes:
-        """Makes a comparison between the query and all the nodes to get all
-        the closest nodes. Return the indices of the nodes that are to be seeds
-        for the RAG Sampler.
-        """
-        ...
-
-    @abstractmethod
-    def retrieve_seed_edges(self, query: Any, **kwargs) -> InputEdges:
-        """Makes a comparison between the query and all the edges to get all
-        the closest nodes. Returns the edge indices that are to be the seeds
-        for the RAG Sampler.
-        """
-        ...
-
-    @abstractmethod
-    def load_subgraph(
-        self, sample: Union[SamplerOutput, HeteroSamplerOutput]
-    ) -> Union[Data, HeteroData]:
-        """Combines sampled subgraph output with features in a Data object."""
-        ...
-
-
-class RAGGraphStore(Protocol):
-    """Graph store template for remote GNN RAG backend."""
-    @abstractmethod
-    def sample_subgraph(self, seed_nodes: InputNodes, seed_edges: InputEdges,
-                        **kwargs) -> Union[SamplerOutput, HeteroSamplerOutput]:
-        """Sample a subgraph using the seeded nodes and edges."""
-        ...
-
-    @abstractmethod
-    def register_feature_store(self, feature_store: FeatureStore):
-        """Register a feature store to be used with the sampler. Samplers need
-        info from the feature store in order to work properly on HeteroGraphs.
-        """
-        ...
-
-
-# TODO: Make compatible with Heterographs
-
-
-class RAGQueryLoader:
-    """Loader meant for making RAG queries from a remote backend."""
-    def __init__(self, data: Tuple[RAGFeatureStore, RAGGraphStore],
-                 local_filter: Optional[Callable[[Data, Any], Data]] = None,
-                 seed_nodes_kwargs: Optional[Dict[str, Any]] = None,
-                 seed_edges_kwargs: Optional[Dict[str, Any]] = None,
-                 sampler_kwargs: Optional[Dict[str, Any]] = None,
-                 loader_kwargs: Optional[Dict[str, Any]] = None):
-        """Loader meant for making queries from a remote backend.
-
-        Args:
-            data (Tuple[RAGFeatureStore, RAGGraphStore]): Remote FeatureStore
-                and GraphStore to load from. Assumed to conform to the
-                protocols listed above.
-            local_filter (Optional[Callable[[Data, Any], Data]], optional):
-                Optional local transform to apply to data after retrieval.
-                Defaults to None.
-            seed_nodes_kwargs (Optional[Dict[str, Any]], optional): Parameters
-                to pass into process for fetching seed nodes. Defaults to None.
-            seed_edges_kwargs (Optional[Dict[str, Any]], optional): Parameters
-                to pass into process for fetching seed edges. Defaults to None.
-            sampler_kwargs (Optional[Dict[str, Any]], optional): Parameters to
-                pass into process for sampling graph. Defaults to None.
-            loader_kwargs (Optional[Dict[str, Any]], optional): Parameters to
-                pass into process for loading graph features. Defaults to None.
-        """
-        fstore, gstore = data
-        self.feature_store = fstore
-        self.graph_store = gstore
-        self.graph_store.register_feature_store(self.feature_store)
-        self.local_filter = local_filter
-        self.seed_nodes_kwargs = seed_nodes_kwargs or {}
-        self.seed_edges_kwargs = seed_edges_kwargs or {}
-        self.sampler_kwargs = sampler_kwargs or {}
-        self.loader_kwargs = loader_kwargs or {}
-
-    def query(self, query: Any) -> Data:
-        """Retrieve a subgraph associated with the query with all its feature
-        attributes.
-        """
-        seed_nodes = self.feature_store.retrieve_seed_nodes(
-            query, **self.seed_nodes_kwargs)
-        seed_edges = self.feature_store.retrieve_seed_edges(
-            query, **self.seed_edges_kwargs)
-
-        subgraph_sample = self.graph_store.sample_subgraph(
-            seed_nodes, seed_edges, **self.sampler_kwargs)
-
-        data = self.feature_store.load_subgraph(sample=subgraph_sample,
-                                                **self.loader_kwargs)
-
-        if self.local_filter:
-            data = self.local_filter(data, query)
-        return data
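For orientation, the removed loader was driven roughly as sketched below (old import path, which torch_geometric.loader re-exported until this release); MyFeatureStore and MyGraphStore stand for user-defined implementations of the two protocols and are not part of this diff. The 2.7.0.dev20250907 wheel ships a reworked loader as torch_geometric/llm/rag_loader.py instead, whose contents are not shown here:

from torch_geometric.loader import RAGQueryLoader  # pre-2.7.0.dev20250907 path

# Hypothetical protocol implementations (not part of this diff):
feature_store = MyFeatureStore()  # retrieve_seed_nodes/edges + load_subgraph
graph_store = MyGraphStore()      # sample_subgraph + register_feature_store

loader = RAGQueryLoader(data=(feature_store, graph_store))
subgraph = loader.query("Who discovered penicillin?")  # -> torch_geometric.data.Data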
{pyg_nightly-2.7.0.dev20250905.dist-info → pyg_nightly-2.7.0.dev20250907.dist-info}/WHEEL
RENAMED
File without changes
{pyg_nightly-2.7.0.dev20250905.dist-info → pyg_nightly-2.7.0.dev20250907.dist-info}/licenses/LICENSE
RENAMED
File without changes
/torch_geometric/{nn → llm}/models/glem.py
RENAMED
File without changes
/torch_geometric/{nn → llm}/models/protein_mpnn.py
RENAMED
File without changes
/torch_geometric/{nn/nlp → llm/models}/vision_transformer.py
RENAMED
File without changes