PyPI - relationalai - Versions diffs - 0.12.1__py3-none-any.whl → 0.12.3__py3-none-any.whl - Mend

relationalai 0.12.1__py3-none-any.whl → 0.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

relationalai/semantics/reasoners/graph/core.py CHANGED Viewed

@@ -1332,21 +1332,6 @@ class Graph():
     # presently in use by the `cosine_similarity` and
     # `jaccard_similarity` relationships.
-    def _count_common_outneighbor_fragment(self, node_u, node_v):
-        """
-        Helper for cosine_similarity and jaccard_similarity that returns a fragment
-        that counts the common outneighbors of given nodes `node_u` and `node_v`.
-        """
-        common_outneighbor_node = self.Node.ref()
-        return (
-            count(common_outneighbor_node)
-            .per(node_u, node_v)
-            .where(
-                self._outneighbor(node_u, common_outneighbor_node),
-                self._outneighbor(node_v, common_outneighbor_node),
-            )
-        )
     def _wu_dot_wv_fragment(self, node_u, node_v):
         """
         Helper for cosine_similarity that returns a fragment that produces an
@@ -5592,18 +5577,71 @@ class Graph():
     @include_in_docs
-    def jaccard_similarity(self):
-        """Returns a ternary relationship containing the Jaccard similarity for all pairs of nodes.
+    def jaccard_similarity(
+            self,
+            *,
+            full: Optional[bool] = None,
+            from_: Optional[Relationship] = None,
+            to: Optional[Relationship] = None,
+            between: Optional[Relationship] = None,
+        ):
+        """Returns a ternary relationship containing
+        the Jaccard similarity for pairs of nodes.
         The Jaccard similarity is a measure between two nodes that ranges from
         0.0 to 1.0, where higher values indicate greater similarity.
+        Parameters
+        ----------
+        full : bool, optional
+            If ``True``, computes the Jaccard similarity for all pairs
+            of nodes in the graph. This computation can be expensive for large graphs,
+            as the result can scale quadratically in the number of nodes. Mutually exclusive
+            with other parameters.
+            Default is ``None``.
+        from_ : Relationship, optional
+            A unary relationship containing a subset of the graph's nodes. When
+            provided, constrains the domain of the Jaccard similarity computation: only
+            Jaccard similarity scores for node pairs where the first node is
+            in this relationship are computed and returned. Mutually exclusive with
+            ``full`` and ``between``.
+            Default is ``None``.
+        to : Relationship, optional
+            A unary relationship containing a subset of the graph's nodes. Can only
+            be used together with the ``from_`` parameter. When provided with ``from_``,
+            constrains the domain of the Jaccard similarity computation: only
+            Jaccard similarity scores for node pairs where the first node is
+            in ``from_`` and the second node is in ``to`` are computed and returned.
+            Default is ``None``.
+        between : Relationship, optional
+            A binary relationship containing pairs of nodes. When provided,
+            constrains the domain of the Jaccard similarity computation: only
+            Jaccard similarity scores for the specific node pairs in
+            this relationship are computed and returned. Mutually exclusive
+            with other parameters.
+            Default is ``None``.
         Returns
         -------
         Relationship
             A ternary relationship where each tuple represents a pair of nodes
             and their Jaccard similarity.
+        Raises
+        ------
+        ValueError
+            If ``full`` is provided with any other parameter.
+            If ``between`` is provided with any other parameter.
+            If ``from_`` is provided with any parameter other than ``to``.
+            If none of ``full``, ``from_``, or ``between`` is provided.
+            If ``full`` is not ``True`` or ``None``.
+        AssertionError
+            If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
+            If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
+            If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
+            If ``from_`` or ``to`` is not a unary relationship.
+            If ``between`` is not a binary relationship.
         Relationship Schema
         -------------------
         ``jaccard_similarity(node_u, node_v, score)``
@@ -5652,6 +5690,40 @@ class Graph():
         The weighted Jaccard similarity between node 1 and 2 is then:
         `0.46 / (1.6 + 1.6 + 1.4) = 0.1`.
+        Edge weights are assumed to be non-negative, so the neighborhood
+        vectors contain only non-negative elements. Therefore, the Jaccard
+        similarity score is always between 0.0 and 1.0, inclusive.
+        The ``jaccard_similarity(full=True)`` method computes and caches
+        the full Jaccard similarity relationship for all pairs of nodes,
+        providing efficient reuse across multiple calls. This can be expensive
+        as the result can contain O(|V|²) tuples.
+        Calling ``jaccard_similarity()`` without arguments raises a ``ValueError``,
+        to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
+        In contrast, ``jaccard_similarity(from_=subset)`` constrains the computation to
+        tuples with the first position in the passed-in ``subset``. The result is
+        not cached; it is specific to the call site. When a significant fraction of
+        the Jaccard similarity relation is needed across a program,
+        ``jaccard_similarity(full=True)`` is typically more efficient. Use
+        ``jaccard_similarity(from_=subset)`` only when small subsets of
+        the Jaccard similarity relationship are needed
+        collectively across the program.
+        The ``to`` parameter can be used together with ``from_`` to further
+        constrain the computation: ``jaccard_similarity(from_=subset_a, to=subset_b)``
+        computes Jaccard similarity scores only for node pairs where the first node is in
+        ``subset_a`` and the second node is in ``subset_b``. (Since ``jaccard_similarity``
+        is symmetric in its first two positions, using ``to`` without ``from_`` would
+        be functionally redundant, and is not allowed.)
+        The ``between`` parameter provides another way to constrain the computation.
+        Unlike ``from_`` and ``to``, which allow you to independently constrain the first
+        and second positions in ``jaccard_similarity`` tuples to sets of nodes, ``between``
+        allows you to constrain the first and second positions, jointly, to specific pairs
+        of nodes.
         Examples
         --------
         **Unweighted Graph Examples**
@@ -5673,8 +5745,8 @@ class Graph():
         ...     Edge.new(src=n4, dst=n3),
         ... )
         >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
-        >>> jaccard = graph.jaccard_similarity()
-        >>> select(score).where(jaccard(u, v, score), u.id == 2, v.id == 4).inspect()
+        >>> jaccard_similarity = graph.jaccard_similarity(full=True)
+        >>> select(score).where(jaccard_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
         ▰▰▰▰ Setup complete
            score
         0   0.25
@@ -5696,8 +5768,8 @@ class Graph():
         ...     Edge.new(src=n4, dst=n3),
         ... )
         >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
-        >>> jaccard = graph.jaccard_similarity()
-        >>> select(score).where(jaccard(u, v, score), u.id == 2, v.id == 4).inspect()
+        >>> jaccard_similarity = graph.jaccard_similarity(full=True)
+        >>> select(score).where(jaccard_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
         ▰▰▰▰ Setup complete
            score
         0    0.5
@@ -5724,12 +5796,57 @@ class Graph():
         >>>
         >>> # 3. Select the weighted Jaccard similarity for the pair (1, 2)
         >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
-        >>> jaccard = graph.jaccard_similarity()
-        >>> select(score).where(jaccard(u, v, score), u.id == 1, v.id == 2).inspect()
+        >>> jaccard_similarity = graph.jaccard_similarity(full=True)
+        >>> select(score).where(jaccard_similarity(u, v, score), u.id == 1, v.id == 2).inspect()
         ▰▰▰▰ Setup complete
            score
         0    0.1
+        **Domain Constraint Examples**
+        >>> # Use 'from_' parameter to constrain the set of nodes for the first position
+        >>> # Using the same undirected unweighted graph from above
+        >>> from relationalai.semantics import where
+        >>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
+        >>> node = Node.ref()
+        >>> where(node.id == 2).define(subset(node))
+        >>>
+        >>> # Get Jaccard similarity scores only for pairs where first node is in subset
+        >>> constrained_jaccard_similarity = graph.jaccard_similarity(from_=subset)
+        >>> select(u.id, v.id, score).where(constrained_jaccard_similarity(u, v, score)).inspect()
+        ▰▰▰▰ Setup complete
+           id  id2  score
+        0   2    2   1.00
+        1   2    3   0.50
+        2   2    4   0.25
+        >>> # Use both 'from_' and 'to' parameters to constrain both positions
+        >>> from_subset = model.Relationship(f"{{node:{Node}}} is in from_subset")
+        >>> to_subset = model.Relationship(f"{{node:{Node}}} is in to_subset")
+        >>> where(node.id == 2).define(from_subset(node))
+        >>> where(node.id == 4).define(to_subset(node))
+        >>>
+        >>> # Get Jaccard similarity scores only where first node is in from_subset and second node is in to_subset
+        >>> constrained_jaccard_similarity = graph.jaccard_similarity(from_=from_subset, to=to_subset)
+        >>> select(u.id, v.id, score).where(constrained_jaccard_similarity(u, v, score)).inspect()
+        ▰▰▰▰ Setup complete
+           id  id2  score
+        0   2    4   0.25
+        >>> # Use 'between' parameter to constrain to specific pairs of nodes
+        >>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
+        >>> node_a, node_b = Node.ref(), Node.ref()
+        >>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
+        >>> where(node_a.id == 3, node_b.id == 4).define(pairs(node_a, node_b))
+        >>>
+        >>> # Get Jaccard similarity scores only for the specific pairs (2, 4) and (3, 4)
+        >>> constrained_jaccard_similarity = graph.jaccard_similarity(between=pairs)
+        >>> select(u.id, v.id, score).where(constrained_jaccard_similarity(u, v, score)).inspect()
+        ▰▰▰▰ Setup complete
+           id  id2  score
+        0   2    4   0.25
+        1   3    4   0.50
         References
         ----------
         Frigo M, Cruciani E, Coudert D, Deriche R, Natale E, Deslauriers-Gauthier S.
@@ -5738,57 +5855,242 @@ class Graph():
         doi: 10.1162/netn_a_00199. PMID: 34746624; PMCID: PMC8567827.
         """
-        warnings.warn(
-            (
-                "`jaccard_similarity` presently always computes the similarity "
-                "of all pairs of nodes of the graph. To provide better control over "
-                "the computed subset, `jaccard_similarity`'s interface will soon "
-                "need to change."
-            ),
-            FutureWarning,
-            stacklevel=2
+        # Validate domain constraint parameters.
+        self._validate_domain_constraint_parameters(
+            'jaccard_similarity', full, from_, to, between
         )
+        # At this point, exactly one of `full`, `from_`, or `between`
+        # has been provided, and if `to` is provided, `from_` is also provided.
+        # Handle `between`.
+        if between is not None:
+            self._validate_pair_subset_parameter(between)
+            return self._jaccard_similarity_between(between)
+        # Handle `from_` (and potentially `to`).
+        if from_ is not None:
+            self._validate_node_subset_parameter('from_', from_)
+            if to is not None:
+                self._validate_node_subset_parameter('to', to)
+                return self._jaccard_similarity_from_to(from_, to)
+            return self._jaccard_similarity_from(from_)
+        # Handle `full`.
         return self._jaccard_similarity
     @cached_property
     def _jaccard_similarity(self):
-        """Lazily define and cache the self._jaccard_similarity relationship."""
-        _jaccard_similarity_rel = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} has a similarity to {{node_v:{self._NodeConceptStr}}} of {{similarity:Float}}")
+        """Lazily define and cache the full jaccard_similarity relationship."""
+        _jaccard_similarity_rel = self._create_jaccard_similarity_relationship()
         _jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity"))
+        return _jaccard_similarity_rel
-        if not self.weighted:
-            node_u, node_v = self.Node.ref(), self.Node.ref()
-            num_union_outneighbors, num_u_outneigbor, num_v_outneigbor, f = Integer.ref(),\
-                Integer.ref(), Integer.ref(), Float.ref()
-            where(num_common_outneighbor := self._count_common_outneighbor_fragment(node_u, node_v),
-                  self._count_outneighbor(node_u, num_u_outneigbor),
-                  self._count_outneighbor(node_v, num_v_outneigbor),
-                  num_union_outneighbors := num_u_outneigbor + num_v_outneigbor - num_common_outneighbor,
-                  f := num_common_outneighbor / num_union_outneighbors).define(_jaccard_similarity_rel(node_u, node_v, f))
+    def _jaccard_similarity_from(self, node_subset_from: Relationship):
+        """
+        Create a jaccard_similarity relationship, with the first position in each
+        tuple constrained to be in the given subset of nodes. Note this relationship
+        is not cached; it is specific to the callsite.
+        """
+        _jaccard_similarity_rel = self._create_jaccard_similarity_relationship(
+            node_subset_from=node_subset_from
+        )
+        _jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity_from"))
+        return _jaccard_similarity_rel
+    def _jaccard_similarity_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
+        """
+        Create a jaccard_similarity relationship, with the first position in each
+        tuple constrained to be in `node_subset_from`, and the second position in
+        each tuple constrained to be in `node_subset_to`. Note this relationship
+        is not cached; it is specific to the callsite.
+        """
+        _jaccard_similarity_rel = self._create_jaccard_similarity_relationship(
+            node_subset_from=node_subset_from,
+            node_subset_to=node_subset_to
+        )
+        _jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity_from_to"))
+        return _jaccard_similarity_rel
+    def _jaccard_similarity_between(self, pair_subset_between: Relationship):
+        """
+        Create a jaccard_similarity relationship, with the first and second position
+        in each tuple jointly constrained to be in the given set of pairs
+        of nodes. Note this relationship is not cached;
+        it is specific to the callsite.
+        """
+        _jaccard_similarity_rel = self._create_jaccard_similarity_relationship(
+            pair_subset_between=pair_subset_between
+        )
+        _jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity_between"))
+        return _jaccard_similarity_rel
+    def _create_jaccard_similarity_relationship(
+        self,
+        *,
+        node_subset_from: Optional[Relationship] = None,
+        node_subset_to: Optional[Relationship] = None,
+        pair_subset_between: Optional[Relationship] = None,
+    ):
+        """
+        Create jaccard_similarity relationship, optionally constrained by
+        the provided node subsets or pair subset.
+        """
+        _jaccard_similarity_rel = self._model.Relationship(
+            f"{{node_u:{self._NodeConceptStr}}} has a Jaccard similarity to "
+            f"{{node_v:{self._NodeConceptStr}}} of {{score:Float}}"
+        )
+        # Branch by case to select appropriate count_outneighbor,
+        # outneighbor, and weighted_outdegree relationships, and build
+        # appropriate constraints on the domain of the nodes.
+        node_u, node_v = self.Node.ref(), self.Node.ref()
+        # TODO: Optimization opportunity. In a number of branches below,
+        #   we compute _count_outneighbor_of, which transitively computes
+        #   _outneighbor_of, and then compute _outneighbor_of directly;
+        #   the present code structure makes this a developer-time-efficient
+        #   way to get this off the ground, but of course involves redundant
+        #   work. In future this redundant work could be eliminated.
+        # Handle the `between` case.
+        if pair_subset_between is not None:
+            # Extract first-position and second-position nodes.
+            first_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
+            second_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
+            node_x, node_y = self.Node.ref(), self.Node.ref()
+            where(
+                pair_subset_between(node_x, node_y)
+            ).define(
+                first_position_subset(node_x),
+                second_position_subset(node_y)
+            )
+            if not self.weighted:
+                count_outneighbor_u_rel = self._count_outneighbor_of(first_position_subset)
+                count_outneighbor_v_rel = self._count_outneighbor_of(second_position_subset)
+                outneighbor_u_rel = self._outneighbor_of(first_position_subset)
+                outneighbor_v_rel = self._outneighbor_of(second_position_subset)
+            else: # self.weighted
+                weighted_outdegree_u_rel = self._weighted_outdegree_of(first_position_subset)
+                weighted_outdegree_v_rel = self._weighted_outdegree_of(second_position_subset)
+            node_constraints = [pair_subset_between(node_u, node_v)]
+        # Handle the `from_` case.
+        elif node_subset_from is not None and node_subset_to is None:
+            if not self.weighted:
+                count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
+                count_outneighbor_v_rel = self._count_outneighbor
+                outneighbor_u_rel = self._outneighbor_of(node_subset_from)
+                outneighbor_v_rel = self._outneighbor
+            else: # self.weighted
+                weighted_outdegree_u_rel = self._weighted_outdegree_of(node_subset_from)
+                weighted_outdegree_v_rel = self._weighted_outdegree
+            # TODO: Implement depth-two traversal strategy for better performance.
+            #   See similar comments on related similarity metrics.
+            node_constraints = [node_subset_from(node_u)]
+        # Handle the `from_`/`to` case.
+        elif node_subset_from is not None and node_subset_to is not None:
+            # Check for object identity optimization.
+            if node_subset_from is node_subset_to:
+                if not self.weighted:
+                    count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
+                    count_outneighbor_v_rel = count_outneighbor_u_rel
+                    outneighbor_u_rel = self._outneighbor_of(node_subset_from)
+                    outneighbor_v_rel = outneighbor_u_rel
+                else: # self.weighted
+                    weighted_outdegree_u_rel = self._weighted_outdegree_of(node_subset_from)
+                    weighted_outdegree_v_rel = weighted_outdegree_u_rel
+            else:
+                if not self.weighted:
+                    count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
+                    count_outneighbor_v_rel = self._count_outneighbor_of(node_subset_to)
+                    outneighbor_u_rel = self._outneighbor_of(node_subset_from)
+                    outneighbor_v_rel = self._outneighbor_of(node_subset_to)
+                else: # self.weighted
+                    weighted_outdegree_u_rel = self._weighted_outdegree_of(node_subset_from)
+                    weighted_outdegree_v_rel = self._weighted_outdegree_of(node_subset_to)
+            node_constraints = [node_subset_from(node_u), node_subset_to(node_v)]
+        # Handle the `full` case.
         else:
-            # TODO (dba) Annotate local relationships in this scope with `@ondemand` once available.
+            if not self.weighted:
+                count_outneighbor_u_rel = self._count_outneighbor
+                count_outneighbor_v_rel = self._count_outneighbor
+                outneighbor_u_rel = self._outneighbor
+                outneighbor_v_rel = self._outneighbor
+            else: # self.weighted
+                weighted_outdegree_u_rel = self._weighted_outdegree
+                weighted_outdegree_v_rel = self._weighted_outdegree
+            node_constraints = []
+        # Define Jaccard similarity logic for weighted and unweighted cases.
+        if not self.weighted:
+            num_u_outneigbor, num_v_outneigbor = Integer.ref(), Integer.ref()
+            common_outneighbor_node = self.Node.ref()
+            num_union_outneighbors = Integer.ref()
+            score = Float.ref()
+            where(
+                *node_constraints,
+                count_outneighbor_u_rel(node_u, num_u_outneigbor),  # type: ignore[possibly-unbound]
+                count_outneighbor_v_rel(node_v, num_v_outneigbor),  # type: ignore[possibly-unbound]
+                num_common_outneighbor := count(common_outneighbor_node).per(node_u, node_v).where(
+                    outneighbor_u_rel(node_u, common_outneighbor_node),  # type: ignore[possibly-unbound]
+                    outneighbor_v_rel(node_v, common_outneighbor_node),  # type: ignore[possibly-unbound]
+                ),
+                num_union_outneighbors := num_u_outneigbor + num_v_outneigbor - num_common_outneighbor,
+                score := num_common_outneighbor / num_union_outneighbors,
+            ).define(
+                _jaccard_similarity_rel(node_u, node_v, score)
+            )
+        else:
             # (1) The numerator: For every node `k` in the graph, find the minimum weight of
             #     the out-edges from `u` and `v` to `k`, and sum those minimum weights.
             #     Note that for any node `k` that is not a common out-neighbor of nodes `u` and `v`,
             #     the minimum weight of the out-edges from `u` and `v` to `k` is zero/empty,
             #     so the sum here reduces to a sum over the common out-neighbors of `u` and `v`.
-            min_weight_to_common_outneighbor = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} have common outneighbor {{node_k:{self._NodeConceptStr}}} with minimum weight {{minweight:Float}}")
+            min_weight_to_common_outneighbor = self._model.Relationship(
+                f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
+                f"have common outneighbor {{node_k:{self._NodeConceptStr}}} "
+                f"with minimum weight {{minweight:Float}}"
+            )
-            node_u, node_v, node_k, w1, w2 = self.Node.ref(), self.Node.ref(), self.Node.ref(), Float.ref(), Float.ref()
-            w = union(where(self._weight(node_u, node_k, w1)).select(w1),
-                      where(self._weight(node_v, node_k, w2)).select(w2))
-            where(self._edge(node_u, node_k),
-                  self._edge(node_v, node_k))\
-                  .define(min_weight_to_common_outneighbor(node_u, node_v, node_k, min(w).per(node_u, node_v, node_k)))
+            node_k, w1, w2 = self.Node.ref(), Float.ref(), Float.ref()
+            w = union(
+                where(self._weight(node_u, node_k, w1)).select(w1),
+                where(self._weight(node_v, node_k, w2)).select(w2)
+            )
+            where(
+                *node_constraints,
+                self._edge(node_u, node_k),
+                self._edge(node_v, node_k)
+            ).define(
+                min_weight_to_common_outneighbor(
+                    node_u, node_v, node_k, min(w).per(node_u, node_v, node_k)
+                )
+            )
-            sum_of_min_weights_to_common_outneighbors = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} have a sum of minweights of {{minsum:Float}}")
+            sum_of_min_weights_to_common_outneighbors = self._model.Relationship(
+                f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
+                f"have a sum of minweights of {{minsum:Float}}"
+            )
             minweight = Float.ref()
-            where(min_weight_to_common_outneighbor(node_u, node_v, node_k, minweight)
-                  ).define(sum_of_min_weights_to_common_outneighbors(node_u, node_v, sum(node_k, minweight).per(node_u, node_v)))
+            where(
+                min_weight_to_common_outneighbor(node_u, node_v, node_k, minweight)
+            ).define(
+                sum_of_min_weights_to_common_outneighbors(
+                    node_u, node_v, sum(node_k, minweight).per(node_u, node_v)
+                )
+            )
             # (2) The denominator: For every node `k` in the graph, find the maximum weight of
             #     the out-edges from `u` and `v` to `k`, and sum those maximum weights.
@@ -5827,20 +6129,31 @@ class Graph():
             #         self._weighted_outdegree(u) +
             #         self._weighted_outdegree(v) -
             #         _sum_of_min_weights_to_common_outneighbors(u, v)
-            sum_of_max_weights_to_other_nodes = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} have a maxsum of {{maxsum:Float}}")
+            sum_of_max_weights_to_other_nodes = self._model.Relationship(
+                f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
+                f"have a maxsum of {{maxsum:Float}}"
+            )
             u_outdegree, v_outdegree, maxsum, minsum = Float.ref(), Float.ref(), Float.ref(), Float.ref()
-            where(self._weighted_outdegree(node_u, u_outdegree),
-                  self._weighted_outdegree(node_v, v_outdegree),
-                  sum_of_min_weights_to_common_outneighbors(node_u, node_v, minsum),
-                  maxsum == u_outdegree + v_outdegree - minsum
-                  ).define(sum_of_max_weights_to_other_nodes(node_u, node_v, maxsum))
+            where(
+                *node_constraints,
+                weighted_outdegree_u_rel(node_u, u_outdegree),  # type: ignore[possibly-unbound]
+                weighted_outdegree_v_rel(node_v, v_outdegree),  # type: ignore[possibly-unbound]
+                sum_of_min_weights_to_common_outneighbors(node_u, node_v, minsum),
+                maxsum == u_outdegree + v_outdegree - minsum
+            ).define(
+                sum_of_max_weights_to_other_nodes(node_u, node_v, maxsum)
+            )
+            # Combination of (1) and (2) to produce score.
             score = Float.ref()
-            where(sum_of_min_weights_to_common_outneighbors(node_u, node_v, minsum),
-                  sum_of_max_weights_to_other_nodes(node_u, node_v, maxsum),
-                  score == minsum/maxsum
-                  ).define(_jaccard_similarity_rel(node_u, node_v, score))
+            where(
+                sum_of_min_weights_to_common_outneighbors(node_u, node_v, minsum),
+                sum_of_max_weights_to_other_nodes(node_u, node_v, maxsum),
+                score == minsum / maxsum
+            ).define(
+                _jaccard_similarity_rel(node_u, node_v, score)
+            )
         return _jaccard_similarity_rel
@@ -6662,19 +6975,72 @@ class Graph():
     @include_in_docs
-    def preferential_attachment(self):
-        """Returns a ternary relationship containing the preferential attachment score for all pairs of nodes.
+    def preferential_attachment(
+            self,
+            *,
+            full: Optional[bool] = None,
+            from_: Optional[Relationship] = None,
+            to: Optional[Relationship] = None,
+            between: Optional[Relationship] = None,
+        ):
+        """Returns a ternary relationship containing
+        the preferential attachment score for pairs of nodes.
         The preferential attachment score between two nodes `u` and `v` is the
         number of nodes adjacent to `u` multiplied by the number of nodes
         adjacent to `v`.
+        Parameters
+        ----------
+        full : bool, optional
+            If ``True``, computes the preferential attachment score for all pairs
+            of nodes in the graph. This computation can be expensive for large graphs,
+            as the result can scale quadratically in the number of nodes. Mutually exclusive
+            with other parameters.
+            Default is ``None``.
+        from_ : Relationship, optional
+            A unary relationship containing a subset of the graph's nodes. When
+            provided, constrains the domain of the preferential attachment computation: only
+            preferential attachment scores for node pairs where the first node is
+            in this relationship are computed and returned. Mutually exclusive with
+            ``full`` and ``between``.
+            Default is ``None``.
+        to : Relationship, optional
+            A unary relationship containing a subset of the graph's nodes. Can only
+            be used together with the ``from_`` parameter. When provided with ``from_``,
+            constrains the domain of the preferential attachment computation: only
+            preferential attachment scores for node pairs where the first node is
+            in ``from_`` and the second node is in ``to`` are computed and returned.
+            Default is ``None``.
+        between : Relationship, optional
+            A binary relationship containing pairs of nodes. When provided,
+            constrains the domain of the preferential attachment computation: only
+            preferential attachment scores for the specific node pairs in
+            this relationship are computed and returned. Mutually exclusive
+            with other parameters.
+            Default is ``None``.
         Returns
         -------
         Relationship
             A ternary relationship where each tuple represents a pair of nodes
             and their preferential attachment score.
+        Raises
+        ------
+        ValueError
+            If ``full`` is provided with any other parameter.
+            If ``between`` is provided with any other parameter.
+            If ``from_`` is provided with any parameter other than ``to``.
+            If none of ``full``, ``from_``, or ``between`` is provided.
+            If ``full`` is not ``True`` or ``None``.
+        AssertionError
+            If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
+            If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
+            If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
+            If ``from_`` or ``to`` is not a unary relationship.
+            If ``between`` is not a binary relationship.
         Relationship Schema
         -------------------
         ``preferential_attachment(node_u, node_v, score)``
@@ -6691,6 +7057,38 @@ class Graph():
         | Directed   | Yes       |                      |
         | Weighted   | Yes       | Weights are ignored. |
+        Notes
+        -----
+        The ``preferential_attachment(full=True)`` method computes and caches
+        the full preferential attachment relationship for all pairs of nodes,
+        providing efficient reuse across multiple calls. This can be expensive
+        as the result contains O(|V|²) tuples.
+        Calling ``preferential_attachment()`` without arguments raises a ``ValueError``,
+        to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
+        In contrast, ``preferential_attachment(from_=subset)`` constrains the computation to
+        tuples with the first position in the passed-in ``subset``. The result is
+        not cached; it is specific to the call site. When a significant fraction of
+        the preferential attachment relation is needed across a program,
+        ``preferential_attachment(full=True)`` is typically more efficient. Use
+        ``preferential_attachment(from_=subset)`` only when small subsets of
+        the preferential attachment relationship are needed
+        collectively across the program.
+        The ``to`` parameter can be used together with ``from_`` to further
+        constrain the computation: ``preferential_attachment(from_=subset_a, to=subset_b)``
+        computes preferential attachment scores only for node pairs where the first node is in
+        ``subset_a`` and the second node is in ``subset_b``. (Since ``preferential_attachment``
+        is symmetric in its first two positions, using ``to`` without ``from_`` would
+        be functionally redundant, and is not allowed.)
+        The ``between`` parameter provides another way to constrain the computation.
+        Unlike ``from_`` and ``to``, which allow you to independently constrain the first
+        and second positions in ``preferential_attachment`` tuples to sets of nodes, ``between``
+        allows you to constrain the first and second positions, jointly, to specific pairs
+        of nodes.
         Examples
         --------
         >>> from relationalai.semantics import Model, define, select, Integer
@@ -6712,10 +7110,10 @@ class Graph():
         ...     Edge.new(src=n4, dst=n3),
         ... )
         >>>
-        >>> # 3. Select the preferential attachment score for the pair (1, 3)
+        >>> # 3. Select the preferential attachment scores from the full relationship
         >>> u, v = Node.ref("u"), Node.ref("v")
         >>> score = Integer.ref("score")
-        >>> preferential_attachment = graph.preferential_attachment()
+        >>> preferential_attachment = graph.preferential_attachment(full=True)
         >>> select(
         ...     u.id, v.id, score,
         ... ).where(
@@ -6727,64 +7125,302 @@ class Graph():
            id  id2  score
         0   1    3      3
+        >>> # 4. Use 'from_' parameter to constrain the set of nodes for the first position
+        >>> # Define a subset containing only node 1
+        >>> from relationalai.semantics import where
+        >>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
+        >>> node = Node.ref()
+        >>> where(node.id == 1).define(subset(node))
+        >>>
+        >>> # Get preferential attachment scores only for pairs where first node is in subset
+        >>> constrained_preferential_attachment = graph.preferential_attachment(from_=subset)
+        >>> select(u.id, v.id, score).where(constrained_preferential_attachment(u, v, score)).inspect()
+        ▰▰▰▰ Setup complete
+           id  id2  score
+        0   1    1      1
+        1   1    2      3
+        2   1    3      3
+        3   1    4      3
+        >>> # 5. Use both 'from_' and 'to' parameters to constrain both positions
+        >>> from_subset = model.Relationship(f"{{node:{Node}}} is in from_subset")
+        >>> to_subset = model.Relationship(f"{{node:{Node}}} is in to_subset")
+        >>> where(node.id == 1).define(from_subset(node))
+        >>> where(node.id == 3).define(to_subset(node))
+        >>>
+        >>> # Get preferential attachment scores only where first node is in from_subset and second node is in to_subset
+        >>> constrained_preferential_attachment = graph.preferential_attachment(from_=from_subset, to=to_subset)
+        >>> select(u.id, v.id, score).where(constrained_preferential_attachment(u, v, score)).inspect()
+        ▰▰▰▰ Setup complete
+           id  id2  score
+        0   1    3      3
+        >>> # 6. Use 'between' parameter to constrain to specific pairs of nodes
+        >>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
+        >>> node_a, node_b = Node.ref(), Node.ref()
+        >>> where(node_a.id == 1, node_b.id == 3).define(pairs(node_a, node_b))
+        >>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
+        >>>
+        >>> # Get preferential attachment scores only for the specific pairs (1, 3) and (2, 4)
+        >>> constrained_preferential_attachment = graph.preferential_attachment(between=pairs)
+        >>> select(u.id, v.id, score).where(constrained_preferential_attachment(u, v, score)).inspect()
+        ▰▰▰▰ Setup complete
+           id  id2  score
+        0   1    3      3
+        1   2    4      6
         """
-        warnings.warn(
-            (
-                "`preferential_attachment` presently always computes the similarity "
-                "of all pairs of nodes of the graph. To provide better control over "
-                "the computed subset, `preferential_attachment`'s interface will soon "
-                "need to change."
-            ),
-            FutureWarning,
-            stacklevel=2
+        # Validate domain constraint parameters.
+        self._validate_domain_constraint_parameters(
+            'preferential_attachment', full, from_, to, between
         )
+        # At this point, exactly one of `full`, `from_`, or `between`
+        # has been provided, and if `to` is provided, `from_` is also provided.
+        # Handle `between`.
+        if between is not None:
+            self._validate_pair_subset_parameter(between)
+            return self._preferential_attachment_between(between)
+        # Handle `from_` (and potentially `to`).
+        if from_ is not None:
+            self._validate_node_subset_parameter('from_', from_)
+            if to is not None:
+                self._validate_node_subset_parameter('to', to)
+                return self._preferential_attachment_from_to(from_, to)
+            return self._preferential_attachment_from(from_)
+        # Handle `full`.
         return self._preferential_attachment
     @cached_property
     def _preferential_attachment(self):
-        """Lazily define and cache the self._preferential_attachment relationship."""
-        _preferential_attachment_rel = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} have preferential attachment score {{score:Integer}}")
+        """Lazily define and cache the full preferential_attachment relationship."""
+        _preferential_attachment_rel = self._create_preferential_attachment_relationship()
         _preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment"))
+        return _preferential_attachment_rel
+    def _preferential_attachment_from(self, node_subset_from: Relationship):
+        """
+        Create a preferential_attachment relationship, with the first position in each
+        tuple constrained to be in the given subset of nodes. Note this relationship
+        is not cached; it is specific to the callsite.
+        """
+        _preferential_attachment_rel = self._create_preferential_attachment_relationship(
+            node_subset_from=node_subset_from
+        )
+        _preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment_from"))
+        return _preferential_attachment_rel
+    def _preferential_attachment_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
+        """
+        Create a preferential_attachment relationship, with the first position in each
+        tuple constrained to be in `node_subset_from`, and the second position in
+        each tuple constrained to be in `node_subset_to`. Note this relationship
+        is not cached; it is specific to the callsite.
+        """
+        _preferential_attachment_rel = self._create_preferential_attachment_relationship(
+            node_subset_from=node_subset_from,
+            node_subset_to=node_subset_to
+        )
+        _preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment_from_to"))
+        return _preferential_attachment_rel
+    def _preferential_attachment_between(self, pair_subset_between: Relationship):
+        """
+        Create a preferential_attachment relationship, with the first and second position
+        in each tuple jointly constrained to be in the given set of pairs
+        of nodes. Note this relationship is not cached;
+        it is specific to the callsite.
+        """
+        _preferential_attachment_rel = self._create_preferential_attachment_relationship(
+            pair_subset_between=pair_subset_between
+        )
+        _preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment_between"))
+        return _preferential_attachment_rel
+    def _create_preferential_attachment_relationship(
+        self,
+        *,
+        node_subset_from: Optional[Relationship] = None,
+        node_subset_to: Optional[Relationship] = None,
+        pair_subset_between: Optional[Relationship] = None,
+    ):
+        """
+        Create preferential_attachment relationship, optionally constrained by
+        the provided node subsets or pair subset.
+        """
+        _preferential_attachment_rel = self._model.Relationship(
+            f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
+            f"have preferential attachment score {{score:Integer}}"
+        )
+        # Branch by case to select appropriate count_neighbor and isolated_node relationships,
+        # and to define relevant constraints on the separate and joint domains of node_u and node_v.
         node_u, node_v = self.Node.ref(), self.Node.ref()
-        count_u, count_v = Integer.ref(), Integer.ref()
-        # NOTE: We consider isolated nodes separately to maintain
-        #   the dense behavior of preferential attachment.
+        # Handle the `between` case.
+        if pair_subset_between is not None:
+            # Collect nodes that appear in the subset by position.
+            first_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
+            second_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
+            node_x, node_y = self.Node.ref(), self.Node.ref()
+            where(
+                pair_subset_between(node_x, node_y)
+            ).define(
+                first_position_subset(node_x),
+                second_position_subset(node_y)
+            )
+            # Constituents of non-isolated-nodes rule.
+            non_isolated_rule_uv_constraint = [pair_subset_between(node_u, node_v)]
+            count_neighbor_u_rel = self._count_neighbor_of(first_position_subset)
+            count_neighbor_v_rel = self._count_neighbor_of(second_position_subset)
+            # Constituents of u-isolated rule.
+            isolated_u_rel = self._isolated_node_of(first_position_subset)
+            isolated_u_rule_uv_constraint = [pair_subset_between(node_u, node_v)]
+            # Constituents of v-isolated rule.
+            isolated_v_rel = self._isolated_node_of(second_position_subset)
+            isolated_v_rule_uv_constraint = [pair_subset_between(node_u, node_v)]
+        # Handle the `from_` case.
+        elif node_subset_from is not None and node_subset_to is None:
+            # NOTE: It isn't necessary to compute _count_neighbor_of
+            #   and _isolated_node_of for node_subset_from, given
+            #   we have to compute _count_neighbor and _isolated_node
+            #   for the unconstrained second position anyway. That does
+            #   require additional constraints as seen below, though.
+            #
+            #   It's not clear to this author that there is a more clever
+            #   way to do this, given that in preferential attachment,
+            #   constraining one position implies no constraint on the
+            #   other position, unlike in, e.g., common neighbor?
+            # Constituents of non-isolated-nodes rule.
+            non_isolated_rule_uv_constraint = [node_subset_from(node_u)]
+            count_neighbor_u_rel = self._count_neighbor
+            count_neighbor_v_rel = self._count_neighbor
+            # Constituents of u-isolated rule.
+            isolated_u_rel = self._isolated_node
+            isolated_u_rule_uv_constraint = [
+                node_subset_from(node_u),
+                self.Node(node_v)
+            ]
+            # Constituents of v-isolated rule.
+            isolated_v_rel = self._isolated_node
+            isolated_v_rule_uv_constraint = [node_subset_from(node_u)]
-        # Case where node u is isolated, and node v is any node: score 0.
+        # Handle the `from_`/`to` case.
+        elif node_subset_from is not None and node_subset_to is not None:
+            # Check for object identity optimization.
+            if node_subset_from is node_subset_to:
+                # Constituents of non-isolated-nodes rule.
+                non_isolated_rule_uv_constraint = []
+                count_neighbor_u_rel = self._count_neighbor_of(node_subset_from)
+                count_neighbor_v_rel = count_neighbor_u_rel
+                # Constituents of u-isolated rule.
+                isolated_u_rel = self._isolated_node_of(node_subset_from)
+                isolated_u_rule_uv_constraint = [node_subset_to(node_v)]
+                # Constituents of v-isolated rule.
+                isolated_v_rel = isolated_u_rel
+                isolated_v_rule_uv_constraint = [node_subset_from(node_u)]
+            else:
+                # Constituents of non-isolated-nodes rule.
+                non_isolated_rule_uv_constraint = []
+                count_neighbor_u_rel = self._count_neighbor_of(node_subset_from)
+                count_neighbor_v_rel = self._count_neighbor_of(node_subset_to)
+                # Constituents of u-isolated rule.
+                isolated_u_rel = self._isolated_node_of(node_subset_from)
+                isolated_u_rule_uv_constraint = [node_subset_to(node_v)]
+                # Constituents of v-isolated rule.
+                isolated_v_rel = self._isolated_node_of(node_subset_to)
+                isolated_v_rule_uv_constraint = [node_subset_from(node_u)]
+        # Handle the `full` case.
+        else:
+            # Constituents of non-isolated-nodes rule.
+            non_isolated_rule_uv_constraint = []
+            count_neighbor_u_rel = self._count_neighbor
+            count_neighbor_v_rel = self._count_neighbor
+            # Constituents of u-isolated rule.
+            isolated_u_rel = self._isolated_node
+            isolated_u_rule_uv_constraint = [self.Node(node_v)]
+            # Constituents of v-isolated rule.
+            isolated_v_rel = self._isolated_node
+            isolated_v_rule_uv_constraint = [self.Node(node_u)]
+        # Define shared logic, which has three cases.
+        count_u, count_v = Integer.ref(), Integer.ref()
+        # Case where node u is isolated, and node v is any node (respecting constraints): score 0.
         where(
-            self._isolated_node(node_u),
-            self.Node(node_v),
+            isolated_u_rel(node_u),
+            *isolated_u_rule_uv_constraint,
         ).define(_preferential_attachment_rel(node_u, node_v, 0))
-        # Case where node u is any node, and node v is isolated: score 0.
+        # Case where node u is any node (respecting constraints), and node v is isolated: score 0.
         where(
-            self.Node(node_u),
-            self._isolated_node(node_v)
+            *isolated_v_rule_uv_constraint,
+            isolated_v_rel(node_v)
         ).define(_preferential_attachment_rel(node_u, node_v, 0))
         # Case where neither node is isolated: score is count_neighbor[u] * count_neighbor[v].
         where(
-            self._count_neighbor(node_u, count_u),
-            self._count_neighbor(node_v, count_v)
+            *non_isolated_rule_uv_constraint,
+            count_neighbor_u_rel(node_u, count_u),
+            count_neighbor_v_rel(node_v, count_v)
         ).define(_preferential_attachment_rel(node_u, node_v, count_u * count_v))
         return _preferential_attachment_rel
     @cached_property
     def _isolated_node(self):
+        """Lazily define and cache the self._isolated_node relationship."""
+        return self._create_isolated_node_relationship()
+    def _isolated_node_of(self, node_subset: Relationship):
         """
-        Lazily define and cache the self._isolated_node (helper, non-public) relationship.
-        At this time, exclusively a helper for preferential_attachment.
+        Create an _isolated_node relationship constrained to the subset of nodes
+        in `node_subset`. Note this relationship is not cached; it is
+        specific to the callsite.
+        """
+        return self._create_isolated_node_relationship(node_subset=node_subset)
+    def _create_isolated_node_relationship(
+        self,
+        *,
+        node_subset: Optional[Relationship] = None,
+    ):
+        """
+        Create _isolated_node relationship, optionally constrained by
+        the provided node subset.
         """
         _isolated_node_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} is isolated")
         neighbor_node = self.Node.ref()
+        if node_subset is not None:
+            neighbor_rel = self._neighbor_of(node_subset)
+            node_constraint = node_subset(self.Node)
+        else:
+            neighbor_rel = self._neighbor
+            node_constraint = self.Node
         where(
-            self.Node,
-            not_(self._neighbor(self.Node, neighbor_node))
+            node_constraint,
+            not_(neighbor_rel(self.Node, neighbor_node))
         ).define(_isolated_node_rel(self.Node))
         return _isolated_node_rel

relationalai 0.12.1__py3-none-any.whl → 0.12.3__py3-none-any.whl

relationalai 0.12.1py3-none-any.whl → 0.12.3py3-none-any.whl