relationalai 0.12.1__py3-none-any.whl → 0.12.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1332,21 +1332,6 @@ class Graph():
1332
1332
  # presently in use by the `cosine_similarity` and
1333
1333
  # `jaccard_similarity` relationships.
1334
1334
 
1335
- def _count_common_outneighbor_fragment(self, node_u, node_v):
1336
- """
1337
- Helper for cosine_similarity and jaccard_similarity that returns a fragment
1338
- that counts the common outneighbors of given nodes `node_u` and `node_v`.
1339
- """
1340
- common_outneighbor_node = self.Node.ref()
1341
- return (
1342
- count(common_outneighbor_node)
1343
- .per(node_u, node_v)
1344
- .where(
1345
- self._outneighbor(node_u, common_outneighbor_node),
1346
- self._outneighbor(node_v, common_outneighbor_node),
1347
- )
1348
- )
1349
-
1350
1335
  def _wu_dot_wv_fragment(self, node_u, node_v):
1351
1336
  """
1352
1337
  Helper for cosine_similarity that returns a fragment that produces an
@@ -5592,18 +5577,71 @@ class Graph():
5592
5577
 
5593
5578
 
5594
5579
  @include_in_docs
5595
- def jaccard_similarity(self):
5596
- """Returns a ternary relationship containing the Jaccard similarity for all pairs of nodes.
5580
+ def jaccard_similarity(
5581
+ self,
5582
+ *,
5583
+ full: Optional[bool] = None,
5584
+ from_: Optional[Relationship] = None,
5585
+ to: Optional[Relationship] = None,
5586
+ between: Optional[Relationship] = None,
5587
+ ):
5588
+ """Returns a ternary relationship containing
5589
+ the Jaccard similarity for pairs of nodes.
5597
5590
 
5598
5591
  The Jaccard similarity is a measure between two nodes that ranges from
5599
5592
  0.0 to 1.0, where higher values indicate greater similarity.
5600
5593
 
5594
+ Parameters
5595
+ ----------
5596
+ full : bool, optional
5597
+ If ``True``, computes the Jaccard similarity for all pairs
5598
+ of nodes in the graph. This computation can be expensive for large graphs,
5599
+ as the result can scale quadratically in the number of nodes. Mutually exclusive
5600
+ with other parameters.
5601
+ Default is ``None``.
5602
+ from_ : Relationship, optional
5603
+ A unary relationship containing a subset of the graph's nodes. When
5604
+ provided, constrains the domain of the Jaccard similarity computation: only
5605
+ Jaccard similarity scores for node pairs where the first node is
5606
+ in this relationship are computed and returned. Mutually exclusive with
5607
+ ``full`` and ``between``.
5608
+ Default is ``None``.
5609
+ to : Relationship, optional
5610
+ A unary relationship containing a subset of the graph's nodes. Can only
5611
+ be used together with the ``from_`` parameter. When provided with ``from_``,
5612
+ constrains the domain of the Jaccard similarity computation: only
5613
+ Jaccard similarity scores for node pairs where the first node is
5614
+ in ``from_`` and the second node is in ``to`` are computed and returned.
5615
+ Default is ``None``.
5616
+ between : Relationship, optional
5617
+ A binary relationship containing pairs of nodes. When provided,
5618
+ constrains the domain of the Jaccard similarity computation: only
5619
+ Jaccard similarity scores for the specific node pairs in
5620
+ this relationship are computed and returned. Mutually exclusive
5621
+ with other parameters.
5622
+ Default is ``None``.
5623
+
5601
5624
  Returns
5602
5625
  -------
5603
5626
  Relationship
5604
5627
  A ternary relationship where each tuple represents a pair of nodes
5605
5628
  and their Jaccard similarity.
5606
5629
 
5630
+ Raises
5631
+ ------
5632
+ ValueError
5633
+ If ``full`` is provided with any other parameter.
5634
+ If ``between`` is provided with any other parameter.
5635
+ If ``from_`` is provided with any parameter other than ``to``.
5636
+ If none of ``full``, ``from_``, or ``between`` is provided.
5637
+ If ``full`` is not ``True`` or ``None``.
5638
+ AssertionError
5639
+ If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
5640
+ If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
5641
+ If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
5642
+ If ``from_`` or ``to`` is not a unary relationship.
5643
+ If ``between`` is not a binary relationship.
5644
+
5607
5645
  Relationship Schema
5608
5646
  -------------------
5609
5647
  ``jaccard_similarity(node_u, node_v, score)``
@@ -5652,6 +5690,40 @@ class Graph():
5652
5690
  The weighted Jaccard similarity between node 1 and 2 is then:
5653
5691
  `0.46 / (1.6 + 1.6 + 1.4) = 0.1`.
5654
5692
 
5693
+ Edge weights are assumed to be non-negative, so the neighborhood
5694
+ vectors contain only non-negative elements. Therefore, the Jaccard
5695
+ similarity score is always between 0.0 and 1.0, inclusive.
5696
+
5697
+ The ``jaccard_similarity(full=True)`` method computes and caches
5698
+ the full Jaccard similarity relationship for all pairs of nodes,
5699
+ providing efficient reuse across multiple calls. This can be expensive
5700
+ as the result can contain O(|V|²) tuples.
5701
+
5702
+ Calling ``jaccard_similarity()`` without arguments raises a ``ValueError``,
5703
+ to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
5704
+
5705
+ In contrast, ``jaccard_similarity(from_=subset)`` constrains the computation to
5706
+ tuples with the first position in the passed-in ``subset``. The result is
5707
+ not cached; it is specific to the call site. When a significant fraction of
5708
+ the Jaccard similarity relation is needed across a program,
5709
+ ``jaccard_similarity(full=True)`` is typically more efficient. Use
5710
+ ``jaccard_similarity(from_=subset)`` only when small subsets of
5711
+ the Jaccard similarity relationship are needed
5712
+ collectively across the program.
5713
+
5714
+ The ``to`` parameter can be used together with ``from_`` to further
5715
+ constrain the computation: ``jaccard_similarity(from_=subset_a, to=subset_b)``
5716
+ computes Jaccard similarity scores only for node pairs where the first node is in
5717
+ ``subset_a`` and the second node is in ``subset_b``. (Since ``jaccard_similarity``
5718
+ is symmetric in its first two positions, using ``to`` without ``from_`` would
5719
+ be functionally redundant, and is not allowed.)
5720
+
5721
+ The ``between`` parameter provides another way to constrain the computation.
5722
+ Unlike ``from_`` and ``to``, which allow you to independently constrain the first
5723
+ and second positions in ``jaccard_similarity`` tuples to sets of nodes, ``between``
5724
+ allows you to constrain the first and second positions, jointly, to specific pairs
5725
+ of nodes.
5726
+
5655
5727
  Examples
5656
5728
  --------
5657
5729
  **Unweighted Graph Examples**
@@ -5673,8 +5745,8 @@ class Graph():
5673
5745
  ... Edge.new(src=n4, dst=n3),
5674
5746
  ... )
5675
5747
  >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
5676
- >>> jaccard = graph.jaccard_similarity()
5677
- >>> select(score).where(jaccard(u, v, score), u.id == 2, v.id == 4).inspect()
5748
+ >>> jaccard_similarity = graph.jaccard_similarity(full=True)
5749
+ >>> select(score).where(jaccard_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
5678
5750
  ▰▰▰▰ Setup complete
5679
5751
  score
5680
5752
  0 0.25
@@ -5696,8 +5768,8 @@ class Graph():
5696
5768
  ... Edge.new(src=n4, dst=n3),
5697
5769
  ... )
5698
5770
  >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
5699
- >>> jaccard = graph.jaccard_similarity()
5700
- >>> select(score).where(jaccard(u, v, score), u.id == 2, v.id == 4).inspect()
5771
+ >>> jaccard_similarity = graph.jaccard_similarity(full=True)
5772
+ >>> select(score).where(jaccard_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
5701
5773
  ▰▰▰▰ Setup complete
5702
5774
  score
5703
5775
  0 0.5
@@ -5724,12 +5796,57 @@ class Graph():
5724
5796
  >>>
5725
5797
  >>> # 3. Select the weighted Jaccard similarity for the pair (1, 2)
5726
5798
  >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
5727
- >>> jaccard = graph.jaccard_similarity()
5728
- >>> select(score).where(jaccard(u, v, score), u.id == 1, v.id == 2).inspect()
5799
+ >>> jaccard_similarity = graph.jaccard_similarity(full=True)
5800
+ >>> select(score).where(jaccard_similarity(u, v, score), u.id == 1, v.id == 2).inspect()
5729
5801
  ▰▰▰▰ Setup complete
5730
5802
  score
5731
5803
  0 0.1
5732
5804
 
5805
+ **Domain Constraint Examples**
5806
+
5807
+ >>> # Use 'from_' parameter to constrain the set of nodes for the first position
5808
+ >>> # Using the same undirected unweighted graph from above
5809
+ >>> from relationalai.semantics import where
5810
+ >>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
5811
+ >>> node = Node.ref()
5812
+ >>> where(node.id == 2).define(subset(node))
5813
+ >>>
5814
+ >>> # Get Jaccard similarity scores only for pairs where first node is in subset
5815
+ >>> constrained_jaccard_similarity = graph.jaccard_similarity(from_=subset)
5816
+ >>> select(u.id, v.id, score).where(constrained_jaccard_similarity(u, v, score)).inspect()
5817
+ ▰▰▰▰ Setup complete
5818
+ id id2 score
5819
+ 0 2 2 1.00
5820
+ 1 2 3 0.50
5821
+ 2 2 4 0.25
5822
+
5823
+ >>> # Use both 'from_' and 'to' parameters to constrain both positions
5824
+ >>> from_subset = model.Relationship(f"{{node:{Node}}} is in from_subset")
5825
+ >>> to_subset = model.Relationship(f"{{node:{Node}}} is in to_subset")
5826
+ >>> where(node.id == 2).define(from_subset(node))
5827
+ >>> where(node.id == 4).define(to_subset(node))
5828
+ >>>
5829
+ >>> # Get Jaccard similarity scores only where first node is in from_subset and second node is in to_subset
5830
+ >>> constrained_jaccard_similarity = graph.jaccard_similarity(from_=from_subset, to=to_subset)
5831
+ >>> select(u.id, v.id, score).where(constrained_jaccard_similarity(u, v, score)).inspect()
5832
+ ▰▰▰▰ Setup complete
5833
+ id id2 score
5834
+ 0 2 4 0.25
5835
+
5836
+ >>> # Use 'between' parameter to constrain to specific pairs of nodes
5837
+ >>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
5838
+ >>> node_a, node_b = Node.ref(), Node.ref()
5839
+ >>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
5840
+ >>> where(node_a.id == 3, node_b.id == 4).define(pairs(node_a, node_b))
5841
+ >>>
5842
+ >>> # Get Jaccard similarity scores only for the specific pairs (2, 4) and (3, 4)
5843
+ >>> constrained_jaccard_similarity = graph.jaccard_similarity(between=pairs)
5844
+ >>> select(u.id, v.id, score).where(constrained_jaccard_similarity(u, v, score)).inspect()
5845
+ ▰▰▰▰ Setup complete
5846
+ id id2 score
5847
+ 0 2 4 0.25
5848
+ 1 3 4 0.50
5849
+
5733
5850
  References
5734
5851
  ----------
5735
5852
  Frigo M, Cruciani E, Coudert D, Deriche R, Natale E, Deslauriers-Gauthier S.
@@ -5738,57 +5855,242 @@ class Graph():
5738
5855
  doi: 10.1162/netn_a_00199. PMID: 34746624; PMCID: PMC8567827.
5739
5856
 
5740
5857
  """
5741
- warnings.warn(
5742
- (
5743
- "`jaccard_similarity` presently always computes the similarity "
5744
- "of all pairs of nodes of the graph. To provide better control over "
5745
- "the computed subset, `jaccard_similarity`'s interface will soon "
5746
- "need to change."
5747
- ),
5748
- FutureWarning,
5749
- stacklevel=2
5858
+ # Validate domain constraint parameters.
5859
+ self._validate_domain_constraint_parameters(
5860
+ 'jaccard_similarity', full, from_, to, between
5750
5861
  )
5862
+
5863
+ # At this point, exactly one of `full`, `from_`, or `between`
5864
+ # has been provided, and if `to` is provided, `from_` is also provided.
5865
+
5866
+ # Handle `between`.
5867
+ if between is not None:
5868
+ self._validate_pair_subset_parameter(between)
5869
+ return self._jaccard_similarity_between(between)
5870
+
5871
+ # Handle `from_` (and potentially `to`).
5872
+ if from_ is not None:
5873
+ self._validate_node_subset_parameter('from_', from_)
5874
+ if to is not None:
5875
+ self._validate_node_subset_parameter('to', to)
5876
+ return self._jaccard_similarity_from_to(from_, to)
5877
+ return self._jaccard_similarity_from(from_)
5878
+
5879
+ # Handle `full`.
5751
5880
  return self._jaccard_similarity
5752
5881
 
5753
5882
  @cached_property
5754
5883
  def _jaccard_similarity(self):
5755
- """Lazily define and cache the self._jaccard_similarity relationship."""
5756
- _jaccard_similarity_rel = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} has a similarity to {{node_v:{self._NodeConceptStr}}} of {{similarity:Float}}")
5884
+ """Lazily define and cache the full jaccard_similarity relationship."""
5885
+ _jaccard_similarity_rel = self._create_jaccard_similarity_relationship()
5757
5886
  _jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity"))
5887
+ return _jaccard_similarity_rel
5758
5888
 
5759
- if not self.weighted:
5760
- node_u, node_v = self.Node.ref(), self.Node.ref()
5761
- num_union_outneighbors, num_u_outneigbor, num_v_outneigbor, f = Integer.ref(),\
5762
- Integer.ref(), Integer.ref(), Float.ref()
5763
-
5764
- where(num_common_outneighbor := self._count_common_outneighbor_fragment(node_u, node_v),
5765
- self._count_outneighbor(node_u, num_u_outneigbor),
5766
- self._count_outneighbor(node_v, num_v_outneigbor),
5767
- num_union_outneighbors := num_u_outneigbor + num_v_outneigbor - num_common_outneighbor,
5768
- f := num_common_outneighbor / num_union_outneighbors).define(_jaccard_similarity_rel(node_u, node_v, f))
5889
+ def _jaccard_similarity_from(self, node_subset_from: Relationship):
5890
+ """
5891
+ Create a jaccard_similarity relationship, with the first position in each
5892
+ tuple constrained to be in the given subset of nodes. Note this relationship
5893
+ is not cached; it is specific to the callsite.
5894
+ """
5895
+ _jaccard_similarity_rel = self._create_jaccard_similarity_relationship(
5896
+ node_subset_from=node_subset_from
5897
+ )
5898
+ _jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity_from"))
5899
+ return _jaccard_similarity_rel
5900
+
5901
+ def _jaccard_similarity_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
5902
+ """
5903
+ Create a jaccard_similarity relationship, with the first position in each
5904
+ tuple constrained to be in `node_subset_from`, and the second position in
5905
+ each tuple constrained to be in `node_subset_to`. Note this relationship
5906
+ is not cached; it is specific to the callsite.
5907
+ """
5908
+ _jaccard_similarity_rel = self._create_jaccard_similarity_relationship(
5909
+ node_subset_from=node_subset_from,
5910
+ node_subset_to=node_subset_to
5911
+ )
5912
+ _jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity_from_to"))
5913
+ return _jaccard_similarity_rel
5914
+
5915
+ def _jaccard_similarity_between(self, pair_subset_between: Relationship):
5916
+ """
5917
+ Create a jaccard_similarity relationship, with the first and second position
5918
+ in each tuple jointly constrained to be in the given set of pairs
5919
+ of nodes. Note this relationship is not cached;
5920
+ it is specific to the callsite.
5921
+ """
5922
+ _jaccard_similarity_rel = self._create_jaccard_similarity_relationship(
5923
+ pair_subset_between=pair_subset_between
5924
+ )
5925
+ _jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity_between"))
5926
+ return _jaccard_similarity_rel
5927
+
5928
+ def _create_jaccard_similarity_relationship(
5929
+ self,
5930
+ *,
5931
+ node_subset_from: Optional[Relationship] = None,
5932
+ node_subset_to: Optional[Relationship] = None,
5933
+ pair_subset_between: Optional[Relationship] = None,
5934
+ ):
5935
+ """
5936
+ Create jaccard_similarity relationship, optionally constrained by
5937
+ the provided node subsets or pair subset.
5938
+ """
5939
+ _jaccard_similarity_rel = self._model.Relationship(
5940
+ f"{{node_u:{self._NodeConceptStr}}} has a Jaccard similarity to "
5941
+ f"{{node_v:{self._NodeConceptStr}}} of {{score:Float}}"
5942
+ )
5943
+
5944
+ # Branch by case to select appropriate count_outneighbor,
5945
+ # outneighbor, and weighted_outdegree relationships, and build
5946
+ # appropriate constraints on the domain of the nodes.
5947
+ node_u, node_v = self.Node.ref(), self.Node.ref()
5948
+
5949
+ # TODO: Optimization opportunity. In a number of branches below,
5950
+ # we compute _count_outneighbor_of, which transitively computes
5951
+ # _outneighbor_of, and then compute _outneighbor_of directly;
5952
+ # the present code structure makes this a developer-time-efficient
5953
+ # way to get this off the ground, but of course involves redundant
5954
+ # work. In the future, this redundant work could be eliminated.
5955
+
5956
+ # Handle the `between` case.
5957
+ if pair_subset_between is not None:
5958
+ # Extract first-position and second-position nodes.
5959
+ first_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
5960
+ second_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
5961
+ node_x, node_y = self.Node.ref(), self.Node.ref()
5962
+ where(
5963
+ pair_subset_between(node_x, node_y)
5964
+ ).define(
5965
+ first_position_subset(node_x),
5966
+ second_position_subset(node_y)
5967
+ )
5968
+
5969
+ if not self.weighted:
5970
+ count_outneighbor_u_rel = self._count_outneighbor_of(first_position_subset)
5971
+ count_outneighbor_v_rel = self._count_outneighbor_of(second_position_subset)
5972
+ outneighbor_u_rel = self._outneighbor_of(first_position_subset)
5973
+ outneighbor_v_rel = self._outneighbor_of(second_position_subset)
5974
+ else: # self.weighted
5975
+ weighted_outdegree_u_rel = self._weighted_outdegree_of(first_position_subset)
5976
+ weighted_outdegree_v_rel = self._weighted_outdegree_of(second_position_subset)
5977
+
5978
+ node_constraints = [pair_subset_between(node_u, node_v)]
5979
+
5980
+ # Handle the `from_` case.
5981
+ elif node_subset_from is not None and node_subset_to is None:
5982
+ if not self.weighted:
5983
+ count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
5984
+ count_outneighbor_v_rel = self._count_outneighbor
5985
+ outneighbor_u_rel = self._outneighbor_of(node_subset_from)
5986
+ outneighbor_v_rel = self._outneighbor
5987
+ else: # self.weighted
5988
+ weighted_outdegree_u_rel = self._weighted_outdegree_of(node_subset_from)
5989
+ weighted_outdegree_v_rel = self._weighted_outdegree
5990
+
5991
+ # TODO: Implement depth-two traversal strategy for better performance.
5992
+ # See similar comments on related similarity metrics.
5993
+
5994
+ node_constraints = [node_subset_from(node_u)]
5995
+
5996
+ # Handle the `from_`/`to` case.
5997
+ elif node_subset_from is not None and node_subset_to is not None:
5998
+ # Check for object identity optimization.
5999
+ if node_subset_from is node_subset_to:
6000
+ if not self.weighted:
6001
+ count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
6002
+ count_outneighbor_v_rel = count_outneighbor_u_rel
6003
+ outneighbor_u_rel = self._outneighbor_of(node_subset_from)
6004
+ outneighbor_v_rel = outneighbor_u_rel
6005
+ else: # self.weighted
6006
+ weighted_outdegree_u_rel = self._weighted_outdegree_of(node_subset_from)
6007
+ weighted_outdegree_v_rel = weighted_outdegree_u_rel
6008
+ else:
6009
+ if not self.weighted:
6010
+ count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
6011
+ count_outneighbor_v_rel = self._count_outneighbor_of(node_subset_to)
6012
+ outneighbor_u_rel = self._outneighbor_of(node_subset_from)
6013
+ outneighbor_v_rel = self._outneighbor_of(node_subset_to)
6014
+ else: # self.weighted
6015
+ weighted_outdegree_u_rel = self._weighted_outdegree_of(node_subset_from)
6016
+ weighted_outdegree_v_rel = self._weighted_outdegree_of(node_subset_to)
6017
+
6018
+ node_constraints = [node_subset_from(node_u), node_subset_to(node_v)]
6019
+
6020
+ # Handle the `full` case.
5769
6021
  else:
5770
- # TODO (dba) Annotate local relationships in this scope with `@ondemand` once available.
6022
+ if not self.weighted:
6023
+ count_outneighbor_u_rel = self._count_outneighbor
6024
+ count_outneighbor_v_rel = self._count_outneighbor
6025
+ outneighbor_u_rel = self._outneighbor
6026
+ outneighbor_v_rel = self._outneighbor
6027
+ else: # self.weighted
6028
+ weighted_outdegree_u_rel = self._weighted_outdegree
6029
+ weighted_outdegree_v_rel = self._weighted_outdegree
5771
6030
 
6031
+ node_constraints = []
6032
+
6033
+ # Define Jaccard similarity logic for weighted and unweighted cases.
6034
+ if not self.weighted:
6035
+ num_u_outneigbor, num_v_outneigbor = Integer.ref(), Integer.ref()
6036
+ common_outneighbor_node = self.Node.ref()
6037
+ num_union_outneighbors = Integer.ref()
6038
+ score = Float.ref()
6039
+
6040
+ where(
6041
+ *node_constraints,
6042
+ count_outneighbor_u_rel(node_u, num_u_outneigbor), # type: ignore[possibly-unbound]
6043
+ count_outneighbor_v_rel(node_v, num_v_outneigbor), # type: ignore[possibly-unbound]
6044
+ num_common_outneighbor := count(common_outneighbor_node).per(node_u, node_v).where(
6045
+ outneighbor_u_rel(node_u, common_outneighbor_node), # type: ignore[possibly-unbound]
6046
+ outneighbor_v_rel(node_v, common_outneighbor_node), # type: ignore[possibly-unbound]
6047
+ ),
6048
+ num_union_outneighbors := num_u_outneigbor + num_v_outneigbor - num_common_outneighbor,
6049
+ score := num_common_outneighbor / num_union_outneighbors,
6050
+ ).define(
6051
+ _jaccard_similarity_rel(node_u, node_v, score)
6052
+ )
6053
+ else:
5772
6054
  # (1) The numerator: For every node `k` in the graph, find the minimum weight of
5773
6055
  # the out-edges from `u` and `v` to `k`, and sum those minimum weights.
5774
6056
 
5775
6057
  # Note that for any node `k` that is not a common out-neighbor of nodes `u` and `v`,
5776
6058
  # the minimum weight of the out-edges from `u` and `v` to `k` is zero/empty,
5777
6059
  # so the sum here reduces to a sum over the common out-neighbors of `u` and `v`.
5778
- min_weight_to_common_outneighbor = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} have common outneighbor {{node_k:{self._NodeConceptStr}}} with minimum weight {{minweight:Float}}")
6060
+ min_weight_to_common_outneighbor = self._model.Relationship(
6061
+ f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
6062
+ f"have common outneighbor {{node_k:{self._NodeConceptStr}}} "
6063
+ f"with minimum weight {{minweight:Float}}"
6064
+ )
5779
6065
 
5780
- node_u, node_v, node_k, w1, w2 = self.Node.ref(), self.Node.ref(), self.Node.ref(), Float.ref(), Float.ref()
5781
- w = union(where(self._weight(node_u, node_k, w1)).select(w1),
5782
- where(self._weight(node_v, node_k, w2)).select(w2))
5783
- where(self._edge(node_u, node_k),
5784
- self._edge(node_v, node_k))\
5785
- .define(min_weight_to_common_outneighbor(node_u, node_v, node_k, min(w).per(node_u, node_v, node_k)))
6066
+ node_k, w1, w2 = self.Node.ref(), Float.ref(), Float.ref()
6067
+ w = union(
6068
+ where(self._weight(node_u, node_k, w1)).select(w1),
6069
+ where(self._weight(node_v, node_k, w2)).select(w2)
6070
+ )
6071
+ where(
6072
+ *node_constraints,
6073
+ self._edge(node_u, node_k),
6074
+ self._edge(node_v, node_k)
6075
+ ).define(
6076
+ min_weight_to_common_outneighbor(
6077
+ node_u, node_v, node_k, min(w).per(node_u, node_v, node_k)
6078
+ )
6079
+ )
5786
6080
 
5787
- sum_of_min_weights_to_common_outneighbors = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} have a sum of minweights of {{minsum:Float}}")
6081
+ sum_of_min_weights_to_common_outneighbors = self._model.Relationship(
6082
+ f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
6083
+ f"have a sum of minweights of {{minsum:Float}}"
6084
+ )
5788
6085
 
5789
6086
  minweight = Float.ref()
5790
- where(min_weight_to_common_outneighbor(node_u, node_v, node_k, minweight)
5791
- ).define(sum_of_min_weights_to_common_outneighbors(node_u, node_v, sum(node_k, minweight).per(node_u, node_v)))
6087
+ where(
6088
+ min_weight_to_common_outneighbor(node_u, node_v, node_k, minweight)
6089
+ ).define(
6090
+ sum_of_min_weights_to_common_outneighbors(
6091
+ node_u, node_v, sum(node_k, minweight).per(node_u, node_v)
6092
+ )
6093
+ )
5792
6094
 
5793
6095
  # (2) The denominator: For every node `k` in the graph, find the maximum weight of
5794
6096
  # the out-edges from `u` and `v` to `k`, and sum those maximum weights.
@@ -5827,20 +6129,31 @@ class Graph():
5827
6129
  # self._weighted_outdegree(u) +
5828
6130
  # self._weighted_outdegree(v) -
5829
6131
  # _sum_of_min_weights_to_common_outneighbors(u, v)
5830
- sum_of_max_weights_to_other_nodes = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} have a maxsum of {{maxsum:Float}}")
6132
+ sum_of_max_weights_to_other_nodes = self._model.Relationship(
6133
+ f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
6134
+ f"have a maxsum of {{maxsum:Float}}"
6135
+ )
5831
6136
 
5832
6137
  u_outdegree, v_outdegree, maxsum, minsum = Float.ref(), Float.ref(), Float.ref(), Float.ref()
5833
- where(self._weighted_outdegree(node_u, u_outdegree),
5834
- self._weighted_outdegree(node_v, v_outdegree),
5835
- sum_of_min_weights_to_common_outneighbors(node_u, node_v, minsum),
5836
- maxsum == u_outdegree + v_outdegree - minsum
5837
- ).define(sum_of_max_weights_to_other_nodes(node_u, node_v, maxsum))
6138
+ where(
6139
+ *node_constraints,
6140
+ weighted_outdegree_u_rel(node_u, u_outdegree), # type: ignore[possibly-unbound]
6141
+ weighted_outdegree_v_rel(node_v, v_outdegree), # type: ignore[possibly-unbound]
6142
+ sum_of_min_weights_to_common_outneighbors(node_u, node_v, minsum),
6143
+ maxsum == u_outdegree + v_outdegree - minsum
6144
+ ).define(
6145
+ sum_of_max_weights_to_other_nodes(node_u, node_v, maxsum)
6146
+ )
5838
6147
 
6148
+ # Combination of (1) and (2) to produce score.
5839
6149
  score = Float.ref()
5840
- where(sum_of_min_weights_to_common_outneighbors(node_u, node_v, minsum),
5841
- sum_of_max_weights_to_other_nodes(node_u, node_v, maxsum),
5842
- score == minsum/maxsum
5843
- ).define(_jaccard_similarity_rel(node_u, node_v, score))
6150
+ where(
6151
+ sum_of_min_weights_to_common_outneighbors(node_u, node_v, minsum),
6152
+ sum_of_max_weights_to_other_nodes(node_u, node_v, maxsum),
6153
+ score == minsum / maxsum
6154
+ ).define(
6155
+ _jaccard_similarity_rel(node_u, node_v, score)
6156
+ )
5844
6157
 
5845
6158
  return _jaccard_similarity_rel
5846
6159
 
@@ -6662,19 +6975,72 @@ class Graph():
6662
6975
 
6663
6976
 
6664
6977
  @include_in_docs
6665
- def preferential_attachment(self):
6666
- """Returns a ternary relationship containing the preferential attachment score for all pairs of nodes.
6978
+ def preferential_attachment(
6979
+ self,
6980
+ *,
6981
+ full: Optional[bool] = None,
6982
+ from_: Optional[Relationship] = None,
6983
+ to: Optional[Relationship] = None,
6984
+ between: Optional[Relationship] = None,
6985
+ ):
6986
+ """Returns a ternary relationship containing
6987
+ the preferential attachment score for pairs of nodes.
6667
6988
 
6668
6989
  The preferential attachment score between two nodes `u` and `v` is the
6669
6990
  number of nodes adjacent to `u` multiplied by the number of nodes
6670
6991
  adjacent to `v`.
6671
6992
 
6993
+ Parameters
6994
+ ----------
6995
+ full : bool, optional
6996
+ If ``True``, computes the preferential attachment score for all pairs
6997
+ of nodes in the graph. This computation can be expensive for large graphs,
6998
+ as the result can scale quadratically in the number of nodes. Mutually exclusive
6999
+ with other parameters.
7000
+ Default is ``None``.
7001
+ from_ : Relationship, optional
7002
+ A unary relationship containing a subset of the graph's nodes. When
7003
+ provided, constrains the domain of the preferential attachment computation: only
7004
+ preferential attachment scores for node pairs where the first node is
7005
+ in this relationship are computed and returned. Mutually exclusive with
7006
+ ``full`` and ``between``.
7007
+ Default is ``None``.
7008
+ to : Relationship, optional
7009
+ A unary relationship containing a subset of the graph's nodes. Can only
7010
+ be used together with the ``from_`` parameter. When provided with ``from_``,
7011
+ constrains the domain of the preferential attachment computation: only
7012
+ preferential attachment scores for node pairs where the first node is
7013
+ in ``from_`` and the second node is in ``to`` are computed and returned.
7014
+ Default is ``None``.
7015
+ between : Relationship, optional
7016
+ A binary relationship containing pairs of nodes. When provided,
7017
+ constrains the domain of the preferential attachment computation: only
7018
+ preferential attachment scores for the specific node pairs in
7019
+ this relationship are computed and returned. Mutually exclusive
7020
+ with other parameters.
7021
+ Default is ``None``.
7022
+
6672
7023
  Returns
6673
7024
  -------
6674
7025
  Relationship
6675
7026
  A ternary relationship where each tuple represents a pair of nodes
6676
7027
  and their preferential attachment score.
6677
7028
 
7029
+ Raises
7030
+ ------
7031
+ ValueError
7032
+ If ``full`` is provided with any other parameter.
7033
+ If ``between`` is provided with any other parameter.
7034
+ If ``from_`` is provided with any parameter other than ``to``.
7035
+ If none of ``full``, ``from_``, or ``between`` is provided.
7036
+ If ``full`` is not ``True`` or ``None``.
7037
+ AssertionError
7038
+ If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
7039
+ If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
7040
+ If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
7041
+ If ``from_`` or ``to`` is not a unary relationship.
7042
+ If ``between`` is not a binary relationship.
7043
+
6678
7044
  Relationship Schema
6679
7045
  -------------------
6680
7046
  ``preferential_attachment(node_u, node_v, score)``
@@ -6691,6 +7057,38 @@ class Graph():
6691
7057
  | Directed | Yes | |
6692
7058
  | Weighted | Yes | Weights are ignored. |
6693
7059
 
7060
+ Notes
7061
+ -----
7062
+ The ``preferential_attachment(full=True)`` method computes and caches
7063
+ the full preferential attachment relationship for all pairs of nodes,
7064
+ providing efficient reuse across multiple calls. This can be expensive
7065
+ as the result contains O(|V|²) tuples.
7066
+
7067
+ Calling ``preferential_attachment()`` without arguments raises a ``ValueError``,
7068
+ to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
7069
+
7070
+ In contrast, ``preferential_attachment(from_=subset)`` constrains the computation to
7071
+ tuples with the first position in the passed-in ``subset``. The result is
7072
+ not cached; it is specific to the call site. When a significant fraction of
7073
+ the preferential attachment relation is needed across a program,
7074
+ ``preferential_attachment(full=True)`` is typically more efficient. Use
7075
+ ``preferential_attachment(from_=subset)`` only when small subsets of
7076
+ the preferential attachment relationship are needed
7077
+ collectively across the program.
7078
+
7079
+ The ``to`` parameter can be used together with ``from_`` to further
7080
+ constrain the computation: ``preferential_attachment(from_=subset_a, to=subset_b)``
7081
+ computes preferential attachment scores only for node pairs where the first node is in
7082
+ ``subset_a`` and the second node is in ``subset_b``. (Since ``preferential_attachment``
7083
+ is symmetric in its first two positions, using ``to`` without ``from_``would
7084
+ be functionally redundant, and is not allowed.)
7085
+
7086
+ The ``between`` parameter provides another way to constrain the computation.
7087
+ Unlike ``from_`` and ``to``, which allow you to independently constrain the first
7088
+ and second positions in ``preferential_attachment`` tuples to sets of nodes, ``between``
7089
+ allows you to constrain the first and second positions, jointly, to specific pairs
7090
+ of nodes.
7091
+
6694
7092
  Examples
6695
7093
  --------
6696
7094
  >>> from relationalai.semantics import Model, define, select, Integer
@@ -6712,10 +7110,10 @@ class Graph():
6712
7110
  ... Edge.new(src=n4, dst=n3),
6713
7111
  ... )
6714
7112
  >>>
6715
- >>> # 3. Select the preferential attachment score for the pair (1, 3)
7113
+ >>> # 3. Select the preferential attachment scores from the full relationship
6716
7114
  >>> u, v = Node.ref("u"), Node.ref("v")
6717
7115
  >>> score = Integer.ref("score")
6718
- >>> preferential_attachment = graph.preferential_attachment()
7116
+ >>> preferential_attachment = graph.preferential_attachment(full=True)
6719
7117
  >>> select(
6720
7118
  ... u.id, v.id, score,
6721
7119
  ... ).where(
@@ -6727,64 +7125,302 @@ class Graph():
6727
7125
  id id2 score
6728
7126
  0 1 3 3
6729
7127
 
7128
+ >>> # 4. Use 'from_' parameter to constrain the set of nodes for the first position
7129
+ >>> # Define a subset containing only node 1
7130
+ >>> from relationalai.semantics import where
7131
+ >>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
7132
+ >>> node = Node.ref()
7133
+ >>> where(node.id == 1).define(subset(node))
7134
+ >>>
7135
+ >>> # Get preferential attachment scores only for pairs where first node is in subset
7136
+ >>> constrained_preferential_attachment = graph.preferential_attachment(from_=subset)
7137
+ >>> select(u.id, v.id, score).where(constrained_preferential_attachment(u, v, score)).inspect()
7138
+ ▰▰▰▰ Setup complete
7139
+ id id2 score
7140
+ 0 1 1 1
7141
+ 1 1 2 3
7142
+ 2 1 3 3
7143
+ 3 1 4 3
7144
+
7145
+ >>> # 5. Use both 'from_' and 'to' parameters to constrain both positions
7146
+ >>> from_subset = model.Relationship(f"{{node:{Node}}} is in from_subset")
7147
+ >>> to_subset = model.Relationship(f"{{node:{Node}}} is in to_subset")
7148
+ >>> where(node.id == 1).define(from_subset(node))
7149
+ >>> where(node.id == 3).define(to_subset(node))
7150
+ >>>
7151
+ >>> # Get preferential attachment scores only where first node is in from_subset and second node is in to_subset
7152
+ >>> constrained_preferential_attachment = graph.preferential_attachment(from_=from_subset, to=to_subset)
7153
+ >>> select(u.id, v.id, score).where(constrained_preferential_attachment(u, v, score)).inspect()
7154
+ ▰▰▰▰ Setup complete
7155
+ id id2 score
7156
+ 0 1 3 3
7157
+
7158
+ >>> # 6. Use 'between' parameter to constrain to specific pairs of nodes
7159
+ >>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
7160
+ >>> node_a, node_b = Node.ref(), Node.ref()
7161
+ >>> where(node_a.id == 1, node_b.id == 3).define(pairs(node_a, node_b))
7162
+ >>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
7163
+ >>>
7164
+ >>> # Get preferential attachment scores only for the specific pairs (1, 3) and (2, 4)
7165
+ >>> constrained_preferential_attachment = graph.preferential_attachment(between=pairs)
7166
+ >>> select(u.id, v.id, score).where(constrained_preferential_attachment(u, v, score)).inspect()
7167
+ ▰▰▰▰ Setup complete
7168
+ id id2 score
7169
+ 0 1 3 3
7170
+ 1 2 4 6
7171
+
6730
7172
  """
6731
- warnings.warn(
6732
- (
6733
- "`preferential_attachment` presently always computes the similarity "
6734
- "of all pairs of nodes of the graph. To provide better control over "
6735
- "the computed subset, `preferential_attachment`'s interface will soon "
6736
- "need to change."
6737
- ),
6738
- FutureWarning,
6739
- stacklevel=2
7173
+ # Validate domain constraint parameters.
7174
+ self._validate_domain_constraint_parameters(
7175
+ 'preferential_attachment', full, from_, to, between
6740
7176
  )
6741
7177
 
7178
+ # At this point, exactly one of `full`, `from_`, or `between`
7179
+ # has been provided, and if `to` is provided, `from_` is also provided.
7180
+
7181
+ # Handle `between`.
7182
+ if between is not None:
7183
+ self._validate_pair_subset_parameter(between)
7184
+ return self._preferential_attachment_between(between)
7185
+
7186
+ # Handle `from_` (and potentially `to`).
7187
+ if from_ is not None:
7188
+ self._validate_node_subset_parameter('from_', from_)
7189
+ if to is not None:
7190
+ self._validate_node_subset_parameter('to', to)
7191
+ return self._preferential_attachment_from_to(from_, to)
7192
+ return self._preferential_attachment_from(from_)
7193
+
7194
+ # Handle `full`.
6742
7195
  return self._preferential_attachment
6743
7196
 
6744
7197
  @cached_property
6745
7198
  def _preferential_attachment(self):
6746
- """Lazily define and cache the self._preferential_attachment relationship."""
6747
- _preferential_attachment_rel = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} have preferential attachment score {{score:Integer}}")
7199
+ """Lazily define and cache the full preferential_attachment relationship."""
7200
+ _preferential_attachment_rel = self._create_preferential_attachment_relationship()
6748
7201
  _preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment"))
7202
+ return _preferential_attachment_rel
7203
+
7204
+ def _preferential_attachment_from(self, node_subset_from: Relationship):
7205
+ """
7206
+ Create a preferential_attachment relationship, with the first position in each
7207
+ tuple constrained to be in the given subset of nodes. Note this relationship
7208
+ is not cached; it is specific to the callsite.
7209
+ """
7210
+ _preferential_attachment_rel = self._create_preferential_attachment_relationship(
7211
+ node_subset_from=node_subset_from
7212
+ )
7213
+ _preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment_from"))
7214
+ return _preferential_attachment_rel
7215
+
7216
+ def _preferential_attachment_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
7217
+ """
7218
+ Create a preferential_attachment relationship, with the first position in each
7219
+ tuple constrained to be in `node_subset_from`, and the second position in
7220
+ each tuple constrained to be in `node_subset_to`. Note this relationship
7221
+ is not cached; it is specific to the callsite.
7222
+ """
7223
+ _preferential_attachment_rel = self._create_preferential_attachment_relationship(
7224
+ node_subset_from=node_subset_from,
7225
+ node_subset_to=node_subset_to
7226
+ )
7227
+ _preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment_from_to"))
7228
+ return _preferential_attachment_rel
7229
+
7230
+ def _preferential_attachment_between(self, pair_subset_between: Relationship):
7231
+ """
7232
+ Create a preferential_attachment relationship, with the first and second position
7233
+ in each tuple jointly constrained to be in the given set of pairs
7234
+ of nodes. Note this relationship is not cached;
7235
+ it is specific to the callsite.
7236
+ """
7237
+ _preferential_attachment_rel = self._create_preferential_attachment_relationship(
7238
+ pair_subset_between=pair_subset_between
7239
+ )
7240
+ _preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment_between"))
7241
+ return _preferential_attachment_rel
6749
7242
 
7243
+ def _create_preferential_attachment_relationship(
7244
+ self,
7245
+ *,
7246
+ node_subset_from: Optional[Relationship] = None,
7247
+ node_subset_to: Optional[Relationship] = None,
7248
+ pair_subset_between: Optional[Relationship] = None,
7249
+ ):
7250
+ """
7251
+ Create preferential_attachment relationship, optionally constrained by
7252
+ the provided node subsets or pair subset.
7253
+ """
7254
+ _preferential_attachment_rel = self._model.Relationship(
7255
+ f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
7256
+ f"have preferential attachment score {{score:Integer}}"
7257
+ )
7258
+
7259
+ # Branch by case to select appropriate count_neighbor and isolated_node relationships,
7260
+ # and to define relevant constraints on the separate and joint domains of node_u and node_v.
6750
7261
  node_u, node_v = self.Node.ref(), self.Node.ref()
6751
- count_u, count_v = Integer.ref(), Integer.ref()
6752
7262
 
6753
- # NOTE: We consider isolated nodes separately to maintain
6754
- # the dense behavior of preferential attachment.
7263
+ # Handle the `between` case.
7264
+ if pair_subset_between is not None:
7265
+ # Collect nodes that appear in the subset by position.
7266
+ first_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
7267
+ second_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
7268
+ node_x, node_y = self.Node.ref(), self.Node.ref()
7269
+ where(
7270
+ pair_subset_between(node_x, node_y)
7271
+ ).define(
7272
+ first_position_subset(node_x),
7273
+ second_position_subset(node_y)
7274
+ )
7275
+
7276
+ # Constituents of non-isolated-nodes rule.
7277
+ non_isolated_rule_uv_constraint = [pair_subset_between(node_u, node_v)]
7278
+ count_neighbor_u_rel = self._count_neighbor_of(first_position_subset)
7279
+ count_neighbor_v_rel = self._count_neighbor_of(second_position_subset)
7280
+
7281
+ # Constituents of u-isolated rule.
7282
+ isolated_u_rel = self._isolated_node_of(first_position_subset)
7283
+ isolated_u_rule_uv_constraint = [pair_subset_between(node_u, node_v)]
7284
+
7285
+ # Constituents of v-isolated rule.
7286
+ isolated_v_rel = self._isolated_node_of(second_position_subset)
7287
+ isolated_v_rule_uv_constraint = [pair_subset_between(node_u, node_v)]
7288
+
7289
+ # Handle the `from_` case.
7290
+ elif node_subset_from is not None and node_subset_to is None:
7291
+ # NOTE: It isn't necessary to compute _count_neighbor_of
7292
+ # and _isolated_node_of for node_subset_from, given
7293
+ # we have to compute _count_neighbor and _isolated_node
7294
+ # for the unconstrained second position anyway. That does
7295
+ # require additional constraints as seen below, though.
7296
+ #
7297
+ # It's not clear to this author that there is a more clever
7298
+ # way to do this, given that in preferential attachment,
7299
+ # constraining one position implies no constraint on the
7300
+ # other position, unlike in, e.g., common neighbor?
7301
+
7302
+ # Constituents of non-isolated-nodes rule.
7303
+ non_isolated_rule_uv_constraint = [node_subset_from(node_u)]
7304
+ count_neighbor_u_rel = self._count_neighbor
7305
+ count_neighbor_v_rel = self._count_neighbor
7306
+
7307
+ # Constituents of u-isolated rule.
7308
+ isolated_u_rel = self._isolated_node
7309
+ isolated_u_rule_uv_constraint = [
7310
+ node_subset_from(node_u),
7311
+ self.Node(node_v)
7312
+ ]
7313
+
7314
+ # Constituents of v-isolated rule.
7315
+ isolated_v_rel = self._isolated_node
7316
+ isolated_v_rule_uv_constraint = [node_subset_from(node_u)]
6755
7317
 
6756
- # Case where node u is isolated, and node v is any node: score 0.
7318
+ # Handle the `from_`/`to` case.
7319
+ elif node_subset_from is not None and node_subset_to is not None:
7320
+ # Check for object identity optimization.
7321
+ if node_subset_from is node_subset_to:
7322
+ # Constituents of non-isolated-nodes rule.
7323
+ non_isolated_rule_uv_constraint = []
7324
+ count_neighbor_u_rel = self._count_neighbor_of(node_subset_from)
7325
+ count_neighbor_v_rel = count_neighbor_u_rel
7326
+
7327
+ # Constituents of u-isolated rule.
7328
+ isolated_u_rel = self._isolated_node_of(node_subset_from)
7329
+ isolated_u_rule_uv_constraint = [node_subset_to(node_v)]
7330
+
7331
+ # Constituents of v-isolated rule.
7332
+ isolated_v_rel = isolated_u_rel
7333
+ isolated_v_rule_uv_constraint = [node_subset_from(node_u)]
7334
+ else:
7335
+ # Constituents of non-isolated-nodes rule.
7336
+ non_isolated_rule_uv_constraint = []
7337
+ count_neighbor_u_rel = self._count_neighbor_of(node_subset_from)
7338
+ count_neighbor_v_rel = self._count_neighbor_of(node_subset_to)
7339
+
7340
+ # Constituents of u-isolated rule.
7341
+ isolated_u_rel = self._isolated_node_of(node_subset_from)
7342
+ isolated_u_rule_uv_constraint = [node_subset_to(node_v)]
7343
+
7344
+ # Constituents of v-isolated rule.
7345
+ isolated_v_rel = self._isolated_node_of(node_subset_to)
7346
+ isolated_v_rule_uv_constraint = [node_subset_from(node_u)]
7347
+
7348
+
7349
+ # Handle the `full` case.
7350
+ else:
7351
+ # Constituents of non-isolated-nodes rule.
7352
+ non_isolated_rule_uv_constraint = []
7353
+ count_neighbor_u_rel = self._count_neighbor
7354
+ count_neighbor_v_rel = self._count_neighbor
7355
+
7356
+ # Constituents of u-isolated rule.
7357
+ isolated_u_rel = self._isolated_node
7358
+ isolated_u_rule_uv_constraint = [self.Node(node_v)]
7359
+
7360
+ # Constituents of v-isolated rule.
7361
+ isolated_v_rel = self._isolated_node
7362
+ isolated_v_rule_uv_constraint = [self.Node(node_u)]
7363
+
7364
+ # Define shared logic, which has three cases.
7365
+ count_u, count_v = Integer.ref(), Integer.ref()
7366
+
7367
+ # Case where node u is isolated, and node v is any node (respecting constraints): score 0.
6757
7368
  where(
6758
- self._isolated_node(node_u),
6759
- self.Node(node_v),
7369
+ isolated_u_rel(node_u),
7370
+ *isolated_u_rule_uv_constraint,
6760
7371
  ).define(_preferential_attachment_rel(node_u, node_v, 0))
6761
7372
 
6762
- # Case where node u is any node, and node v is isolated: score 0.
7373
+ # Case where node u is any node (respecting constraints), and node v is isolated: score 0.
6763
7374
  where(
6764
- self.Node(node_u),
6765
- self._isolated_node(node_v)
7375
+ *isolated_v_rule_uv_constraint,
7376
+ isolated_v_rel(node_v)
6766
7377
  ).define(_preferential_attachment_rel(node_u, node_v, 0))
6767
7378
 
6768
7379
  # Case where neither node is isolated: score is count_neighbor[u] * count_neighbor[v].
6769
7380
  where(
6770
- self._count_neighbor(node_u, count_u),
6771
- self._count_neighbor(node_v, count_v)
7381
+ *non_isolated_rule_uv_constraint,
7382
+ count_neighbor_u_rel(node_u, count_u),
7383
+ count_neighbor_v_rel(node_v, count_v)
6772
7384
  ).define(_preferential_attachment_rel(node_u, node_v, count_u * count_v))
6773
7385
 
6774
7386
  return _preferential_attachment_rel
6775
7387
 
7388
+
6776
7389
  @cached_property
6777
7390
  def _isolated_node(self):
7391
+ """Lazily define and cache the self._isolated_node relationship."""
7392
+ return self._create_isolated_node_relationship()
7393
+
7394
+ def _isolated_node_of(self, node_subset: Relationship):
6778
7395
  """
6779
- Lazily define and cache the self._isolated_node (helper, non-public) relationship.
6780
- At this time, exclusively a helper for preferential_attachment.
7396
+ Create an _isolated_node relationship constrained to the subset of nodes
7397
+ in `node_subset`. Note this relationship is not cached; it is
7398
+ specific to the callsite.
7399
+ """
7400
+ return self._create_isolated_node_relationship(node_subset=node_subset)
7401
+
7402
+ def _create_isolated_node_relationship(
7403
+ self,
7404
+ *,
7405
+ node_subset: Optional[Relationship] = None,
7406
+ ):
7407
+ """
7408
+ Create _isolated_node relationship, optionally constrained by
7409
+ the provided node subset.
6781
7410
  """
6782
7411
  _isolated_node_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} is isolated")
6783
7412
 
6784
7413
  neighbor_node = self.Node.ref()
7414
+ if node_subset is not None:
7415
+ neighbor_rel = self._neighbor_of(node_subset)
7416
+ node_constraint = node_subset(self.Node)
7417
+ else:
7418
+ neighbor_rel = self._neighbor
7419
+ node_constraint = self.Node
7420
+
6785
7421
  where(
6786
- self.Node,
6787
- not_(self._neighbor(self.Node, neighbor_node))
7422
+ node_constraint,
7423
+ not_(neighbor_rel(self.Node, neighbor_node))
6788
7424
  ).define(_isolated_node_rel(self.Node))
6789
7425
 
6790
7426
  return _isolated_node_rel