relationalai 0.12.2__py3-none-any.whl → 0.12.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -353,14 +353,16 @@ class Graph():
353
353
  @cached_property
354
354
  def Node(self) -> Concept:
355
355
  """Lazily define and cache the self.Node concept."""
356
- return self._user_node_concept or self._model.Concept(self._NodeConceptStr)
357
-
356
+ _Node = self._user_node_concept or self._model.Concept(self._NodeConceptStr)
357
+ _Node.annotate(annotations.track("graphs", "Node"))
358
+ return _Node
358
359
 
359
360
  @cached_property
360
361
  def Edge(self):
361
362
  """Lazily define and cache the self.Edge concept and friends,
362
363
  by passing through to self._EdgeComplex."""
363
364
  _Edge, _, _, _ = self._EdgeComplex
365
+ _Edge.annotate(annotations.track("graphs", "Edge"))
364
366
  return _Edge
365
367
 
366
368
  @cached_property
@@ -594,6 +596,7 @@ class Graph():
594
596
  consuming the `Edge` concept's `EdgeSrc` and `EdgeDst` relationships.
595
597
  """
596
598
  _edge_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has edge to {{dst:{self._NodeConceptStr}}}")
599
+ _edge_rel.annotate(annotations.track("graphs", "_edge"))
597
600
 
598
601
  Edge, EdgeSrc, EdgeDst = self.Edge, self.EdgeSrc, self.EdgeDst
599
602
  src, dst = self.Node.ref(), self.Node.ref()
@@ -620,6 +623,7 @@ class Graph():
620
623
  consuming the `Edge` concept's `EdgeSrc`, `EdgeDst`, and `EdgeWeight` relationships.
621
624
  """
622
625
  _weight_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has edge to {{dst:{self._NodeConceptStr}}} with weight {{weight:Float}}")
626
+ _weight_rel.annotate(annotations.track("graphs", "_weight"))
623
627
 
624
628
  Edge, EdgeSrc, EdgeDst, EdgeWeight = self.Edge, self.EdgeSrc, self.EdgeDst, self.EdgeWeight
625
629
  src, dst, weight = self.Node.ref(), self.Node.ref(), Float.ref()
@@ -1332,21 +1336,6 @@ class Graph():
1332
1336
  # presently in use by the `cosine_similarity` and
1333
1337
  # `jaccard_similarity` relationships.
1334
1338
 
1335
- def _count_common_outneighbor_fragment(self, node_u, node_v):
1336
- """
1337
- Helper for cosine_similarity and jaccard_similarity that returns a fragment
1338
- that counts the common outneighbors of given nodes `node_u` and `node_v`.
1339
- """
1340
- common_outneighbor_node = self.Node.ref()
1341
- return (
1342
- count(common_outneighbor_node)
1343
- .per(node_u, node_v)
1344
- .where(
1345
- self._outneighbor(node_u, common_outneighbor_node),
1346
- self._outneighbor(node_v, common_outneighbor_node),
1347
- )
1348
- )
1349
-
1350
1339
  def _wu_dot_wv_fragment(self, node_u, node_v):
1351
1340
  """
1352
1341
  Helper for cosine_similarity that returns a fragment that produces an
@@ -3935,17 +3924,31 @@ class Graph():
3935
3924
 
3936
3925
 
3937
3926
  @include_in_docs
3938
- def triangle(self):
3927
+ def triangle(self, *, full: Optional[bool] = None):
3939
3928
  """Returns a ternary relationship containing all triangles in the graph.
3940
3929
 
3941
3930
  Unlike `unique_triangle`, this relationship contains all permutations
3942
3931
  of the nodes for each triangle found.
3943
3932
 
3933
+ Parameters
3934
+ ----------
3935
+ full : bool, optional
3936
+ If ``True``, computes triangles for all triplets of nodes in the graph.
3937
+ This computation can be expensive for large graphs. Must be set to ``True``
3938
+ to compute the full triangle relationship.
3939
+ Default is ``None``.
3940
+
3944
3941
  Returns
3945
3942
  -------
3946
3943
  Relationship
3947
3944
  A ternary relationship where each tuple represents a triangle.
3948
3945
 
3946
+ Raises
3947
+ ------
3948
+ ValueError
3949
+ If ``full`` is not provided.
3950
+ If ``full`` is not ``True``.
3951
+
3949
3952
  Relationship Schema
3950
3953
  -------------------
3951
3954
  ``triangle(node_a, node_b, node_c)``
@@ -3985,7 +3988,7 @@ class Graph():
3985
3988
  >>>
3986
3989
  >>> # 3. Select all triangles and inspect
3987
3990
  >>> a,b,c = Node.ref("a"), Node.ref("b"), Node.ref("c")
3988
- >>> triangle = graph.triangle()
3991
+ >>> triangle = graph.triangle(full=True)
3989
3992
  >>> select(a.id, b.id, c.id).where(triangle(a, b, c)).inspect()
3990
3993
  ▰▰▰▰ Setup complete
3991
3994
  id id2 id3
@@ -4014,7 +4017,7 @@ class Graph():
4014
4017
  >>>
4015
4018
  >>> # 3. Select all triangles and inspect
4016
4019
  >>> a,b,c = Node.ref("a"), Node.ref("b"), Node.ref("c")
4017
- >>> triangle = graph.triangle()
4020
+ >>> triangle = graph.triangle(full=True)
4018
4021
  >>> select(a.id, b.id, c.id).where(triangle(a, b, c)).inspect()
4019
4022
  ▰▰▰▰ Setup complete
4020
4023
  id id2 id3
@@ -4032,15 +4035,22 @@ class Graph():
4032
4035
  triangle_count
4033
4036
 
4034
4037
  """
4035
- warnings.warn(
4036
- (
4037
- "`triangle` presently always computes all triangles "
4038
- "in the graph. To provide better control over the computed subset, "
4039
- "`triangle`'s interface may soon change."
4040
- ),
4041
- FutureWarning,
4042
- stacklevel=2
4043
- )
4038
+ # Validate full parameter
4039
+ if full is None:
4040
+ raise ValueError(
4041
+ "Computing triangle for all triplets can be expensive. To confirm "
4042
+ "that you would like to compute the full triangle relationship, "
4043
+ "please call `triangle(full=True)`. "
4044
+ "(Domain constraints are not available for `triangle` at this time. "
4045
+ "If you need domain constraints for `triangle`, please reach out.)"
4046
+ )
4047
+
4048
+ if full is not True:
4049
+ raise ValueError(
4050
+ f"Invalid value (`{full}`) for 'full' parameter. Use `full=True` "
4051
+ "to compute the full triangle relationship. "
4052
+ )
4053
+
4044
4054
  return self._triangle
4045
4055
 
4046
4056
  @cached_property
@@ -4067,15 +4077,29 @@ class Graph():
4067
4077
 
4068
4078
 
4069
4079
  @include_in_docs
4070
- def unique_triangle(self):
4080
+ def unique_triangle(self, *, full: Optional[bool] = None):
4071
4081
  """Returns a ternary relationship containing all unique triangles in the graph.
4072
4082
 
4083
+ Parameters
4084
+ ----------
4085
+ full : bool, optional
4086
+ If ``True``, computes unique triangles for all triplets of nodes in the graph.
4087
+ This computation can be expensive for large graphs. Must be set to ``True``
4088
+ to compute the full unique_triangle relationship.
4089
+ Default is ``None``.
4090
+
4073
4091
  Returns
4074
4092
  -------
4075
4093
  Relationship
4076
4094
  A ternary relationship where each tuple represents a unique
4077
4095
  triangle.
4078
4096
 
4097
+ Raises
4098
+ ------
4099
+ ValueError
4100
+ If ``full`` is not provided.
4101
+ If ``full`` is not ``True``.
4102
+
4079
4103
  Relationship Schema
4080
4104
  -------------------
4081
4105
  ``unique_triangle(node_a, node_b, node_c)``
@@ -4132,7 +4156,7 @@ class Graph():
4132
4156
  >>>
4133
4157
  >>> # 3. Select the unique triangles and inspect
4134
4158
  >>> a,b,c = Node.ref("a"), Node.ref("b"), Node.ref("c")
4135
- >>> unique_triangle = graph.unique_triangle()
4159
+ >>> unique_triangle = graph.unique_triangle(full=True)
4136
4160
  >>> select(a.id, b.id, c.id).where(unique_triangle(a, b, c)).inspect()
4137
4161
  ▰▰▰▰ Setup complete
4138
4162
  id id2 id3
@@ -4161,7 +4185,7 @@ class Graph():
4161
4185
  >>>
4162
4186
  >>> # 3. Select the unique triangles and inspect
4163
4187
  >>> a,b,c = Node.ref("a"), Node.ref("b"), Node.ref("c")
4164
- >>> unique_triangle = graph.unique_triangle()
4188
+ >>> unique_triangle = graph.unique_triangle(full=True)
4165
4189
  >>> select(a.id, b.id, c.id).where(unique_triangle(a, b, c)).inspect()
4166
4190
  ▰▰▰▰ Setup complete
4167
4191
  id id2 id3
@@ -4175,15 +4199,22 @@ class Graph():
4175
4199
  triangle_count
4176
4200
 
4177
4201
  """
4178
- warnings.warn(
4179
- (
4180
- "`unique_triangle` presently always computes all unique triangles "
4181
- "in the graph. To provide better control over the computed subset, "
4182
- "`unique_triangle`'s interface may soon change."
4183
- ),
4184
- FutureWarning,
4185
- stacklevel=2
4186
- )
4202
+ # Validate full parameter
4203
+ if full is None:
4204
+ raise ValueError(
4205
+ "Computing unique_triangle for all triplets can be expensive. To confirm "
4206
+ "that you would like to compute the full unique_triangle relationship, "
4207
+ "please call `unique_triangle(full=True)`. "
4208
+ "(Domain constraints are not available for `unique_triangle` at this time. "
4209
+ "If you need domain constraints for `unique_triangle`, please reach out.)"
4210
+ )
4211
+
4212
+ if full is not True:
4213
+ raise ValueError(
4214
+ f"Invalid value (`{full}`) for 'full' parameter. Use `full=True` "
4215
+ "to compute the full unique_triangle relationship."
4216
+ )
4217
+
4187
4218
  return self._unique_triangle
4188
4219
 
4189
4220
  @cached_property
@@ -5592,18 +5623,71 @@ class Graph():
5592
5623
 
5593
5624
 
5594
5625
  @include_in_docs
5595
- def jaccard_similarity(self):
5596
- """Returns a ternary relationship containing the Jaccard similarity for all pairs of nodes.
5626
+ def jaccard_similarity(
5627
+ self,
5628
+ *,
5629
+ full: Optional[bool] = None,
5630
+ from_: Optional[Relationship] = None,
5631
+ to: Optional[Relationship] = None,
5632
+ between: Optional[Relationship] = None,
5633
+ ):
5634
+ """Returns a ternary relationship containing
5635
+ the Jaccard similarity for pairs of nodes.
5597
5636
 
5598
5637
  The Jaccard similarity is a measure between two nodes that ranges from
5599
5638
  0.0 to 1.0, where higher values indicate greater similarity.
5600
5639
 
5640
+ Parameters
5641
+ ----------
5642
+ full : bool, optional
5643
+ If ``True``, computes the Jaccard similarity for all pairs
5644
+ of nodes in the graph. This computation can be expensive for large graphs,
5645
+ as the result can scale quadratically in the number of nodes. Mutually exclusive
5646
+ with other parameters.
5647
+ Default is ``None``.
5648
+ from_ : Relationship, optional
5649
+ A unary relationship containing a subset of the graph's nodes. When
5650
+ provided, constrains the domain of the Jaccard similarity computation: only
5651
+ Jaccard similarity scores for node pairs where the first node is
5652
+ in this relationship are computed and returned. Mutually exclusive with
5653
+ ``full`` and ``between``.
5654
+ Default is ``None``.
5655
+ to : Relationship, optional
5656
+ A unary relationship containing a subset of the graph's nodes. Can only
5657
+ be used together with the ``from_`` parameter. When provided with ``from_``,
5658
+ constrains the domain of the Jaccard similarity computation: only
5659
+ Jaccard similarity scores for node pairs where the first node is
5660
+ in ``from_`` and the second node is in ``to`` are computed and returned.
5661
+ Default is ``None``.
5662
+ between : Relationship, optional
5663
+ A binary relationship containing pairs of nodes. When provided,
5664
+ constrains the domain of the Jaccard similarity computation: only
5665
+ Jaccard similarity scores for the specific node pairs in
5666
+ this relationship are computed and returned. Mutually exclusive
5667
+ with other parameters.
5668
+ Default is ``None``.
5669
+
5601
5670
  Returns
5602
5671
  -------
5603
5672
  Relationship
5604
5673
  A ternary relationship where each tuple represents a pair of nodes
5605
5674
  and their Jaccard similarity.
5606
5675
 
5676
+ Raises
5677
+ ------
5678
+ ValueError
5679
+ If ``full`` is provided with any other parameter.
5680
+ If ``between`` is provided with any other parameter.
5681
+ If ``from_`` is provided with any parameter other than ``to``.
5682
+ If none of ``full``, ``from_``, or ``between`` is provided.
5683
+ If ``full`` is not ``True`` or ``None``.
5684
+ AssertionError
5685
+ If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
5686
+ If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
5687
+ If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
5688
+ If ``from_`` or ``to`` is not a unary relationship.
5689
+ If ``between`` is not a binary relationship.
5690
+
5607
5691
  Relationship Schema
5608
5692
  -------------------
5609
5693
  ``jaccard_similarity(node_u, node_v, score)``
@@ -5652,6 +5736,40 @@ class Graph():
5652
5736
  The weighted Jaccard similarity between node 1 and 2 is then:
5653
5737
  `0.46 / (1.6 + 1.6 + 1.4) = 0.1`.
5654
5738
 
5739
+ Edge weights are assumed to be non-negative, so the neighborhood
5740
+ vectors contain only non-negative elements. Therefore, the Jaccard
5741
+ similarity score is always between 0.0 and 1.0, inclusive.
5742
+
5743
+ The ``jaccard_similarity(full=True)`` method computes and caches
5744
+ the full Jaccard similarity relationship for all pairs of nodes,
5745
+ providing efficient reuse across multiple calls. This can be expensive
5746
+ as the result can contain O(|V|²) tuples.
5747
+
5748
+ Calling ``jaccard_similarity()`` without arguments raises a ``ValueError``,
5749
+ to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
5750
+
5751
+ In contrast, ``jaccard_similarity(from_=subset)`` constrains the computation to
5752
+ tuples with the first position in the passed-in ``subset``. The result is
5753
+ not cached; it is specific to the call site. When a significant fraction of
5754
+ the Jaccard similarity relation is needed across a program,
5755
+ ``jaccard_similarity(full=True)`` is typically more efficient. Use
5756
+ ``jaccard_similarity(from_=subset)`` only when small subsets of
5757
+ the Jaccard similarity relationship are needed
5758
+ collectively across the program.
5759
+
5760
+ The ``to`` parameter can be used together with ``from_`` to further
5761
+ constrain the computation: ``jaccard_similarity(from_=subset_a, to=subset_b)``
5762
+ computes Jaccard similarity scores only for node pairs where the first node is in
5763
+ ``subset_a`` and the second node is in ``subset_b``. (Since ``jaccard_similarity``
5764
+ is symmetric in its first two positions, using ``to`` without ``from_`` would
5765
+ be functionally redundant, and is not allowed.)
5766
+
5767
+ The ``between`` parameter provides another way to constrain the computation.
5768
+ Unlike ``from_`` and ``to``, which allow you to independently constrain the first
5769
+ and second positions in ``jaccard_similarity`` tuples to sets of nodes, ``between``
5770
+ allows you to constrain the first and second positions, jointly, to specific pairs
5771
+ of nodes.
5772
+
5655
5773
  Examples
5656
5774
  --------
5657
5775
  **Unweighted Graph Examples**
@@ -5673,8 +5791,8 @@ class Graph():
5673
5791
  ... Edge.new(src=n4, dst=n3),
5674
5792
  ... )
5675
5793
  >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
5676
- >>> jaccard = graph.jaccard_similarity()
5677
- >>> select(score).where(jaccard(u, v, score), u.id == 2, v.id == 4).inspect()
5794
+ >>> jaccard_similarity = graph.jaccard_similarity(full=True)
5795
+ >>> select(score).where(jaccard_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
5678
5796
  ▰▰▰▰ Setup complete
5679
5797
  score
5680
5798
  0 0.25
@@ -5696,8 +5814,8 @@ class Graph():
5696
5814
  ... Edge.new(src=n4, dst=n3),
5697
5815
  ... )
5698
5816
  >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
5699
- >>> jaccard = graph.jaccard_similarity()
5700
- >>> select(score).where(jaccard(u, v, score), u.id == 2, v.id == 4).inspect()
5817
+ >>> jaccard_similarity = graph.jaccard_similarity(full=True)
5818
+ >>> select(score).where(jaccard_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
5701
5819
  ▰▰▰▰ Setup complete
5702
5820
  score
5703
5821
  0 0.5
@@ -5724,12 +5842,57 @@ class Graph():
5724
5842
  >>>
5725
5843
  >>> # 3. Select the weighted Jaccard similarity for the pair (1, 2)
5726
5844
  >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
5727
- >>> jaccard = graph.jaccard_similarity()
5728
- >>> select(score).where(jaccard(u, v, score), u.id == 1, v.id == 2).inspect()
5845
+ >>> jaccard_similarity = graph.jaccard_similarity(full=True)
5846
+ >>> select(score).where(jaccard_similarity(u, v, score), u.id == 1, v.id == 2).inspect()
5729
5847
  ▰▰▰▰ Setup complete
5730
5848
  score
5731
5849
  0 0.1
5732
5850
 
5851
+ **Domain Constraint Examples**
5852
+
5853
+ >>> # Use 'from_' parameter to constrain the set of nodes for the first position
5854
+ >>> # Using the same undirected unweighted graph from above
5855
+ >>> from relationalai.semantics import where
5856
+ >>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
5857
+ >>> node = Node.ref()
5858
+ >>> where(node.id == 2).define(subset(node))
5859
+ >>>
5860
+ >>> # Get Jaccard similarity scores only for pairs where first node is in subset
5861
+ >>> constrained_jaccard_similarity = graph.jaccard_similarity(from_=subset)
5862
+ >>> select(u.id, v.id, score).where(constrained_jaccard_similarity(u, v, score)).inspect()
5863
+ ▰▰▰▰ Setup complete
5864
+ id id2 score
5865
+ 0 2 2 1.00
5866
+ 1 2 3 0.50
5867
+ 2 2 4 0.25
5868
+
5869
+ >>> # Use both 'from_' and 'to' parameters to constrain both positions
5870
+ >>> from_subset = model.Relationship(f"{{node:{Node}}} is in from_subset")
5871
+ >>> to_subset = model.Relationship(f"{{node:{Node}}} is in to_subset")
5872
+ >>> where(node.id == 2).define(from_subset(node))
5873
+ >>> where(node.id == 4).define(to_subset(node))
5874
+ >>>
5875
+ >>> # Get Jaccard similarity scores only where first node is in from_subset and second node is in to_subset
5876
+ >>> constrained_jaccard_similarity = graph.jaccard_similarity(from_=from_subset, to=to_subset)
5877
+ >>> select(u.id, v.id, score).where(constrained_jaccard_similarity(u, v, score)).inspect()
5878
+ ▰▰▰▰ Setup complete
5879
+ id id2 score
5880
+ 0 2 4 0.25
5881
+
5882
+ >>> # Use 'between' parameter to constrain to specific pairs of nodes
5883
+ >>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
5884
+ >>> node_a, node_b = Node.ref(), Node.ref()
5885
+ >>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
5886
+ >>> where(node_a.id == 3, node_b.id == 4).define(pairs(node_a, node_b))
5887
+ >>>
5888
+ >>> # Get Jaccard similarity scores only for the specific pairs (2, 4) and (3, 4)
5889
+ >>> constrained_jaccard_similarity = graph.jaccard_similarity(between=pairs)
5890
+ >>> select(u.id, v.id, score).where(constrained_jaccard_similarity(u, v, score)).inspect()
5891
+ ▰▰▰▰ Setup complete
5892
+ id id2 score
5893
+ 0 2 4 0.25
5894
+ 1 3 4 0.50
5895
+
5733
5896
  References
5734
5897
  ----------
5735
5898
  Frigo M, Cruciani E, Coudert D, Deriche R, Natale E, Deslauriers-Gauthier S.
@@ -5738,57 +5901,242 @@ class Graph():
5738
5901
  doi: 10.1162/netn_a_00199. PMID: 34746624; PMCID: PMC8567827.
5739
5902
 
5740
5903
  """
5741
- warnings.warn(
5742
- (
5743
- "`jaccard_similarity` presently always computes the similarity "
5744
- "of all pairs of nodes of the graph. To provide better control over "
5745
- "the computed subset, `jaccard_similarity`'s interface will soon "
5746
- "need to change."
5747
- ),
5748
- FutureWarning,
5749
- stacklevel=2
5904
+ # Validate domain constraint parameters.
5905
+ self._validate_domain_constraint_parameters(
5906
+ 'jaccard_similarity', full, from_, to, between
5750
5907
  )
5908
+
5909
+ # At this point, exactly one of `full`, `from_`, or `between`
5910
+ # has been provided, and if `to` is provided, `from_` is also provided.
5911
+
5912
+ # Handle `between`.
5913
+ if between is not None:
5914
+ self._validate_pair_subset_parameter(between)
5915
+ return self._jaccard_similarity_between(between)
5916
+
5917
+ # Handle `from_` (and potentially `to`).
5918
+ if from_ is not None:
5919
+ self._validate_node_subset_parameter('from_', from_)
5920
+ if to is not None:
5921
+ self._validate_node_subset_parameter('to', to)
5922
+ return self._jaccard_similarity_from_to(from_, to)
5923
+ return self._jaccard_similarity_from(from_)
5924
+
5925
+ # Handle `full`.
5751
5926
  return self._jaccard_similarity
5752
5927
 
5753
5928
  @cached_property
5754
5929
  def _jaccard_similarity(self):
5755
- """Lazily define and cache the self._jaccard_similarity relationship."""
5756
- _jaccard_similarity_rel = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} has a similarity to {{node_v:{self._NodeConceptStr}}} of {{similarity:Float}}")
5930
+ """Lazily define and cache the full jaccard_similarity relationship."""
5931
+ _jaccard_similarity_rel = self._create_jaccard_similarity_relationship()
5757
5932
  _jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity"))
5933
+ return _jaccard_similarity_rel
5758
5934
 
5759
- if not self.weighted:
5760
- node_u, node_v = self.Node.ref(), self.Node.ref()
5761
- num_union_outneighbors, num_u_outneigbor, num_v_outneigbor, f = Integer.ref(),\
5762
- Integer.ref(), Integer.ref(), Float.ref()
5763
-
5764
- where(num_common_outneighbor := self._count_common_outneighbor_fragment(node_u, node_v),
5765
- self._count_outneighbor(node_u, num_u_outneigbor),
5766
- self._count_outneighbor(node_v, num_v_outneigbor),
5767
- num_union_outneighbors := num_u_outneigbor + num_v_outneigbor - num_common_outneighbor,
5768
- f := num_common_outneighbor / num_union_outneighbors).define(_jaccard_similarity_rel(node_u, node_v, f))
5935
+ def _jaccard_similarity_from(self, node_subset_from: Relationship):
5936
+ """
5937
+ Create a jaccard_similarity relationship, with the first position in each
5938
+ tuple constrained to be in the given subset of nodes. Note this relationship
5939
+ is not cached; it is specific to the callsite.
5940
+ """
5941
+ _jaccard_similarity_rel = self._create_jaccard_similarity_relationship(
5942
+ node_subset_from=node_subset_from
5943
+ )
5944
+ _jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity_from"))
5945
+ return _jaccard_similarity_rel
5946
+
5947
+ def _jaccard_similarity_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
5948
+ """
5949
+ Create a jaccard_similarity relationship, with the first position in each
5950
+ tuple constrained to be in `node_subset_from`, and the second position in
5951
+ each tuple constrained to be in `node_subset_to`. Note this relationship
5952
+ is not cached; it is specific to the callsite.
5953
+ """
5954
+ _jaccard_similarity_rel = self._create_jaccard_similarity_relationship(
5955
+ node_subset_from=node_subset_from,
5956
+ node_subset_to=node_subset_to
5957
+ )
5958
+ _jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity_from_to"))
5959
+ return _jaccard_similarity_rel
5960
+
5961
+ def _jaccard_similarity_between(self, pair_subset_between: Relationship):
5962
+ """
5963
+ Create a jaccard_similarity relationship, with the first and second position
5964
+ in each tuple jointly constrained to be in the given set of pairs
5965
+ of nodes. Note this relationship is not cached;
5966
+ it is specific to the callsite.
5967
+ """
5968
+ _jaccard_similarity_rel = self._create_jaccard_similarity_relationship(
5969
+ pair_subset_between=pair_subset_between
5970
+ )
5971
+ _jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity_between"))
5972
+ return _jaccard_similarity_rel
5973
+
5974
+ def _create_jaccard_similarity_relationship(
5975
+ self,
5976
+ *,
5977
+ node_subset_from: Optional[Relationship] = None,
5978
+ node_subset_to: Optional[Relationship] = None,
5979
+ pair_subset_between: Optional[Relationship] = None,
5980
+ ):
5981
+ """
5982
+ Create jaccard_similarity relationship, optionally constrained by
5983
+ the provided node subsets or pair subset.
5984
+ """
5985
+ _jaccard_similarity_rel = self._model.Relationship(
5986
+ f"{{node_u:{self._NodeConceptStr}}} has a Jaccard similarity to "
5987
+ f"{{node_v:{self._NodeConceptStr}}} of {{score:Float}}"
5988
+ )
5989
+
5990
+ # Branch by case to select appropriate count_outneighbor,
5991
+ # outneighbor, and weighted_outdegree relationships, and build
5992
+ # appropriate constraints on the domain of the nodes.
5993
+ node_u, node_v = self.Node.ref(), self.Node.ref()
5994
+
5995
+ # TODO: Optimization opportunity. In a number of branches below,
5996
+ # we compute _count_outneighbor_of, which transitively computes
5997
+ # _outneighbor_of, and then compute _outneighbor_of directly;
5998
+ # the present code structure makes this a developer-time-efficient
5999
+ # way to get this off the ground, but of course involves redundant
6000
+ # work. In the future, this redundant work could be eliminated.
6001
+
6002
+ # Handle the `between` case.
6003
+ if pair_subset_between is not None:
6004
+ # Extract first-position and second-position nodes.
6005
+ first_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
6006
+ second_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
6007
+ node_x, node_y = self.Node.ref(), self.Node.ref()
6008
+ where(
6009
+ pair_subset_between(node_x, node_y)
6010
+ ).define(
6011
+ first_position_subset(node_x),
6012
+ second_position_subset(node_y)
6013
+ )
6014
+
6015
+ if not self.weighted:
6016
+ count_outneighbor_u_rel = self._count_outneighbor_of(first_position_subset)
6017
+ count_outneighbor_v_rel = self._count_outneighbor_of(second_position_subset)
6018
+ outneighbor_u_rel = self._outneighbor_of(first_position_subset)
6019
+ outneighbor_v_rel = self._outneighbor_of(second_position_subset)
6020
+ else: # self.weighted
6021
+ weighted_outdegree_u_rel = self._weighted_outdegree_of(first_position_subset)
6022
+ weighted_outdegree_v_rel = self._weighted_outdegree_of(second_position_subset)
6023
+
6024
+ node_constraints = [pair_subset_between(node_u, node_v)]
6025
+
6026
+ # Handle the `from_` case.
6027
+ elif node_subset_from is not None and node_subset_to is None:
6028
+ if not self.weighted:
6029
+ count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
6030
+ count_outneighbor_v_rel = self._count_outneighbor
6031
+ outneighbor_u_rel = self._outneighbor_of(node_subset_from)
6032
+ outneighbor_v_rel = self._outneighbor
6033
+ else: # self.weighted
6034
+ weighted_outdegree_u_rel = self._weighted_outdegree_of(node_subset_from)
6035
+ weighted_outdegree_v_rel = self._weighted_outdegree
6036
+
6037
+ # TODO: Implement depth-two traversal strategy for better performance.
6038
+ # See similar comments on related similarity metrics.
6039
+
6040
+ node_constraints = [node_subset_from(node_u)]
6041
+
6042
+ # Handle the `from_`/`to` case.
6043
+ elif node_subset_from is not None and node_subset_to is not None:
6044
+ # Check for object identity optimization.
6045
+ if node_subset_from is node_subset_to:
6046
+ if not self.weighted:
6047
+ count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
6048
+ count_outneighbor_v_rel = count_outneighbor_u_rel
6049
+ outneighbor_u_rel = self._outneighbor_of(node_subset_from)
6050
+ outneighbor_v_rel = outneighbor_u_rel
6051
+ else: # self.weighted
6052
+ weighted_outdegree_u_rel = self._weighted_outdegree_of(node_subset_from)
6053
+ weighted_outdegree_v_rel = weighted_outdegree_u_rel
6054
+ else:
6055
+ if not self.weighted:
6056
+ count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
6057
+ count_outneighbor_v_rel = self._count_outneighbor_of(node_subset_to)
6058
+ outneighbor_u_rel = self._outneighbor_of(node_subset_from)
6059
+ outneighbor_v_rel = self._outneighbor_of(node_subset_to)
6060
+ else: # self.weighted
6061
+ weighted_outdegree_u_rel = self._weighted_outdegree_of(node_subset_from)
6062
+ weighted_outdegree_v_rel = self._weighted_outdegree_of(node_subset_to)
6063
+
6064
+ node_constraints = [node_subset_from(node_u), node_subset_to(node_v)]
6065
+
6066
+ # Handle the `full` case.
5769
6067
  else:
5770
- # TODO (dba) Annotate local relationships in this scope with `@ondemand` once available.
6068
+ if not self.weighted:
6069
+ count_outneighbor_u_rel = self._count_outneighbor
6070
+ count_outneighbor_v_rel = self._count_outneighbor
6071
+ outneighbor_u_rel = self._outneighbor
6072
+ outneighbor_v_rel = self._outneighbor
6073
+ else: # self.weighted
6074
+ weighted_outdegree_u_rel = self._weighted_outdegree
6075
+ weighted_outdegree_v_rel = self._weighted_outdegree
6076
+
6077
+ node_constraints = []
6078
+
6079
+ # Define Jaccard similarity logic for weighted and unweighted cases.
6080
+ if not self.weighted:
6081
+ num_u_outneigbor, num_v_outneigbor = Integer.ref(), Integer.ref()
6082
+ common_outneighbor_node = self.Node.ref()
6083
+ num_union_outneighbors = Integer.ref()
6084
+ score = Float.ref()
5771
6085
 
6086
+ where(
6087
+ *node_constraints,
6088
+ count_outneighbor_u_rel(node_u, num_u_outneigbor), # type: ignore[possibly-unbound]
6089
+ count_outneighbor_v_rel(node_v, num_v_outneigbor), # type: ignore[possibly-unbound]
6090
+ num_common_outneighbor := count(common_outneighbor_node).per(node_u, node_v).where(
6091
+ outneighbor_u_rel(node_u, common_outneighbor_node), # type: ignore[possibly-unbound]
6092
+ outneighbor_v_rel(node_v, common_outneighbor_node), # type: ignore[possibly-unbound]
6093
+ ),
6094
+ num_union_outneighbors := num_u_outneigbor + num_v_outneigbor - num_common_outneighbor,
6095
+ score := num_common_outneighbor / num_union_outneighbors,
6096
+ ).define(
6097
+ _jaccard_similarity_rel(node_u, node_v, score)
6098
+ )
6099
+ else:
5772
6100
  # (1) The numerator: For every node `k` in the graph, find the minimum weight of
5773
6101
  # the out-edges from `u` and `v` to `k`, and sum those minimum weights.
5774
6102
 
5775
6103
  # Note that for any node `k` that is not a common out-neighbor of nodes `u` and `v`,
5776
6104
  # the minimum weight of the out-edges from `u` and `v` to `k` is zero/empty,
5777
6105
  # so the sum here reduces to a sum over the common out-neighbors of `u` and `v`.
5778
- min_weight_to_common_outneighbor = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} have common outneighbor {{node_k:{self._NodeConceptStr}}} with minimum weight {{minweight:Float}}")
6106
+ min_weight_to_common_outneighbor = self._model.Relationship(
6107
+ f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
6108
+ f"have common outneighbor {{node_k:{self._NodeConceptStr}}} "
6109
+ f"with minimum weight {{minweight:Float}}"
6110
+ )
5779
6111
 
5780
- node_u, node_v, node_k, w1, w2 = self.Node.ref(), self.Node.ref(), self.Node.ref(), Float.ref(), Float.ref()
5781
- w = union(where(self._weight(node_u, node_k, w1)).select(w1),
5782
- where(self._weight(node_v, node_k, w2)).select(w2))
5783
- where(self._edge(node_u, node_k),
5784
- self._edge(node_v, node_k))\
5785
- .define(min_weight_to_common_outneighbor(node_u, node_v, node_k, min(w).per(node_u, node_v, node_k)))
6112
+ node_k, w1, w2 = self.Node.ref(), Float.ref(), Float.ref()
6113
+ w = union(
6114
+ where(self._weight(node_u, node_k, w1)).select(w1),
6115
+ where(self._weight(node_v, node_k, w2)).select(w2)
6116
+ )
6117
+ where(
6118
+ *node_constraints,
6119
+ self._edge(node_u, node_k),
6120
+ self._edge(node_v, node_k)
6121
+ ).define(
6122
+ min_weight_to_common_outneighbor(
6123
+ node_u, node_v, node_k, min(w).per(node_u, node_v, node_k)
6124
+ )
6125
+ )
5786
6126
 
5787
- sum_of_min_weights_to_common_outneighbors = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} have a sum of minweights of {{minsum:Float}}")
6127
+ sum_of_min_weights_to_common_outneighbors = self._model.Relationship(
6128
+ f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
6129
+ f"have a sum of minweights of {{minsum:Float}}"
6130
+ )
5788
6131
 
5789
6132
  minweight = Float.ref()
5790
- where(min_weight_to_common_outneighbor(node_u, node_v, node_k, minweight)
5791
- ).define(sum_of_min_weights_to_common_outneighbors(node_u, node_v, sum(node_k, minweight).per(node_u, node_v)))
6133
+ where(
6134
+ min_weight_to_common_outneighbor(node_u, node_v, node_k, minweight)
6135
+ ).define(
6136
+ sum_of_min_weights_to_common_outneighbors(
6137
+ node_u, node_v, sum(node_k, minweight).per(node_u, node_v)
6138
+ )
6139
+ )
5792
6140
 
5793
6141
  # (2) The denominator: For every node `k` in the graph, find the maximum weight of
5794
6142
  # the out-edges from `u` and `v` to `k`, and sum those maximum weights.
@@ -5827,20 +6175,31 @@ class Graph():
5827
6175
  # self._weighted_outdegree(u) +
5828
6176
  # self._weighted_outdegree(v) -
5829
6177
  # _sum_of_min_weights_to_common_outneighbors(u, v)
5830
- sum_of_max_weights_to_other_nodes = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} have a maxsum of {{maxsum:Float}}")
6178
+ sum_of_max_weights_to_other_nodes = self._model.Relationship(
6179
+ f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
6180
+ f"have a maxsum of {{maxsum:Float}}"
6181
+ )
5831
6182
 
5832
6183
  u_outdegree, v_outdegree, maxsum, minsum = Float.ref(), Float.ref(), Float.ref(), Float.ref()
5833
- where(self._weighted_outdegree(node_u, u_outdegree),
5834
- self._weighted_outdegree(node_v, v_outdegree),
5835
- sum_of_min_weights_to_common_outneighbors(node_u, node_v, minsum),
5836
- maxsum == u_outdegree + v_outdegree - minsum
5837
- ).define(sum_of_max_weights_to_other_nodes(node_u, node_v, maxsum))
6184
+ where(
6185
+ *node_constraints,
6186
+ weighted_outdegree_u_rel(node_u, u_outdegree), # type: ignore[possibly-unbound]
6187
+ weighted_outdegree_v_rel(node_v, v_outdegree), # type: ignore[possibly-unbound]
6188
+ sum_of_min_weights_to_common_outneighbors(node_u, node_v, minsum),
6189
+ maxsum == u_outdegree + v_outdegree - minsum
6190
+ ).define(
6191
+ sum_of_max_weights_to_other_nodes(node_u, node_v, maxsum)
6192
+ )
5838
6193
 
6194
+ # Combination of (1) and (2) to produce score.
5839
6195
  score = Float.ref()
5840
- where(sum_of_min_weights_to_common_outneighbors(node_u, node_v, minsum),
5841
- sum_of_max_weights_to_other_nodes(node_u, node_v, maxsum),
5842
- score == minsum/maxsum
5843
- ).define(_jaccard_similarity_rel(node_u, node_v, score))
6196
+ where(
6197
+ sum_of_min_weights_to_common_outneighbors(node_u, node_v, minsum),
6198
+ sum_of_max_weights_to_other_nodes(node_u, node_v, maxsum),
6199
+ score == minsum / maxsum
6200
+ ).define(
6201
+ _jaccard_similarity_rel(node_u, node_v, score)
6202
+ )
5844
6203
 
5845
6204
  return _jaccard_similarity_rel
5846
6205
 
@@ -6662,19 +7021,72 @@ class Graph():
6662
7021
 
6663
7022
 
6664
7023
  @include_in_docs
6665
- def preferential_attachment(self):
6666
- """Returns a ternary relationship containing the preferential attachment score for all pairs of nodes.
7024
+ def preferential_attachment(
7025
+ self,
7026
+ *,
7027
+ full: Optional[bool] = None,
7028
+ from_: Optional[Relationship] = None,
7029
+ to: Optional[Relationship] = None,
7030
+ between: Optional[Relationship] = None,
7031
+ ):
7032
+ """Returns a ternary relationship containing
7033
+ the preferential attachment score for pairs of nodes.
6667
7034
 
6668
7035
  The preferential attachment score between two nodes `u` and `v` is the
6669
7036
  number of nodes adjacent to `u` multiplied by the number of nodes
6670
7037
  adjacent to `v`.
6671
7038
 
7039
+ Parameters
7040
+ ----------
7041
+ full : bool, optional
7042
+ If ``True``, computes the preferential attachment score for all pairs
7043
+ of nodes in the graph. This computation can be expensive for large graphs,
7044
+ as the result can scale quadratically in the number of nodes. Mutually exclusive
7045
+ with other parameters.
7046
+ Default is ``None``.
7047
+ from_ : Relationship, optional
7048
+ A unary relationship containing a subset of the graph's nodes. When
7049
+ provided, constrains the domain of the preferential attachment computation: only
7050
+ preferential attachment scores for node pairs where the first node is
7051
+ in this relationship are computed and returned. Mutually exclusive with
7052
+ ``full`` and ``between``.
7053
+ Default is ``None``.
7054
+ to : Relationship, optional
7055
+ A unary relationship containing a subset of the graph's nodes. Can only
7056
+ be used together with the ``from_`` parameter. When provided with ``from_``,
7057
+ constrains the domain of the preferential attachment computation: only
7058
+ preferential attachment scores for node pairs where the first node is
7059
+ in ``from_`` and the second node is in ``to`` are computed and returned.
7060
+ Default is ``None``.
7061
+ between : Relationship, optional
7062
+ A binary relationship containing pairs of nodes. When provided,
7063
+ constrains the domain of the preferential attachment computation: only
7064
+ preferential attachment scores for the specific node pairs in
7065
+ this relationship are computed and returned. Mutually exclusive
7066
+ with other parameters.
7067
+ Default is ``None``.
7068
+
6672
7069
  Returns
6673
7070
  -------
6674
7071
  Relationship
6675
7072
  A ternary relationship where each tuple represents a pair of nodes
6676
7073
  and their preferential attachment score.
6677
7074
 
7075
+ Raises
7076
+ ------
7077
+ ValueError
7078
+ If ``full`` is provided with any other parameter.
7079
+ If ``between`` is provided with any other parameter.
7080
+ If ``from_`` is provided with any parameter other than ``to``.
7081
+ If none of ``full``, ``from_``, or ``between`` is provided.
7082
+ If ``full`` is not ``True`` or ``None``.
7083
+ AssertionError
7084
+ If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
7085
+ If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
7086
+ If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
7087
+ If ``from_`` or ``to`` is not a unary relationship.
7088
+ If ``between`` is not a binary relationship.
7089
+
6678
7090
  Relationship Schema
6679
7091
  -------------------
6680
7092
  ``preferential_attachment(node_u, node_v, score)``
@@ -6691,6 +7103,38 @@ class Graph():
6691
7103
  | Directed | Yes | |
6692
7104
  | Weighted | Yes | Weights are ignored. |
6693
7105
 
7106
+ Notes
7107
+ -----
7108
+ The ``preferential_attachment(full=True)`` method computes and caches
7109
+ the full preferential attachment relationship for all pairs of nodes,
7110
+ providing efficient reuse across multiple calls. This can be expensive
7111
+ as the result contains O(|V|²) tuples.
7112
+
7113
+ Calling ``preferential_attachment()`` without arguments raises a ``ValueError``,
7114
+ to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
7115
+
7116
+ In contrast, ``preferential_attachment(from_=subset)`` constrains the computation to
7117
+ tuples with the first position in the passed-in ``subset``. The result is
7118
+ not cached; it is specific to the call site. When a significant fraction of
7119
+ the preferential attachment relation is needed across a program,
7120
+ ``preferential_attachment(full=True)`` is typically more efficient. Use
7121
+ ``preferential_attachment(from_=subset)`` only when small subsets of
7122
+ the preferential attachment relationship are needed
7123
+ collectively across the program.
7124
+
7125
+ The ``to`` parameter can be used together with ``from_`` to further
7126
+ constrain the computation: ``preferential_attachment(from_=subset_a, to=subset_b)``
7127
+ computes preferential attachment scores only for node pairs where the first node is in
7128
+ ``subset_a`` and the second node is in ``subset_b``. (Since ``preferential_attachment``
7129
+ is symmetric in its first two positions, using ``to`` without ``from_`` would
7130
+ be functionally redundant, and is not allowed.)
7131
+
7132
+ The ``between`` parameter provides another way to constrain the computation.
7133
+ Unlike ``from_`` and ``to``, which allow you to independently constrain the first
7134
+ and second positions in ``preferential_attachment`` tuples to sets of nodes, ``between``
7135
+ allows you to constrain the first and second positions, jointly, to specific pairs
7136
+ of nodes.
7137
+
6694
7138
  Examples
6695
7139
  --------
6696
7140
  >>> from relationalai.semantics import Model, define, select, Integer
@@ -6712,10 +7156,10 @@ class Graph():
6712
7156
  ... Edge.new(src=n4, dst=n3),
6713
7157
  ... )
6714
7158
  >>>
6715
- >>> # 3. Select the preferential attachment score for the pair (1, 3)
7159
+ >>> # 3. Select the preferential attachment scores from the full relationship
6716
7160
  >>> u, v = Node.ref("u"), Node.ref("v")
6717
7161
  >>> score = Integer.ref("score")
6718
- >>> preferential_attachment = graph.preferential_attachment()
7162
+ >>> preferential_attachment = graph.preferential_attachment(full=True)
6719
7163
  >>> select(
6720
7164
  ... u.id, v.id, score,
6721
7165
  ... ).where(
@@ -6727,64 +7171,302 @@ class Graph():
6727
7171
  id id2 score
6728
7172
  0 1 3 3
6729
7173
 
7174
+ >>> # 4. Use 'from_' parameter to constrain the set of nodes for the first position
7175
+ >>> # Define a subset containing only node 1
7176
+ >>> from relationalai.semantics import where
7177
+ >>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
7178
+ >>> node = Node.ref()
7179
+ >>> where(node.id == 1).define(subset(node))
7180
+ >>>
7181
+ >>> # Get preferential attachment scores only for pairs where first node is in subset
7182
+ >>> constrained_preferential_attachment = graph.preferential_attachment(from_=subset)
7183
+ >>> select(u.id, v.id, score).where(constrained_preferential_attachment(u, v, score)).inspect()
7184
+ ▰▰▰▰ Setup complete
7185
+ id id2 score
7186
+ 0 1 1 1
7187
+ 1 1 2 3
7188
+ 2 1 3 3
7189
+ 3 1 4 3
7190
+
7191
+ >>> # 5. Use both 'from_' and 'to' parameters to constrain both positions
7192
+ >>> from_subset = model.Relationship(f"{{node:{Node}}} is in from_subset")
7193
+ >>> to_subset = model.Relationship(f"{{node:{Node}}} is in to_subset")
7194
+ >>> where(node.id == 1).define(from_subset(node))
7195
+ >>> where(node.id == 3).define(to_subset(node))
7196
+ >>>
7197
+ >>> # Get preferential attachment scores only where first node is in from_subset and second node is in to_subset
7198
+ >>> constrained_preferential_attachment = graph.preferential_attachment(from_=from_subset, to=to_subset)
7199
+ >>> select(u.id, v.id, score).where(constrained_preferential_attachment(u, v, score)).inspect()
7200
+ ▰▰▰▰ Setup complete
7201
+ id id2 score
7202
+ 0 1 3 3
7203
+
7204
+ >>> # 6. Use 'between' parameter to constrain to specific pairs of nodes
7205
+ >>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
7206
+ >>> node_a, node_b = Node.ref(), Node.ref()
7207
+ >>> where(node_a.id == 1, node_b.id == 3).define(pairs(node_a, node_b))
7208
+ >>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
7209
+ >>>
7210
+ >>> # Get preferential attachment scores only for the specific pairs (1, 3) and (2, 4)
7211
+ >>> constrained_preferential_attachment = graph.preferential_attachment(between=pairs)
7212
+ >>> select(u.id, v.id, score).where(constrained_preferential_attachment(u, v, score)).inspect()
7213
+ ▰▰▰▰ Setup complete
7214
+ id id2 score
7215
+ 0 1 3 3
7216
+ 1 2 4 6
7217
+
6730
7218
  """
6731
- warnings.warn(
6732
- (
6733
- "`preferential_attachment` presently always computes the similarity "
6734
- "of all pairs of nodes of the graph. To provide better control over "
6735
- "the computed subset, `preferential_attachment`'s interface will soon "
6736
- "need to change."
6737
- ),
6738
- FutureWarning,
6739
- stacklevel=2
7219
+ # Validate domain constraint parameters.
7220
+ self._validate_domain_constraint_parameters(
7221
+ 'preferential_attachment', full, from_, to, between
6740
7222
  )
6741
7223
 
7224
+ # At this point, exactly one of `full`, `from_`, or `between`
7225
+ # has been provided, and if `to` is provided, `from_` is also provided.
7226
+
7227
+ # Handle `between`.
7228
+ if between is not None:
7229
+ self._validate_pair_subset_parameter(between)
7230
+ return self._preferential_attachment_between(between)
7231
+
7232
+ # Handle `from_` (and potentially `to`).
7233
+ if from_ is not None:
7234
+ self._validate_node_subset_parameter('from_', from_)
7235
+ if to is not None:
7236
+ self._validate_node_subset_parameter('to', to)
7237
+ return self._preferential_attachment_from_to(from_, to)
7238
+ return self._preferential_attachment_from(from_)
7239
+
7240
+ # Handle `full`.
6742
7241
  return self._preferential_attachment
6743
7242
 
6744
7243
  @cached_property
6745
7244
  def _preferential_attachment(self):
6746
- """Lazily define and cache the self._preferential_attachment relationship."""
6747
- _preferential_attachment_rel = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} have preferential attachment score {{score:Integer}}")
7245
+ """Lazily define and cache the full preferential_attachment relationship."""
7246
+ _preferential_attachment_rel = self._create_preferential_attachment_relationship()
6748
7247
  _preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment"))
7248
+ return _preferential_attachment_rel
7249
+
7250
+ def _preferential_attachment_from(self, node_subset_from: Relationship):
7251
+ """
7252
+ Create a preferential_attachment relationship, with the first position in each
7253
+ tuple constrained to be in the given subset of nodes. Note this relationship
7254
+ is not cached; it is specific to the callsite.
7255
+ """
7256
+ _preferential_attachment_rel = self._create_preferential_attachment_relationship(
7257
+ node_subset_from=node_subset_from
7258
+ )
7259
+ _preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment_from"))
7260
+ return _preferential_attachment_rel
7261
+
7262
+ def _preferential_attachment_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
7263
+ """
7264
+ Create a preferential_attachment relationship, with the first position in each
7265
+ tuple constrained to be in `node_subset_from`, and the second position in
7266
+ each tuple constrained to be in `node_subset_to`. Note this relationship
7267
+ is not cached; it is specific to the callsite.
7268
+ """
7269
+ _preferential_attachment_rel = self._create_preferential_attachment_relationship(
7270
+ node_subset_from=node_subset_from,
7271
+ node_subset_to=node_subset_to
7272
+ )
7273
+ _preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment_from_to"))
7274
+ return _preferential_attachment_rel
7275
+
7276
+ def _preferential_attachment_between(self, pair_subset_between: Relationship):
7277
+ """
7278
+ Create a preferential_attachment relationship, with the first and second position
7279
+ in each tuple jointly constrained to be in the given set of pairs
7280
+ of nodes. Note this relationship is not cached;
7281
+ it is specific to the callsite.
7282
+ """
7283
+ _preferential_attachment_rel = self._create_preferential_attachment_relationship(
7284
+ pair_subset_between=pair_subset_between
7285
+ )
7286
+ _preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment_between"))
7287
+ return _preferential_attachment_rel
7288
+
7289
+ def _create_preferential_attachment_relationship(
7290
+ self,
7291
+ *,
7292
+ node_subset_from: Optional[Relationship] = None,
7293
+ node_subset_to: Optional[Relationship] = None,
7294
+ pair_subset_between: Optional[Relationship] = None,
7295
+ ):
7296
+ """
7297
+ Create preferential_attachment relationship, optionally constrained by
7298
+ the provided node subsets or pair subset.
7299
+ """
7300
+ _preferential_attachment_rel = self._model.Relationship(
7301
+ f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
7302
+ f"have preferential attachment score {{score:Integer}}"
7303
+ )
6749
7304
 
7305
+ # Branch by case to select appropriate count_neighbor and isolated_node relationships,
7306
+ # and to define relevant constraints on the separate and joint domains of node_u and node_v.
6750
7307
  node_u, node_v = self.Node.ref(), self.Node.ref()
6751
- count_u, count_v = Integer.ref(), Integer.ref()
6752
7308
 
6753
- # NOTE: We consider isolated nodes separately to maintain
6754
- # the dense behavior of preferential attachment.
7309
+ # Handle the `between` case.
7310
+ if pair_subset_between is not None:
7311
+ # Collect nodes that appear in the subset by position.
7312
+ first_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
7313
+ second_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
7314
+ node_x, node_y = self.Node.ref(), self.Node.ref()
7315
+ where(
7316
+ pair_subset_between(node_x, node_y)
7317
+ ).define(
7318
+ first_position_subset(node_x),
7319
+ second_position_subset(node_y)
7320
+ )
7321
+
7322
+ # Constituents of non-isolated-nodes rule.
7323
+ non_isolated_rule_uv_constraint = [pair_subset_between(node_u, node_v)]
7324
+ count_neighbor_u_rel = self._count_neighbor_of(first_position_subset)
7325
+ count_neighbor_v_rel = self._count_neighbor_of(second_position_subset)
6755
7326
 
6756
- # Case where node u is isolated, and node v is any node: score 0.
7327
+ # Constituents of u-isolated rule.
7328
+ isolated_u_rel = self._isolated_node_of(first_position_subset)
7329
+ isolated_u_rule_uv_constraint = [pair_subset_between(node_u, node_v)]
7330
+
7331
+ # Constituents of v-isolated rule.
7332
+ isolated_v_rel = self._isolated_node_of(second_position_subset)
7333
+ isolated_v_rule_uv_constraint = [pair_subset_between(node_u, node_v)]
7334
+
7335
+ # Handle the `from_` case.
7336
+ elif node_subset_from is not None and node_subset_to is None:
7337
+ # NOTE: It isn't necessary to compute _count_neighbor_of
7338
+ # and _isolated_node_of for node_subset_from, given
7339
+ # we have to compute _count_neighbor and _isolated_node
7340
+ # for the unconstrained second position anyway. That does
7341
+ # require additional constraints as seen below, though.
7342
+ #
7343
+ # It's not clear to this author that there is a more clever
7344
+ # way to do this, given that in preferential attachment,
7345
+ # constraining one position implies no constraint on the
7346
+ # other position, unlike in, e.g., common neighbor?
7347
+
7348
+ # Constituents of non-isolated-nodes rule.
7349
+ non_isolated_rule_uv_constraint = [node_subset_from(node_u)]
7350
+ count_neighbor_u_rel = self._count_neighbor
7351
+ count_neighbor_v_rel = self._count_neighbor
7352
+
7353
+ # Constituents of u-isolated rule.
7354
+ isolated_u_rel = self._isolated_node
7355
+ isolated_u_rule_uv_constraint = [
7356
+ node_subset_from(node_u),
7357
+ self.Node(node_v)
7358
+ ]
7359
+
7360
+ # Constituents of v-isolated rule.
7361
+ isolated_v_rel = self._isolated_node
7362
+ isolated_v_rule_uv_constraint = [node_subset_from(node_u)]
7363
+
7364
+ # Handle the `from_`/`to` case.
7365
+ elif node_subset_from is not None and node_subset_to is not None:
7366
+ # Check for object identity optimization.
7367
+ if node_subset_from is node_subset_to:
7368
+ # Constituents of non-isolated-nodes rule.
7369
+ non_isolated_rule_uv_constraint = []
7370
+ count_neighbor_u_rel = self._count_neighbor_of(node_subset_from)
7371
+ count_neighbor_v_rel = count_neighbor_u_rel
7372
+
7373
+ # Constituents of u-isolated rule.
7374
+ isolated_u_rel = self._isolated_node_of(node_subset_from)
7375
+ isolated_u_rule_uv_constraint = [node_subset_to(node_v)]
7376
+
7377
+ # Constituents of v-isolated rule.
7378
+ isolated_v_rel = isolated_u_rel
7379
+ isolated_v_rule_uv_constraint = [node_subset_from(node_u)]
7380
+ else:
7381
+ # Constituents of non-isolated-nodes rule.
7382
+ non_isolated_rule_uv_constraint = []
7383
+ count_neighbor_u_rel = self._count_neighbor_of(node_subset_from)
7384
+ count_neighbor_v_rel = self._count_neighbor_of(node_subset_to)
7385
+
7386
+ # Constituents of u-isolated rule.
7387
+ isolated_u_rel = self._isolated_node_of(node_subset_from)
7388
+ isolated_u_rule_uv_constraint = [node_subset_to(node_v)]
7389
+
7390
+ # Constituents of v-isolated rule.
7391
+ isolated_v_rel = self._isolated_node_of(node_subset_to)
7392
+ isolated_v_rule_uv_constraint = [node_subset_from(node_u)]
7393
+
7394
+
7395
+ # Handle the `full` case.
7396
+ else:
7397
+ # Constituents of non-isolated-nodes rule.
7398
+ non_isolated_rule_uv_constraint = []
7399
+ count_neighbor_u_rel = self._count_neighbor
7400
+ count_neighbor_v_rel = self._count_neighbor
7401
+
7402
+ # Constituents of u-isolated rule.
7403
+ isolated_u_rel = self._isolated_node
7404
+ isolated_u_rule_uv_constraint = [self.Node(node_v)]
7405
+
7406
+ # Constituents of v-isolated rule.
7407
+ isolated_v_rel = self._isolated_node
7408
+ isolated_v_rule_uv_constraint = [self.Node(node_u)]
7409
+
7410
+ # Define shared logic, which has three cases.
7411
+ count_u, count_v = Integer.ref(), Integer.ref()
7412
+
7413
+ # Case where node u is isolated, and node v is any node (respecting constraints): score 0.
6757
7414
  where(
6758
- self._isolated_node(node_u),
6759
- self.Node(node_v),
7415
+ isolated_u_rel(node_u),
7416
+ *isolated_u_rule_uv_constraint,
6760
7417
  ).define(_preferential_attachment_rel(node_u, node_v, 0))
6761
7418
 
6762
- # Case where node u is any node, and node v is isolated: score 0.
7419
+ # Case where node u is any node (respecting constraints), and node v is isolated: score 0.
6763
7420
  where(
6764
- self.Node(node_u),
6765
- self._isolated_node(node_v)
7421
+ *isolated_v_rule_uv_constraint,
7422
+ isolated_v_rel(node_v)
6766
7423
  ).define(_preferential_attachment_rel(node_u, node_v, 0))
6767
7424
 
6768
7425
  # Case where neither node is isolated: score is count_neighbor[u] * count_neighbor[v].
6769
7426
  where(
6770
- self._count_neighbor(node_u, count_u),
6771
- self._count_neighbor(node_v, count_v)
7427
+ *non_isolated_rule_uv_constraint,
7428
+ count_neighbor_u_rel(node_u, count_u),
7429
+ count_neighbor_v_rel(node_v, count_v)
6772
7430
  ).define(_preferential_attachment_rel(node_u, node_v, count_u * count_v))
6773
7431
 
6774
7432
  return _preferential_attachment_rel
6775
7433
 
7434
+
6776
7435
  @cached_property
6777
7436
  def _isolated_node(self):
7437
+ """Lazily define and cache the self._isolated_node relationship."""
7438
+ return self._create_isolated_node_relationship()
7439
+
7440
+ def _isolated_node_of(self, node_subset: Relationship):
6778
7441
  """
6779
- Lazily define and cache the self._isolated_node (helper, non-public) relationship.
6780
- At this time, exclusively a helper for preferential_attachment.
7442
+ Create an _isolated_node relationship constrained to the subset of nodes
7443
+ in `node_subset`. Note this relationship is not cached; it is
7444
+ specific to the callsite.
7445
+ """
7446
+ return self._create_isolated_node_relationship(node_subset=node_subset)
7447
+
7448
+ def _create_isolated_node_relationship(
7449
+ self,
7450
+ *,
7451
+ node_subset: Optional[Relationship] = None,
7452
+ ):
7453
+ """
7454
+ Create _isolated_node relationship, optionally constrained by
7455
+ the provided node subset.
6781
7456
  """
6782
7457
  _isolated_node_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} is isolated")
6783
7458
 
6784
7459
  neighbor_node = self.Node.ref()
7460
+ if node_subset is not None:
7461
+ neighbor_rel = self._neighbor_of(node_subset)
7462
+ node_constraint = node_subset(self.Node)
7463
+ else:
7464
+ neighbor_rel = self._neighbor
7465
+ node_constraint = self.Node
7466
+
6785
7467
  where(
6786
- self.Node,
6787
- not_(self._neighbor(self.Node, neighbor_node))
7468
+ node_constraint,
7469
+ not_(neighbor_rel(self.Node, neighbor_node))
6788
7470
  ).define(_isolated_node_rel(self.Node))
6789
7471
 
6790
7472
  return _isolated_node_rel