relationalai 0.12.2__py3-none-any.whl → 0.12.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- relationalai/clients/snowflake.py +117 -28
- relationalai/clients/use_index_poller.py +3 -0
- relationalai/experimental/solvers.py +18 -19
- relationalai/semantics/internal/snowflake.py +2 -3
- relationalai/semantics/lqp/executor.py +39 -9
- relationalai/semantics/lqp/model2lqp.py +0 -1
- relationalai/semantics/lqp/rewrite/extract_common.py +30 -8
- relationalai/semantics/metamodel/builtins.py +6 -6
- relationalai/semantics/metamodel/dependency.py +44 -21
- relationalai/semantics/metamodel/helpers.py +7 -6
- relationalai/semantics/metamodel/rewrite/extract_nested_logicals.py +1 -4
- relationalai/semantics/metamodel/rewrite/flatten.py +1 -13
- relationalai/semantics/reasoners/graph/core.py +803 -121
- relationalai/semantics/rel/executor.py +13 -6
- relationalai/semantics/sql/executor/snowflake.py +2 -2
- relationalai/semantics/std/math.py +2 -2
- {relationalai-0.12.2.dist-info → relationalai-0.12.4.dist-info}/METADATA +1 -1
- {relationalai-0.12.2.dist-info → relationalai-0.12.4.dist-info}/RECORD +21 -21
- {relationalai-0.12.2.dist-info → relationalai-0.12.4.dist-info}/WHEEL +0 -0
- {relationalai-0.12.2.dist-info → relationalai-0.12.4.dist-info}/entry_points.txt +0 -0
- {relationalai-0.12.2.dist-info → relationalai-0.12.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -353,14 +353,16 @@ class Graph():
|
|
|
353
353
|
@cached_property
|
|
354
354
|
def Node(self) -> Concept:
|
|
355
355
|
"""Lazily define and cache the self.Node concept."""
|
|
356
|
-
|
|
357
|
-
|
|
356
|
+
_Node = self._user_node_concept or self._model.Concept(self._NodeConceptStr)
|
|
357
|
+
_Node.annotate(annotations.track("graphs", "Node"))
|
|
358
|
+
return _Node
|
|
358
359
|
|
|
359
360
|
@cached_property
|
|
360
361
|
def Edge(self):
|
|
361
362
|
"""Lazily define and cache the self.Edge concept and friends,
|
|
362
363
|
by passing through to self._EdgeComplex."""
|
|
363
364
|
_Edge, _, _, _ = self._EdgeComplex
|
|
365
|
+
_Edge.annotate(annotations.track("graphs", "Edge"))
|
|
364
366
|
return _Edge
|
|
365
367
|
|
|
366
368
|
@cached_property
|
|
@@ -594,6 +596,7 @@ class Graph():
|
|
|
594
596
|
consuming the `Edge` concept's `EdgeSrc` and `EdgeDst` relationships.
|
|
595
597
|
"""
|
|
596
598
|
_edge_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has edge to {{dst:{self._NodeConceptStr}}}")
|
|
599
|
+
_edge_rel.annotate(annotations.track("graphs", "_edge"))
|
|
597
600
|
|
|
598
601
|
Edge, EdgeSrc, EdgeDst = self.Edge, self.EdgeSrc, self.EdgeDst
|
|
599
602
|
src, dst = self.Node.ref(), self.Node.ref()
|
|
@@ -620,6 +623,7 @@ class Graph():
|
|
|
620
623
|
consuming the `Edge` concept's `EdgeSrc`, `EdgeDst`, and `EdgeWeight` relationships.
|
|
621
624
|
"""
|
|
622
625
|
_weight_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has edge to {{dst:{self._NodeConceptStr}}} with weight {{weight:Float}}")
|
|
626
|
+
_weight_rel.annotate(annotations.track("graphs", "_weight"))
|
|
623
627
|
|
|
624
628
|
Edge, EdgeSrc, EdgeDst, EdgeWeight = self.Edge, self.EdgeSrc, self.EdgeDst, self.EdgeWeight
|
|
625
629
|
src, dst, weight = self.Node.ref(), self.Node.ref(), Float.ref()
|
|
@@ -1332,21 +1336,6 @@ class Graph():
|
|
|
1332
1336
|
# presently in use by the `cosine_similarity` and
|
|
1333
1337
|
# `jaccard_similarity` relationships.
|
|
1334
1338
|
|
|
1335
|
-
def _count_common_outneighbor_fragment(self, node_u, node_v):
|
|
1336
|
-
"""
|
|
1337
|
-
Helper for cosine_similarity and jaccard_similarity that returns a fragment
|
|
1338
|
-
that counts the common outneighbors of given nodes `node_u` and `node_v`.
|
|
1339
|
-
"""
|
|
1340
|
-
common_outneighbor_node = self.Node.ref()
|
|
1341
|
-
return (
|
|
1342
|
-
count(common_outneighbor_node)
|
|
1343
|
-
.per(node_u, node_v)
|
|
1344
|
-
.where(
|
|
1345
|
-
self._outneighbor(node_u, common_outneighbor_node),
|
|
1346
|
-
self._outneighbor(node_v, common_outneighbor_node),
|
|
1347
|
-
)
|
|
1348
|
-
)
|
|
1349
|
-
|
|
1350
1339
|
def _wu_dot_wv_fragment(self, node_u, node_v):
|
|
1351
1340
|
"""
|
|
1352
1341
|
Helper for cosine_similarity that returns a fragment that produces an
|
|
@@ -3935,17 +3924,31 @@ class Graph():
|
|
|
3935
3924
|
|
|
3936
3925
|
|
|
3937
3926
|
@include_in_docs
|
|
3938
|
-
def triangle(self):
|
|
3927
|
+
def triangle(self, *, full: Optional[bool] = None):
|
|
3939
3928
|
"""Returns a ternary relationship containing all triangles in the graph.
|
|
3940
3929
|
|
|
3941
3930
|
Unlike `unique_triangle`, this relationship contains all permutations
|
|
3942
3931
|
of the nodes for each triangle found.
|
|
3943
3932
|
|
|
3933
|
+
Parameters
|
|
3934
|
+
----------
|
|
3935
|
+
full : bool, optional
|
|
3936
|
+
If ``True``, computes triangles for all triplets of nodes in the graph.
|
|
3937
|
+
This computation can be expensive for large graphs. Must be set to ``True``
|
|
3938
|
+
to compute the full triangle relationship.
|
|
3939
|
+
Default is ``None``.
|
|
3940
|
+
|
|
3944
3941
|
Returns
|
|
3945
3942
|
-------
|
|
3946
3943
|
Relationship
|
|
3947
3944
|
A ternary relationship where each tuple represents a triangle.
|
|
3948
3945
|
|
|
3946
|
+
Raises
|
|
3947
|
+
------
|
|
3948
|
+
ValueError
|
|
3949
|
+
If ``full`` is not provided.
|
|
3950
|
+
If ``full`` is not ``True``.
|
|
3951
|
+
|
|
3949
3952
|
Relationship Schema
|
|
3950
3953
|
-------------------
|
|
3951
3954
|
``triangle(node_a, node_b, node_c)``
|
|
@@ -3985,7 +3988,7 @@ class Graph():
|
|
|
3985
3988
|
>>>
|
|
3986
3989
|
>>> # 3. Select all triangles and inspect
|
|
3987
3990
|
>>> a,b,c = Node.ref("a"), Node.ref("b"), Node.ref("c")
|
|
3988
|
-
>>> triangle = graph.triangle()
|
|
3991
|
+
>>> triangle = graph.triangle(full=True)
|
|
3989
3992
|
>>> select(a.id, b.id, c.id).where(triangle(a, b, c)).inspect()
|
|
3990
3993
|
▰▰▰▰ Setup complete
|
|
3991
3994
|
id id2 id3
|
|
@@ -4014,7 +4017,7 @@ class Graph():
|
|
|
4014
4017
|
>>>
|
|
4015
4018
|
>>> # 3. Select all triangles and inspect
|
|
4016
4019
|
>>> a,b,c = Node.ref("a"), Node.ref("b"), Node.ref("c")
|
|
4017
|
-
>>> triangle = graph.triangle()
|
|
4020
|
+
>>> triangle = graph.triangle(full=True)
|
|
4018
4021
|
>>> select(a.id, b.id, c.id).where(triangle(a, b, c)).inspect()
|
|
4019
4022
|
▰▰▰▰ Setup complete
|
|
4020
4023
|
id id2 id3
|
|
@@ -4032,15 +4035,22 @@ class Graph():
|
|
|
4032
4035
|
triangle_count
|
|
4033
4036
|
|
|
4034
4037
|
"""
|
|
4035
|
-
|
|
4036
|
-
|
|
4037
|
-
|
|
4038
|
-
"
|
|
4039
|
-
"
|
|
4040
|
-
|
|
4041
|
-
|
|
4042
|
-
|
|
4043
|
-
|
|
4038
|
+
# Validate full parameter
|
|
4039
|
+
if full is None:
|
|
4040
|
+
raise ValueError(
|
|
4041
|
+
"Computing triangle for all triplets can be expensive. To confirm "
|
|
4042
|
+
"that you would like to compute the full triangle relationship, "
|
|
4043
|
+
"please call `triangle(full=True)`. "
|
|
4044
|
+
"(Domain constraints are not available for `triangle` at this time. "
|
|
4045
|
+
"If you need domain constraints for `triangle`, please reach out.)"
|
|
4046
|
+
)
|
|
4047
|
+
|
|
4048
|
+
if full is not True:
|
|
4049
|
+
raise ValueError(
|
|
4050
|
+
f"Invalid value (`{full}`) for 'full' parameter. Use `full=True` "
|
|
4051
|
+
"to compute the full triangle relationship. "
|
|
4052
|
+
)
|
|
4053
|
+
|
|
4044
4054
|
return self._triangle
|
|
4045
4055
|
|
|
4046
4056
|
@cached_property
|
|
@@ -4067,15 +4077,29 @@ class Graph():
|
|
|
4067
4077
|
|
|
4068
4078
|
|
|
4069
4079
|
@include_in_docs
|
|
4070
|
-
def unique_triangle(self):
|
|
4080
|
+
def unique_triangle(self, *, full: Optional[bool] = None):
|
|
4071
4081
|
"""Returns a ternary relationship containing all unique triangles in the graph.
|
|
4072
4082
|
|
|
4083
|
+
Parameters
|
|
4084
|
+
----------
|
|
4085
|
+
full : bool, optional
|
|
4086
|
+
If ``True``, computes unique triangles for all triplets of nodes in the graph.
|
|
4087
|
+
This computation can be expensive for large graphs. Must be set to ``True``
|
|
4088
|
+
to compute the full unique_triangle relationship.
|
|
4089
|
+
Default is ``None``.
|
|
4090
|
+
|
|
4073
4091
|
Returns
|
|
4074
4092
|
-------
|
|
4075
4093
|
Relationship
|
|
4076
4094
|
A ternary relationship where each tuple represents a unique
|
|
4077
4095
|
triangle.
|
|
4078
4096
|
|
|
4097
|
+
Raises
|
|
4098
|
+
------
|
|
4099
|
+
ValueError
|
|
4100
|
+
If ``full`` is not provided.
|
|
4101
|
+
If ``full`` is not ``True``.
|
|
4102
|
+
|
|
4079
4103
|
Relationship Schema
|
|
4080
4104
|
-------------------
|
|
4081
4105
|
``unique_triangle(node_a, node_b, node_c)``
|
|
@@ -4132,7 +4156,7 @@ class Graph():
|
|
|
4132
4156
|
>>>
|
|
4133
4157
|
>>> # 3. Select the unique triangles and inspect
|
|
4134
4158
|
>>> a,b,c = Node.ref("a"), Node.ref("b"), Node.ref("c")
|
|
4135
|
-
>>> unique_triangle = graph.unique_triangle()
|
|
4159
|
+
>>> unique_triangle = graph.unique_triangle(full=True)
|
|
4136
4160
|
>>> select(a.id, b.id, c.id).where(unique_triangle(a, b, c)).inspect()
|
|
4137
4161
|
▰▰▰▰ Setup complete
|
|
4138
4162
|
id id2 id3
|
|
@@ -4161,7 +4185,7 @@ class Graph():
|
|
|
4161
4185
|
>>>
|
|
4162
4186
|
>>> # 3. Select the unique triangles and inspect
|
|
4163
4187
|
>>> a,b,c = Node.ref("a"), Node.ref("b"), Node.ref("c")
|
|
4164
|
-
>>> unique_triangle = graph.unique_triangle()
|
|
4188
|
+
>>> unique_triangle = graph.unique_triangle(full=True)
|
|
4165
4189
|
>>> select(a.id, b.id, c.id).where(unique_triangle(a, b, c)).inspect()
|
|
4166
4190
|
▰▰▰▰ Setup complete
|
|
4167
4191
|
id id2 id3
|
|
@@ -4175,15 +4199,22 @@ class Graph():
|
|
|
4175
4199
|
triangle_count
|
|
4176
4200
|
|
|
4177
4201
|
"""
|
|
4178
|
-
|
|
4179
|
-
|
|
4180
|
-
|
|
4181
|
-
"
|
|
4182
|
-
"
|
|
4183
|
-
|
|
4184
|
-
|
|
4185
|
-
|
|
4186
|
-
|
|
4202
|
+
# Validate full parameter
|
|
4203
|
+
if full is None:
|
|
4204
|
+
raise ValueError(
|
|
4205
|
+
"Computing unique_triangle for all triplets can be expensive. To confirm "
|
|
4206
|
+
"that you would like to compute the full unique_triangle relationship, "
|
|
4207
|
+
"please call `unique_triangle(full=True)`. "
|
|
4208
|
+
"(Domain constraints are not available for `unique_triangle` at this time. "
|
|
4209
|
+
"If you need domain constraints for `unique_triangle`, please reach out.)"
|
|
4210
|
+
)
|
|
4211
|
+
|
|
4212
|
+
if full is not True:
|
|
4213
|
+
raise ValueError(
|
|
4214
|
+
f"Invalid value (`{full}`) for 'full' parameter. Use `full=True` "
|
|
4215
|
+
"to compute the full unique_triangle relationship."
|
|
4216
|
+
)
|
|
4217
|
+
|
|
4187
4218
|
return self._unique_triangle
|
|
4188
4219
|
|
|
4189
4220
|
@cached_property
|
|
@@ -5592,18 +5623,71 @@ class Graph():
|
|
|
5592
5623
|
|
|
5593
5624
|
|
|
5594
5625
|
@include_in_docs
|
|
5595
|
-
def jaccard_similarity(
|
|
5596
|
-
|
|
5626
|
+
def jaccard_similarity(
|
|
5627
|
+
self,
|
|
5628
|
+
*,
|
|
5629
|
+
full: Optional[bool] = None,
|
|
5630
|
+
from_: Optional[Relationship] = None,
|
|
5631
|
+
to: Optional[Relationship] = None,
|
|
5632
|
+
between: Optional[Relationship] = None,
|
|
5633
|
+
):
|
|
5634
|
+
"""Returns a ternary relationship containing
|
|
5635
|
+
the Jaccard similarity for pairs of nodes.
|
|
5597
5636
|
|
|
5598
5637
|
The Jaccard similarity is a measure between two nodes that ranges from
|
|
5599
5638
|
0.0 to 1.0, where higher values indicate greater similarity.
|
|
5600
5639
|
|
|
5640
|
+
Parameters
|
|
5641
|
+
----------
|
|
5642
|
+
full : bool, optional
|
|
5643
|
+
If ``True``, computes the Jaccard similarity for all pairs
|
|
5644
|
+
of nodes in the graph. This computation can be expensive for large graphs,
|
|
5645
|
+
as the result can scale quadratically in the number of nodes. Mutually exclusive
|
|
5646
|
+
with other parameters.
|
|
5647
|
+
Default is ``None``.
|
|
5648
|
+
from_ : Relationship, optional
|
|
5649
|
+
A unary relationship containing a subset of the graph's nodes. When
|
|
5650
|
+
provided, constrains the domain of the Jaccard similarity computation: only
|
|
5651
|
+
Jaccard similarity scores for node pairs where the first node is
|
|
5652
|
+
in this relationship are computed and returned. Mutually exclusive with
|
|
5653
|
+
``full`` and ``between``.
|
|
5654
|
+
Default is ``None``.
|
|
5655
|
+
to : Relationship, optional
|
|
5656
|
+
A unary relationship containing a subset of the graph's nodes. Can only
|
|
5657
|
+
be used together with the ``from_`` parameter. When provided with ``from_``,
|
|
5658
|
+
constrains the domain of the Jaccard similarity computation: only
|
|
5659
|
+
Jaccard similarity scores for node pairs where the first node is
|
|
5660
|
+
in ``from_`` and the second node is in ``to`` are computed and returned.
|
|
5661
|
+
Default is ``None``.
|
|
5662
|
+
between : Relationship, optional
|
|
5663
|
+
A binary relationship containing pairs of nodes. When provided,
|
|
5664
|
+
constrains the domain of the Jaccard similarity computation: only
|
|
5665
|
+
Jaccard similarity scores for the specific node pairs in
|
|
5666
|
+
this relationship are computed and returned. Mutually exclusive
|
|
5667
|
+
with other parameters.
|
|
5668
|
+
Default is ``None``.
|
|
5669
|
+
|
|
5601
5670
|
Returns
|
|
5602
5671
|
-------
|
|
5603
5672
|
Relationship
|
|
5604
5673
|
A ternary relationship where each tuple represents a pair of nodes
|
|
5605
5674
|
and their Jaccard similarity.
|
|
5606
5675
|
|
|
5676
|
+
Raises
|
|
5677
|
+
------
|
|
5678
|
+
ValueError
|
|
5679
|
+
If ``full`` is provided with any other parameter.
|
|
5680
|
+
If ``between`` is provided with any other parameter.
|
|
5681
|
+
If ``from_`` is provided with any parameter other than ``to``.
|
|
5682
|
+
If none of ``full``, ``from_``, or ``between`` is provided.
|
|
5683
|
+
If ``full`` is not ``True`` or ``None``.
|
|
5684
|
+
AssertionError
|
|
5685
|
+
If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
|
|
5686
|
+
If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
|
|
5687
|
+
If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
|
|
5688
|
+
If ``from_`` or ``to`` is not a unary relationship.
|
|
5689
|
+
If ``between`` is not a binary relationship.
|
|
5690
|
+
|
|
5607
5691
|
Relationship Schema
|
|
5608
5692
|
-------------------
|
|
5609
5693
|
``jaccard_similarity(node_u, node_v, score)``
|
|
@@ -5652,6 +5736,40 @@ class Graph():
|
|
|
5652
5736
|
The weighted Jaccard similarity between node 1 and 2 is then:
|
|
5653
5737
|
`0.46 / (1.6 + 1.6 + 1.4) = 0.1`.
|
|
5654
5738
|
|
|
5739
|
+
Edge weights are assumed to be non-negative, so the neighborhood
|
|
5740
|
+
vectors contain only non-negative elements. Therefore, the Jaccard
|
|
5741
|
+
similarity score is always between 0.0 and 1.0, inclusive.
|
|
5742
|
+
|
|
5743
|
+
The ``jaccard_similarity(full=True)`` method computes and caches
|
|
5744
|
+
the full Jaccard similarity relationship for all pairs of nodes,
|
|
5745
|
+
providing efficient reuse across multiple calls. This can be expensive
|
|
5746
|
+
as the result can contain O(|V|²) tuples.
|
|
5747
|
+
|
|
5748
|
+
Calling ``jaccard_similarity()`` without arguments raises a ``ValueError``,
|
|
5749
|
+
to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
|
|
5750
|
+
|
|
5751
|
+
In contrast, ``jaccard_similarity(from_=subset)`` constrains the computation to
|
|
5752
|
+
tuples with the first position in the passed-in ``subset``. The result is
|
|
5753
|
+
not cached; it is specific to the call site. When a significant fraction of
|
|
5754
|
+
the Jaccard similarity relation is needed across a program,
|
|
5755
|
+
``jaccard_similarity(full=True)`` is typically more efficient. Use
|
|
5756
|
+
``jaccard_similarity(from_=subset)`` only when small subsets of
|
|
5757
|
+
the Jaccard similarity relationship are needed
|
|
5758
|
+
collectively across the program.
|
|
5759
|
+
|
|
5760
|
+
The ``to`` parameter can be used together with ``from_`` to further
|
|
5761
|
+
constrain the computation: ``jaccard_similarity(from_=subset_a, to=subset_b)``
|
|
5762
|
+
computes Jaccard similarity scores only for node pairs where the first node is in
|
|
5763
|
+
``subset_a`` and the second node is in ``subset_b``. (Since ``jaccard_similarity``
|
|
5764
|
+
is symmetric in its first two positions, using ``to`` without ``from_`` would
|
|
5765
|
+
be functionally redundant, and is not allowed.)
|
|
5766
|
+
|
|
5767
|
+
The ``between`` parameter provides another way to constrain the computation.
|
|
5768
|
+
Unlike ``from_`` and ``to``, which allow you to independently constrain the first
|
|
5769
|
+
and second positions in ``jaccard_similarity`` tuples to sets of nodes, ``between``
|
|
5770
|
+
allows you to constrain the first and second positions, jointly, to specific pairs
|
|
5771
|
+
of nodes.
|
|
5772
|
+
|
|
5655
5773
|
Examples
|
|
5656
5774
|
--------
|
|
5657
5775
|
**Unweighted Graph Examples**
|
|
@@ -5673,8 +5791,8 @@ class Graph():
|
|
|
5673
5791
|
... Edge.new(src=n4, dst=n3),
|
|
5674
5792
|
... )
|
|
5675
5793
|
>>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
|
|
5676
|
-
>>>
|
|
5677
|
-
>>> select(score).where(
|
|
5794
|
+
>>> jaccard_similarity = graph.jaccard_similarity(full=True)
|
|
5795
|
+
>>> select(score).where(jaccard_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
|
|
5678
5796
|
▰▰▰▰ Setup complete
|
|
5679
5797
|
score
|
|
5680
5798
|
0 0.25
|
|
@@ -5696,8 +5814,8 @@ class Graph():
|
|
|
5696
5814
|
... Edge.new(src=n4, dst=n3),
|
|
5697
5815
|
... )
|
|
5698
5816
|
>>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
|
|
5699
|
-
>>>
|
|
5700
|
-
>>> select(score).where(
|
|
5817
|
+
>>> jaccard_similarity = graph.jaccard_similarity(full=True)
|
|
5818
|
+
>>> select(score).where(jaccard_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
|
|
5701
5819
|
▰▰▰▰ Setup complete
|
|
5702
5820
|
score
|
|
5703
5821
|
0 0.5
|
|
@@ -5724,12 +5842,57 @@ class Graph():
|
|
|
5724
5842
|
>>>
|
|
5725
5843
|
>>> # 3. Select the weighted Jaccard similarity for the pair (1, 2)
|
|
5726
5844
|
>>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
|
|
5727
|
-
>>>
|
|
5728
|
-
>>> select(score).where(
|
|
5845
|
+
>>> jaccard_similarity = graph.jaccard_similarity(full=True)
|
|
5846
|
+
>>> select(score).where(jaccard_similarity(u, v, score), u.id == 1, v.id == 2).inspect()
|
|
5729
5847
|
▰▰▰▰ Setup complete
|
|
5730
5848
|
score
|
|
5731
5849
|
0 0.1
|
|
5732
5850
|
|
|
5851
|
+
**Domain Constraint Examples**
|
|
5852
|
+
|
|
5853
|
+
>>> # Use 'from_' parameter to constrain the set of nodes for the first position
|
|
5854
|
+
>>> # Using the same undirected unweighted graph from above
|
|
5855
|
+
>>> from relationalai.semantics import where
|
|
5856
|
+
>>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
|
|
5857
|
+
>>> node = Node.ref()
|
|
5858
|
+
>>> where(node.id == 2).define(subset(node))
|
|
5859
|
+
>>>
|
|
5860
|
+
>>> # Get Jaccard similarity scores only for pairs where first node is in subset
|
|
5861
|
+
>>> constrained_jaccard_similarity = graph.jaccard_similarity(from_=subset)
|
|
5862
|
+
>>> select(u.id, v.id, score).where(constrained_jaccard_similarity(u, v, score)).inspect()
|
|
5863
|
+
▰▰▰▰ Setup complete
|
|
5864
|
+
id id2 score
|
|
5865
|
+
0 2 2 1.00
|
|
5866
|
+
1 2 3 0.50
|
|
5867
|
+
2 2 4 0.25
|
|
5868
|
+
|
|
5869
|
+
>>> # Use both 'from_' and 'to' parameters to constrain both positions
|
|
5870
|
+
>>> from_subset = model.Relationship(f"{{node:{Node}}} is in from_subset")
|
|
5871
|
+
>>> to_subset = model.Relationship(f"{{node:{Node}}} is in to_subset")
|
|
5872
|
+
>>> where(node.id == 2).define(from_subset(node))
|
|
5873
|
+
>>> where(node.id == 4).define(to_subset(node))
|
|
5874
|
+
>>>
|
|
5875
|
+
>>> # Get Jaccard similarity scores only where first node is in from_subset and second node is in to_subset
|
|
5876
|
+
>>> constrained_jaccard_similarity = graph.jaccard_similarity(from_=from_subset, to=to_subset)
|
|
5877
|
+
>>> select(u.id, v.id, score).where(constrained_jaccard_similarity(u, v, score)).inspect()
|
|
5878
|
+
▰▰▰▰ Setup complete
|
|
5879
|
+
id id2 score
|
|
5880
|
+
0 2 4 0.25
|
|
5881
|
+
|
|
5882
|
+
>>> # Use 'between' parameter to constrain to specific pairs of nodes
|
|
5883
|
+
>>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
|
|
5884
|
+
>>> node_a, node_b = Node.ref(), Node.ref()
|
|
5885
|
+
>>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
|
|
5886
|
+
>>> where(node_a.id == 3, node_b.id == 4).define(pairs(node_a, node_b))
|
|
5887
|
+
>>>
|
|
5888
|
+
>>> # Get Jaccard similarity scores only for the specific pairs (2, 4) and (3, 4)
|
|
5889
|
+
>>> constrained_jaccard_similarity = graph.jaccard_similarity(between=pairs)
|
|
5890
|
+
>>> select(u.id, v.id, score).where(constrained_jaccard_similarity(u, v, score)).inspect()
|
|
5891
|
+
▰▰▰▰ Setup complete
|
|
5892
|
+
id id2 score
|
|
5893
|
+
0 2 4 0.25
|
|
5894
|
+
1 3 4 0.50
|
|
5895
|
+
|
|
5733
5896
|
References
|
|
5734
5897
|
----------
|
|
5735
5898
|
Frigo M, Cruciani E, Coudert D, Deriche R, Natale E, Deslauriers-Gauthier S.
|
|
@@ -5738,57 +5901,242 @@ class Graph():
|
|
|
5738
5901
|
doi: 10.1162/netn_a_00199. PMID: 34746624; PMCID: PMC8567827.
|
|
5739
5902
|
|
|
5740
5903
|
"""
|
|
5741
|
-
|
|
5742
|
-
|
|
5743
|
-
|
|
5744
|
-
"of all pairs of nodes of the graph. To provide better control over "
|
|
5745
|
-
"the computed subset, `jaccard_similarity`'s interface will soon "
|
|
5746
|
-
"need to change."
|
|
5747
|
-
),
|
|
5748
|
-
FutureWarning,
|
|
5749
|
-
stacklevel=2
|
|
5904
|
+
# Validate domain constraint parameters.
|
|
5905
|
+
self._validate_domain_constraint_parameters(
|
|
5906
|
+
'jaccard_similarity', full, from_, to, between
|
|
5750
5907
|
)
|
|
5908
|
+
|
|
5909
|
+
# At this point, exactly one of `full`, `from_`, or `between`
|
|
5910
|
+
# has been provided, and if `to` is provided, `from_` is also provided.
|
|
5911
|
+
|
|
5912
|
+
# Handle `between`.
|
|
5913
|
+
if between is not None:
|
|
5914
|
+
self._validate_pair_subset_parameter(between)
|
|
5915
|
+
return self._jaccard_similarity_between(between)
|
|
5916
|
+
|
|
5917
|
+
# Handle `from_` (and potentially `to`).
|
|
5918
|
+
if from_ is not None:
|
|
5919
|
+
self._validate_node_subset_parameter('from_', from_)
|
|
5920
|
+
if to is not None:
|
|
5921
|
+
self._validate_node_subset_parameter('to', to)
|
|
5922
|
+
return self._jaccard_similarity_from_to(from_, to)
|
|
5923
|
+
return self._jaccard_similarity_from(from_)
|
|
5924
|
+
|
|
5925
|
+
# Handle `full`.
|
|
5751
5926
|
return self._jaccard_similarity
|
|
5752
5927
|
|
|
5753
5928
|
@cached_property
|
|
5754
5929
|
def _jaccard_similarity(self):
|
|
5755
|
-
"""Lazily define and cache the
|
|
5756
|
-
_jaccard_similarity_rel = self.
|
|
5930
|
+
"""Lazily define and cache the full jaccard_similarity relationship."""
|
|
5931
|
+
_jaccard_similarity_rel = self._create_jaccard_similarity_relationship()
|
|
5757
5932
|
_jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity"))
|
|
5933
|
+
return _jaccard_similarity_rel
|
|
5758
5934
|
|
|
5759
|
-
|
|
5760
|
-
|
|
5761
|
-
|
|
5762
|
-
|
|
5763
|
-
|
|
5764
|
-
|
|
5765
|
-
|
|
5766
|
-
|
|
5767
|
-
|
|
5768
|
-
|
|
5935
|
+
def _jaccard_similarity_from(self, node_subset_from: Relationship):
|
|
5936
|
+
"""
|
|
5937
|
+
Create a jaccard_similarity relationship, with the first position in each
|
|
5938
|
+
tuple constrained to be in the given subset of nodes. Note this relationship
|
|
5939
|
+
is not cached; it is specific to the callsite.
|
|
5940
|
+
"""
|
|
5941
|
+
_jaccard_similarity_rel = self._create_jaccard_similarity_relationship(
|
|
5942
|
+
node_subset_from=node_subset_from
|
|
5943
|
+
)
|
|
5944
|
+
_jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity_from"))
|
|
5945
|
+
return _jaccard_similarity_rel
|
|
5946
|
+
|
|
5947
|
+
def _jaccard_similarity_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
|
|
5948
|
+
"""
|
|
5949
|
+
Create a jaccard_similarity relationship, with the first position in each
|
|
5950
|
+
tuple constrained to be in `node_subset_from`, and the second position in
|
|
5951
|
+
each tuple constrained to be in `node_subset_to`. Note this relationship
|
|
5952
|
+
is not cached; it is specific to the callsite.
|
|
5953
|
+
"""
|
|
5954
|
+
_jaccard_similarity_rel = self._create_jaccard_similarity_relationship(
|
|
5955
|
+
node_subset_from=node_subset_from,
|
|
5956
|
+
node_subset_to=node_subset_to
|
|
5957
|
+
)
|
|
5958
|
+
_jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity_from_to"))
|
|
5959
|
+
return _jaccard_similarity_rel
|
|
5960
|
+
|
|
5961
|
+
def _jaccard_similarity_between(self, pair_subset_between: Relationship):
|
|
5962
|
+
"""
|
|
5963
|
+
Create a jaccard_similarity relationship, with the first and second position
|
|
5964
|
+
in each tuple jointly constrained to be in the given set of pairs
|
|
5965
|
+
of nodes. Note this relationship is not cached;
|
|
5966
|
+
it is specific to the callsite.
|
|
5967
|
+
"""
|
|
5968
|
+
_jaccard_similarity_rel = self._create_jaccard_similarity_relationship(
|
|
5969
|
+
pair_subset_between=pair_subset_between
|
|
5970
|
+
)
|
|
5971
|
+
_jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity_between"))
|
|
5972
|
+
return _jaccard_similarity_rel
|
|
5973
|
+
|
|
5974
|
+
def _create_jaccard_similarity_relationship(
|
|
5975
|
+
self,
|
|
5976
|
+
*,
|
|
5977
|
+
node_subset_from: Optional[Relationship] = None,
|
|
5978
|
+
node_subset_to: Optional[Relationship] = None,
|
|
5979
|
+
pair_subset_between: Optional[Relationship] = None,
|
|
5980
|
+
):
|
|
5981
|
+
"""
|
|
5982
|
+
Create jaccard_similarity relationship, optionally constrained by
|
|
5983
|
+
the provided node subsets or pair subset.
|
|
5984
|
+
"""
|
|
5985
|
+
_jaccard_similarity_rel = self._model.Relationship(
|
|
5986
|
+
f"{{node_u:{self._NodeConceptStr}}} has a Jaccard similarity to "
|
|
5987
|
+
f"{{node_v:{self._NodeConceptStr}}} of {{score:Float}}"
|
|
5988
|
+
)
|
|
5989
|
+
|
|
5990
|
+
# Branch by case to select appropriate count_outneighbor,
|
|
5991
|
+
# outneighbor, and weighted_outdegree relationships, and build
|
|
5992
|
+
# appropriate constraints on the domain of the nodes.
|
|
5993
|
+
node_u, node_v = self.Node.ref(), self.Node.ref()
|
|
5994
|
+
|
|
5995
|
+
# TODO: Optimization opportunity. In a number of branches below,
|
|
5996
|
+
# we compute _count_outneighbor_of, which transitively computes
|
|
5997
|
+
# _outneighbor_of, and then compute _outneighbor_of directly;
|
|
5998
|
+
# the present code structure makes this a developer-time-efficient
|
|
5999
|
+
# way to get this off the ground, but of course involves redundant
|
|
6000
|
+
# work. In future this redundant work could be eliminated.
|
|
6001
|
+
|
|
6002
|
+
# Handle the `between` case.
|
|
6003
|
+
if pair_subset_between is not None:
|
|
6004
|
+
# Extract first-position and second-position nodes.
|
|
6005
|
+
first_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
|
|
6006
|
+
second_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
|
|
6007
|
+
node_x, node_y = self.Node.ref(), self.Node.ref()
|
|
6008
|
+
where(
|
|
6009
|
+
pair_subset_between(node_x, node_y)
|
|
6010
|
+
).define(
|
|
6011
|
+
first_position_subset(node_x),
|
|
6012
|
+
second_position_subset(node_y)
|
|
6013
|
+
)
|
|
6014
|
+
|
|
6015
|
+
if not self.weighted:
|
|
6016
|
+
count_outneighbor_u_rel = self._count_outneighbor_of(first_position_subset)
|
|
6017
|
+
count_outneighbor_v_rel = self._count_outneighbor_of(second_position_subset)
|
|
6018
|
+
outneighbor_u_rel = self._outneighbor_of(first_position_subset)
|
|
6019
|
+
outneighbor_v_rel = self._outneighbor_of(second_position_subset)
|
|
6020
|
+
else: # self.weighted
|
|
6021
|
+
weighted_outdegree_u_rel = self._weighted_outdegree_of(first_position_subset)
|
|
6022
|
+
weighted_outdegree_v_rel = self._weighted_outdegree_of(second_position_subset)
|
|
6023
|
+
|
|
6024
|
+
node_constraints = [pair_subset_between(node_u, node_v)]
|
|
6025
|
+
|
|
6026
|
+
# Handle the `from_` case.
|
|
6027
|
+
elif node_subset_from is not None and node_subset_to is None:
|
|
6028
|
+
if not self.weighted:
|
|
6029
|
+
count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
|
|
6030
|
+
count_outneighbor_v_rel = self._count_outneighbor
|
|
6031
|
+
outneighbor_u_rel = self._outneighbor_of(node_subset_from)
|
|
6032
|
+
outneighbor_v_rel = self._outneighbor
|
|
6033
|
+
else: # self.weighted
|
|
6034
|
+
weighted_outdegree_u_rel = self._weighted_outdegree_of(node_subset_from)
|
|
6035
|
+
weighted_outdegree_v_rel = self._weighted_outdegree
|
|
6036
|
+
|
|
6037
|
+
# TODO: Implement depth-two traversal strategy for better performance.
|
|
6038
|
+
# See similar comments on related similarity metrics.
|
|
6039
|
+
|
|
6040
|
+
node_constraints = [node_subset_from(node_u)]
|
|
6041
|
+
|
|
6042
|
+
# Handle the `from_`/`to` case.
|
|
6043
|
+
elif node_subset_from is not None and node_subset_to is not None:
|
|
6044
|
+
# Check for object identity optimization.
|
|
6045
|
+
if node_subset_from is node_subset_to:
|
|
6046
|
+
if not self.weighted:
|
|
6047
|
+
count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
|
|
6048
|
+
count_outneighbor_v_rel = count_outneighbor_u_rel
|
|
6049
|
+
outneighbor_u_rel = self._outneighbor_of(node_subset_from)
|
|
6050
|
+
outneighbor_v_rel = outneighbor_u_rel
|
|
6051
|
+
else: # self.weighted
|
|
6052
|
+
weighted_outdegree_u_rel = self._weighted_outdegree_of(node_subset_from)
|
|
6053
|
+
weighted_outdegree_v_rel = weighted_outdegree_u_rel
|
|
6054
|
+
else:
|
|
6055
|
+
if not self.weighted:
|
|
6056
|
+
count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
|
|
6057
|
+
count_outneighbor_v_rel = self._count_outneighbor_of(node_subset_to)
|
|
6058
|
+
outneighbor_u_rel = self._outneighbor_of(node_subset_from)
|
|
6059
|
+
outneighbor_v_rel = self._outneighbor_of(node_subset_to)
|
|
6060
|
+
else: # self.weighted
|
|
6061
|
+
weighted_outdegree_u_rel = self._weighted_outdegree_of(node_subset_from)
|
|
6062
|
+
weighted_outdegree_v_rel = self._weighted_outdegree_of(node_subset_to)
|
|
6063
|
+
|
|
6064
|
+
node_constraints = [node_subset_from(node_u), node_subset_to(node_v)]
|
|
6065
|
+
|
|
6066
|
+
# Handle the `full` case.
|
|
5769
6067
|
else:
|
|
5770
|
-
|
|
6068
|
+
if not self.weighted:
|
|
6069
|
+
count_outneighbor_u_rel = self._count_outneighbor
|
|
6070
|
+
count_outneighbor_v_rel = self._count_outneighbor
|
|
6071
|
+
outneighbor_u_rel = self._outneighbor
|
|
6072
|
+
outneighbor_v_rel = self._outneighbor
|
|
6073
|
+
else: # self.weighted
|
|
6074
|
+
weighted_outdegree_u_rel = self._weighted_outdegree
|
|
6075
|
+
weighted_outdegree_v_rel = self._weighted_outdegree
|
|
6076
|
+
|
|
6077
|
+
node_constraints = []
|
|
6078
|
+
|
|
6079
|
+
# Define Jaccard similarity logic for weighted and unweighted cases.
|
|
6080
|
+
if not self.weighted:
|
|
6081
|
+
num_u_outneigbor, num_v_outneigbor = Integer.ref(), Integer.ref()
|
|
6082
|
+
common_outneighbor_node = self.Node.ref()
|
|
6083
|
+
num_union_outneighbors = Integer.ref()
|
|
6084
|
+
score = Float.ref()
|
|
5771
6085
|
|
|
6086
|
+
where(
|
|
6087
|
+
*node_constraints,
|
|
6088
|
+
count_outneighbor_u_rel(node_u, num_u_outneigbor), # type: ignore[possibly-unbound]
|
|
6089
|
+
count_outneighbor_v_rel(node_v, num_v_outneigbor), # type: ignore[possibly-unbound]
|
|
6090
|
+
num_common_outneighbor := count(common_outneighbor_node).per(node_u, node_v).where(
|
|
6091
|
+
outneighbor_u_rel(node_u, common_outneighbor_node), # type: ignore[possibly-unbound]
|
|
6092
|
+
outneighbor_v_rel(node_v, common_outneighbor_node), # type: ignore[possibly-unbound]
|
|
6093
|
+
),
|
|
6094
|
+
num_union_outneighbors := num_u_outneigbor + num_v_outneigbor - num_common_outneighbor,
|
|
6095
|
+
score := num_common_outneighbor / num_union_outneighbors,
|
|
6096
|
+
).define(
|
|
6097
|
+
_jaccard_similarity_rel(node_u, node_v, score)
|
|
6098
|
+
)
|
|
6099
|
+
else:
|
|
5772
6100
|
# (1) The numerator: For every node `k` in the graph, find the minimum weight of
|
|
5773
6101
|
# the out-edges from `u` and `v` to `k`, and sum those minimum weights.
|
|
5774
6102
|
|
|
5775
6103
|
# Note that for any node `k` that is not a common out-neighbor of nodes `u` and `v`,
|
|
5776
6104
|
# the minimum weight of the out-edges from `u` and `v` to `k` is zero/empty,
|
|
5777
6105
|
# so the sum here reduces to a sum over the common out-neighbors of `u` and `v`.
|
|
5778
|
-
min_weight_to_common_outneighbor = self._model.Relationship(
|
|
6106
|
+
min_weight_to_common_outneighbor = self._model.Relationship(
|
|
6107
|
+
f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
|
|
6108
|
+
f"have common outneighbor {{node_k:{self._NodeConceptStr}}} "
|
|
6109
|
+
f"with minimum weight {{minweight:Float}}"
|
|
6110
|
+
)
|
|
5779
6111
|
|
|
5780
|
-
|
|
5781
|
-
w = union(
|
|
5782
|
-
|
|
5783
|
-
|
|
5784
|
-
|
|
5785
|
-
|
|
6112
|
+
node_k, w1, w2 = self.Node.ref(), Float.ref(), Float.ref()
|
|
6113
|
+
w = union(
|
|
6114
|
+
where(self._weight(node_u, node_k, w1)).select(w1),
|
|
6115
|
+
where(self._weight(node_v, node_k, w2)).select(w2)
|
|
6116
|
+
)
|
|
6117
|
+
where(
|
|
6118
|
+
*node_constraints,
|
|
6119
|
+
self._edge(node_u, node_k),
|
|
6120
|
+
self._edge(node_v, node_k)
|
|
6121
|
+
).define(
|
|
6122
|
+
min_weight_to_common_outneighbor(
|
|
6123
|
+
node_u, node_v, node_k, min(w).per(node_u, node_v, node_k)
|
|
6124
|
+
)
|
|
6125
|
+
)
|
|
5786
6126
|
|
|
5787
|
-
sum_of_min_weights_to_common_outneighbors = self._model.Relationship(
|
|
6127
|
+
sum_of_min_weights_to_common_outneighbors = self._model.Relationship(
|
|
6128
|
+
f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
|
|
6129
|
+
f"have a sum of minweights of {{minsum:Float}}"
|
|
6130
|
+
)
|
|
5788
6131
|
|
|
5789
6132
|
minweight = Float.ref()
|
|
5790
|
-
where(
|
|
5791
|
-
|
|
6133
|
+
where(
|
|
6134
|
+
min_weight_to_common_outneighbor(node_u, node_v, node_k, minweight)
|
|
6135
|
+
).define(
|
|
6136
|
+
sum_of_min_weights_to_common_outneighbors(
|
|
6137
|
+
node_u, node_v, sum(node_k, minweight).per(node_u, node_v)
|
|
6138
|
+
)
|
|
6139
|
+
)
|
|
5792
6140
|
|
|
5793
6141
|
# (2) The denominator: For every node `k` in the graph, find the maximum weight of
|
|
5794
6142
|
# the out-edges from `u` and `v` to `k`, and sum those maximum weights.
|
|
@@ -5827,20 +6175,31 @@ class Graph():
|
|
|
5827
6175
|
# self._weighted_outdegree(u) +
|
|
5828
6176
|
# self._weighted_outdegree(v) -
|
|
5829
6177
|
# _sum_of_min_weights_to_common_outneighbors(u, v)
|
|
5830
|
-
sum_of_max_weights_to_other_nodes = self._model.Relationship(
|
|
6178
|
+
sum_of_max_weights_to_other_nodes = self._model.Relationship(
|
|
6179
|
+
f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
|
|
6180
|
+
f"have a maxsum of {{maxsum:Float}}"
|
|
6181
|
+
)
|
|
5831
6182
|
|
|
5832
6183
|
u_outdegree, v_outdegree, maxsum, minsum = Float.ref(), Float.ref(), Float.ref(), Float.ref()
|
|
5833
|
-
where(
|
|
5834
|
-
|
|
5835
|
-
|
|
5836
|
-
|
|
5837
|
-
|
|
6184
|
+
where(
|
|
6185
|
+
*node_constraints,
|
|
6186
|
+
weighted_outdegree_u_rel(node_u, u_outdegree), # type: ignore[possibly-unbound]
|
|
6187
|
+
weighted_outdegree_v_rel(node_v, v_outdegree), # type: ignore[possibly-unbound]
|
|
6188
|
+
sum_of_min_weights_to_common_outneighbors(node_u, node_v, minsum),
|
|
6189
|
+
maxsum == u_outdegree + v_outdegree - minsum
|
|
6190
|
+
).define(
|
|
6191
|
+
sum_of_max_weights_to_other_nodes(node_u, node_v, maxsum)
|
|
6192
|
+
)
|
|
5838
6193
|
|
|
6194
|
+
# Combination of (1) and (2) to produce score.
|
|
5839
6195
|
score = Float.ref()
|
|
5840
|
-
where(
|
|
5841
|
-
|
|
5842
|
-
|
|
5843
|
-
|
|
6196
|
+
where(
|
|
6197
|
+
sum_of_min_weights_to_common_outneighbors(node_u, node_v, minsum),
|
|
6198
|
+
sum_of_max_weights_to_other_nodes(node_u, node_v, maxsum),
|
|
6199
|
+
score == minsum / maxsum
|
|
6200
|
+
).define(
|
|
6201
|
+
_jaccard_similarity_rel(node_u, node_v, score)
|
|
6202
|
+
)
|
|
5844
6203
|
|
|
5845
6204
|
return _jaccard_similarity_rel
|
|
5846
6205
|
|
|
@@ -6662,19 +7021,72 @@ class Graph():
|
|
|
6662
7021
|
|
|
6663
7022
|
|
|
6664
7023
|
@include_in_docs
|
|
6665
|
-
def preferential_attachment(
|
|
6666
|
-
|
|
7024
|
+
def preferential_attachment(
|
|
7025
|
+
self,
|
|
7026
|
+
*,
|
|
7027
|
+
full: Optional[bool] = None,
|
|
7028
|
+
from_: Optional[Relationship] = None,
|
|
7029
|
+
to: Optional[Relationship] = None,
|
|
7030
|
+
between: Optional[Relationship] = None,
|
|
7031
|
+
):
|
|
7032
|
+
"""Returns a ternary relationship containing
|
|
7033
|
+
the preferential attachment score for pairs of nodes.
|
|
6667
7034
|
|
|
6668
7035
|
The preferential attachment score between two nodes `u` and `v` is the
|
|
6669
7036
|
number of nodes adjacent to `u` multiplied by the number of nodes
|
|
6670
7037
|
adjacent to `v`.
|
|
6671
7038
|
|
|
7039
|
+
Parameters
|
|
7040
|
+
----------
|
|
7041
|
+
full : bool, optional
|
|
7042
|
+
If ``True``, computes the preferential attachment score for all pairs
|
|
7043
|
+
of nodes in the graph. This computation can be expensive for large graphs,
|
|
7044
|
+
as the result can scale quadratically in the number of nodes. Mutually exclusive
|
|
7045
|
+
with other parameters.
|
|
7046
|
+
Default is ``None``.
|
|
7047
|
+
from_ : Relationship, optional
|
|
7048
|
+
A unary relationship containing a subset of the graph's nodes. When
|
|
7049
|
+
provided, constrains the domain of the preferential attachment computation: only
|
|
7050
|
+
preferential attachment scores for node pairs where the first node is
|
|
7051
|
+
in this relationship are computed and returned. Mutually exclusive with
|
|
7052
|
+
``full`` and ``between``.
|
|
7053
|
+
Default is ``None``.
|
|
7054
|
+
to : Relationship, optional
|
|
7055
|
+
A unary relationship containing a subset of the graph's nodes. Can only
|
|
7056
|
+
be used together with the ``from_`` parameter. When provided with ``from_``,
|
|
7057
|
+
constrains the domain of the preferential attachment computation: only
|
|
7058
|
+
preferential attachment scores for node pairs where the first node is
|
|
7059
|
+
in ``from_`` and the second node is in ``to`` are computed and returned.
|
|
7060
|
+
Default is ``None``.
|
|
7061
|
+
between : Relationship, optional
|
|
7062
|
+
A binary relationship containing pairs of nodes. When provided,
|
|
7063
|
+
constrains the domain of the preferential attachment computation: only
|
|
7064
|
+
preferential attachment scores for the specific node pairs in
|
|
7065
|
+
this relationship are computed and returned. Mutually exclusive
|
|
7066
|
+
with other parameters.
|
|
7067
|
+
Default is ``None``.
|
|
7068
|
+
|
|
6672
7069
|
Returns
|
|
6673
7070
|
-------
|
|
6674
7071
|
Relationship
|
|
6675
7072
|
A ternary relationship where each tuple represents a pair of nodes
|
|
6676
7073
|
and their preferential attachment score.
|
|
6677
7074
|
|
|
7075
|
+
Raises
|
|
7076
|
+
------
|
|
7077
|
+
ValueError
|
|
7078
|
+
If ``full`` is provided with any other parameter.
|
|
7079
|
+
If ``between`` is provided with any other parameter.
|
|
7080
|
+
If ``from_`` is provided with any parameter other than ``to``.
|
|
7081
|
+
If none of ``full``, ``from_``, or ``between`` is provided.
|
|
7082
|
+
If ``full`` is not ``True`` or ``None``.
|
|
7083
|
+
AssertionError
|
|
7084
|
+
If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
|
|
7085
|
+
If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
|
|
7086
|
+
If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
|
|
7087
|
+
If ``from_`` or ``to`` is not a unary relationship.
|
|
7088
|
+
If ``between`` is not a binary relationship.
|
|
7089
|
+
|
|
6678
7090
|
Relationship Schema
|
|
6679
7091
|
-------------------
|
|
6680
7092
|
``preferential_attachment(node_u, node_v, score)``
|
|
@@ -6691,6 +7103,38 @@ class Graph():
|
|
|
6691
7103
|
| Directed | Yes | |
|
|
6692
7104
|
| Weighted | Yes | Weights are ignored. |
|
|
6693
7105
|
|
|
7106
|
+
Notes
|
|
7107
|
+
-----
|
|
7108
|
+
The ``preferential_attachment(full=True)`` method computes and caches
|
|
7109
|
+
the full preferential attachment relationship for all pairs of nodes,
|
|
7110
|
+
providing efficient reuse across multiple calls. This can be expensive
|
|
7111
|
+
as the result contains O(|V|²) tuples.
|
|
7112
|
+
|
|
7113
|
+
Calling ``preferential_attachment()`` without arguments raises a ``ValueError``,
|
|
7114
|
+
to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
|
|
7115
|
+
|
|
7116
|
+
In contrast, ``preferential_attachment(from_=subset)`` constrains the computation to
|
|
7117
|
+
tuples with the first position in the passed-in ``subset``. The result is
|
|
7118
|
+
not cached; it is specific to the call site. When a significant fraction of
|
|
7119
|
+
the preferential attachment relation is needed across a program,
|
|
7120
|
+
``preferential_attachment(full=True)`` is typically more efficient. Use
|
|
7121
|
+
``preferential_attachment(from_=subset)`` only when small subsets of
|
|
7122
|
+
the preferential attachment relationship are needed
|
|
7123
|
+
collectively across the program.
|
|
7124
|
+
|
|
7125
|
+
The ``to`` parameter can be used together with ``from_`` to further
|
|
7126
|
+
constrain the computation: ``preferential_attachment(from_=subset_a, to=subset_b)``
|
|
7127
|
+
computes preferential attachment scores only for node pairs where the first node is in
|
|
7128
|
+
``subset_a`` and the second node is in ``subset_b``. (Since ``preferential_attachment``
|
|
7129
|
+
is symmetric in its first two positions, using ``to`` without ``from_`` would
|
|
7130
|
+
be functionally redundant, and is not allowed.)
|
|
7131
|
+
|
|
7132
|
+
The ``between`` parameter provides another way to constrain the computation.
|
|
7133
|
+
Unlike ``from_`` and ``to``, which allow you to independently constrain the first
|
|
7134
|
+
and second positions in ``preferential_attachment`` tuples to sets of nodes, ``between``
|
|
7135
|
+
allows you to constrain the first and second positions, jointly, to specific pairs
|
|
7136
|
+
of nodes.
|
|
7137
|
+
|
|
6694
7138
|
Examples
|
|
6695
7139
|
--------
|
|
6696
7140
|
>>> from relationalai.semantics import Model, define, select, Integer
|
|
@@ -6712,10 +7156,10 @@ class Graph():
|
|
|
6712
7156
|
... Edge.new(src=n4, dst=n3),
|
|
6713
7157
|
... )
|
|
6714
7158
|
>>>
|
|
6715
|
-
>>> # 3. Select the preferential attachment
|
|
7159
|
+
>>> # 3. Select the preferential attachment scores from the full relationship
|
|
6716
7160
|
>>> u, v = Node.ref("u"), Node.ref("v")
|
|
6717
7161
|
>>> score = Integer.ref("score")
|
|
6718
|
-
>>> preferential_attachment = graph.preferential_attachment()
|
|
7162
|
+
>>> preferential_attachment = graph.preferential_attachment(full=True)
|
|
6719
7163
|
>>> select(
|
|
6720
7164
|
... u.id, v.id, score,
|
|
6721
7165
|
... ).where(
|
|
@@ -6727,64 +7171,302 @@ class Graph():
|
|
|
6727
7171
|
id id2 score
|
|
6728
7172
|
0 1 3 3
|
|
6729
7173
|
|
|
7174
|
+
>>> # 4. Use 'from_' parameter to constrain the set of nodes for the first position
|
|
7175
|
+
>>> # Define a subset containing only node 1
|
|
7176
|
+
>>> from relationalai.semantics import where
|
|
7177
|
+
>>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
|
|
7178
|
+
>>> node = Node.ref()
|
|
7179
|
+
>>> where(node.id == 1).define(subset(node))
|
|
7180
|
+
>>>
|
|
7181
|
+
>>> # Get preferential attachment scores only for pairs where first node is in subset
|
|
7182
|
+
>>> constrained_preferential_attachment = graph.preferential_attachment(from_=subset)
|
|
7183
|
+
>>> select(u.id, v.id, score).where(constrained_preferential_attachment(u, v, score)).inspect()
|
|
7184
|
+
▰▰▰▰ Setup complete
|
|
7185
|
+
id id2 score
|
|
7186
|
+
0 1 1 1
|
|
7187
|
+
1 1 2 3
|
|
7188
|
+
2 1 3 3
|
|
7189
|
+
3 1 4 3
|
|
7190
|
+
|
|
7191
|
+
>>> # 5. Use both 'from_' and 'to' parameters to constrain both positions
|
|
7192
|
+
>>> from_subset = model.Relationship(f"{{node:{Node}}} is in from_subset")
|
|
7193
|
+
>>> to_subset = model.Relationship(f"{{node:{Node}}} is in to_subset")
|
|
7194
|
+
>>> where(node.id == 1).define(from_subset(node))
|
|
7195
|
+
>>> where(node.id == 3).define(to_subset(node))
|
|
7196
|
+
>>>
|
|
7197
|
+
>>> # Get preferential attachment scores only where first node is in from_subset and second node is in to_subset
|
|
7198
|
+
>>> constrained_preferential_attachment = graph.preferential_attachment(from_=from_subset, to=to_subset)
|
|
7199
|
+
>>> select(u.id, v.id, score).where(constrained_preferential_attachment(u, v, score)).inspect()
|
|
7200
|
+
▰▰▰▰ Setup complete
|
|
7201
|
+
id id2 score
|
|
7202
|
+
0 1 3 3
|
|
7203
|
+
|
|
7204
|
+
>>> # 6. Use 'between' parameter to constrain to specific pairs of nodes
|
|
7205
|
+
>>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
|
|
7206
|
+
>>> node_a, node_b = Node.ref(), Node.ref()
|
|
7207
|
+
>>> where(node_a.id == 1, node_b.id == 3).define(pairs(node_a, node_b))
|
|
7208
|
+
>>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
|
|
7209
|
+
>>>
|
|
7210
|
+
>>> # Get preferential attachment scores only for the specific pairs (1, 3) and (2, 4)
|
|
7211
|
+
>>> constrained_preferential_attachment = graph.preferential_attachment(between=pairs)
|
|
7212
|
+
>>> select(u.id, v.id, score).where(constrained_preferential_attachment(u, v, score)).inspect()
|
|
7213
|
+
▰▰▰▰ Setup complete
|
|
7214
|
+
id id2 score
|
|
7215
|
+
0 1 3 3
|
|
7216
|
+
1 2 4 6
|
|
7217
|
+
|
|
6730
7218
|
"""
|
|
6731
|
-
|
|
6732
|
-
|
|
6733
|
-
|
|
6734
|
-
"of all pairs of nodes of the graph. To provide better control over "
|
|
6735
|
-
"the computed subset, `preferential_attachment`'s interface will soon "
|
|
6736
|
-
"need to change."
|
|
6737
|
-
),
|
|
6738
|
-
FutureWarning,
|
|
6739
|
-
stacklevel=2
|
|
7219
|
+
# Validate domain constraint parameters.
|
|
7220
|
+
self._validate_domain_constraint_parameters(
|
|
7221
|
+
'preferential_attachment', full, from_, to, between
|
|
6740
7222
|
)
|
|
6741
7223
|
|
|
7224
|
+
# At this point, exactly one of `full`, `from_`, or `between`
|
|
7225
|
+
# has been provided, and if `to` is provided, `from_` is also provided.
|
|
7226
|
+
|
|
7227
|
+
# Handle `between`.
|
|
7228
|
+
if between is not None:
|
|
7229
|
+
self._validate_pair_subset_parameter(between)
|
|
7230
|
+
return self._preferential_attachment_between(between)
|
|
7231
|
+
|
|
7232
|
+
# Handle `from_` (and potentially `to`).
|
|
7233
|
+
if from_ is not None:
|
|
7234
|
+
self._validate_node_subset_parameter('from_', from_)
|
|
7235
|
+
if to is not None:
|
|
7236
|
+
self._validate_node_subset_parameter('to', to)
|
|
7237
|
+
return self._preferential_attachment_from_to(from_, to)
|
|
7238
|
+
return self._preferential_attachment_from(from_)
|
|
7239
|
+
|
|
7240
|
+
# Handle `full`.
|
|
6742
7241
|
return self._preferential_attachment
|
|
6743
7242
|
|
|
6744
7243
|
@cached_property
|
|
6745
7244
|
def _preferential_attachment(self):
|
|
6746
|
-
"""Lazily define and cache the
|
|
6747
|
-
_preferential_attachment_rel = self.
|
|
7245
|
+
"""Lazily define and cache the full preferential_attachment relationship."""
|
|
7246
|
+
_preferential_attachment_rel = self._create_preferential_attachment_relationship()
|
|
6748
7247
|
_preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment"))
|
|
7248
|
+
return _preferential_attachment_rel
|
|
7249
|
+
|
|
7250
|
+
def _preferential_attachment_from(self, node_subset_from: Relationship):
|
|
7251
|
+
"""
|
|
7252
|
+
Create a preferential_attachment relationship, with the first position in each
|
|
7253
|
+
tuple constrained to be in the given subset of nodes. Note this relationship
|
|
7254
|
+
is not cached; it is specific to the callsite.
|
|
7255
|
+
"""
|
|
7256
|
+
_preferential_attachment_rel = self._create_preferential_attachment_relationship(
|
|
7257
|
+
node_subset_from=node_subset_from
|
|
7258
|
+
)
|
|
7259
|
+
_preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment_from"))
|
|
7260
|
+
return _preferential_attachment_rel
|
|
7261
|
+
|
|
7262
|
+
def _preferential_attachment_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
|
|
7263
|
+
"""
|
|
7264
|
+
Create a preferential_attachment relationship, with the first position in each
|
|
7265
|
+
tuple constrained to be in `node_subset_from`, and the second position in
|
|
7266
|
+
each tuple constrained to be in `node_subset_to`. Note this relationship
|
|
7267
|
+
is not cached; it is specific to the callsite.
|
|
7268
|
+
"""
|
|
7269
|
+
_preferential_attachment_rel = self._create_preferential_attachment_relationship(
|
|
7270
|
+
node_subset_from=node_subset_from,
|
|
7271
|
+
node_subset_to=node_subset_to
|
|
7272
|
+
)
|
|
7273
|
+
_preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment_from_to"))
|
|
7274
|
+
return _preferential_attachment_rel
|
|
7275
|
+
|
|
7276
|
+
def _preferential_attachment_between(self, pair_subset_between: Relationship):
|
|
7277
|
+
"""
|
|
7278
|
+
Create a preferential_attachment relationship, with the first and second position
|
|
7279
|
+
in each tuple jointly constrained to be in the given set of pairs
|
|
7280
|
+
of nodes. Note this relationship is not cached;
|
|
7281
|
+
it is specific to the callsite.
|
|
7282
|
+
"""
|
|
7283
|
+
_preferential_attachment_rel = self._create_preferential_attachment_relationship(
|
|
7284
|
+
pair_subset_between=pair_subset_between
|
|
7285
|
+
)
|
|
7286
|
+
_preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment_between"))
|
|
7287
|
+
return _preferential_attachment_rel
|
|
7288
|
+
|
|
7289
|
+
def _create_preferential_attachment_relationship(
|
|
7290
|
+
self,
|
|
7291
|
+
*,
|
|
7292
|
+
node_subset_from: Optional[Relationship] = None,
|
|
7293
|
+
node_subset_to: Optional[Relationship] = None,
|
|
7294
|
+
pair_subset_between: Optional[Relationship] = None,
|
|
7295
|
+
):
|
|
7296
|
+
"""
|
|
7297
|
+
Create preferential_attachment relationship, optionally constrained by
|
|
7298
|
+
the provided node subsets or pair subset.
|
|
7299
|
+
"""
|
|
7300
|
+
_preferential_attachment_rel = self._model.Relationship(
|
|
7301
|
+
f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
|
|
7302
|
+
f"have preferential attachment score {{score:Integer}}"
|
|
7303
|
+
)
|
|
6749
7304
|
|
|
7305
|
+
# Branch by case to select appropriate count_neighbor and isolated_node relationships,
|
|
7306
|
+
# and to define relevant constraints on the separate and joint domains of node_u and node_v.
|
|
6750
7307
|
node_u, node_v = self.Node.ref(), self.Node.ref()
|
|
6751
|
-
count_u, count_v = Integer.ref(), Integer.ref()
|
|
6752
7308
|
|
|
6753
|
-
#
|
|
6754
|
-
|
|
7309
|
+
# Handle the `between` case.
|
|
7310
|
+
if pair_subset_between is not None:
|
|
7311
|
+
# Collect nodes that appear in the subset by position.
|
|
7312
|
+
first_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
|
|
7313
|
+
second_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
|
|
7314
|
+
node_x, node_y = self.Node.ref(), self.Node.ref()
|
|
7315
|
+
where(
|
|
7316
|
+
pair_subset_between(node_x, node_y)
|
|
7317
|
+
).define(
|
|
7318
|
+
first_position_subset(node_x),
|
|
7319
|
+
second_position_subset(node_y)
|
|
7320
|
+
)
|
|
7321
|
+
|
|
7322
|
+
# Constituents of non-isolated-nodes rule.
|
|
7323
|
+
non_isolated_rule_uv_constraint = [pair_subset_between(node_u, node_v)]
|
|
7324
|
+
count_neighbor_u_rel = self._count_neighbor_of(first_position_subset)
|
|
7325
|
+
count_neighbor_v_rel = self._count_neighbor_of(second_position_subset)
|
|
6755
7326
|
|
|
6756
|
-
|
|
7327
|
+
# Constituents of u-isolated rule.
|
|
7328
|
+
isolated_u_rel = self._isolated_node_of(first_position_subset)
|
|
7329
|
+
isolated_u_rule_uv_constraint = [pair_subset_between(node_u, node_v)]
|
|
7330
|
+
|
|
7331
|
+
# Constituents of v-isolated rule.
|
|
7332
|
+
isolated_v_rel = self._isolated_node_of(second_position_subset)
|
|
7333
|
+
isolated_v_rule_uv_constraint = [pair_subset_between(node_u, node_v)]
|
|
7334
|
+
|
|
7335
|
+
# Handle the `from_` case.
|
|
7336
|
+
elif node_subset_from is not None and node_subset_to is None:
|
|
7337
|
+
# NOTE: It isn't necessary to compute _count_neighbor_of
|
|
7338
|
+
# and _isolated_node_of for node_subset_from, given
|
|
7339
|
+
# we have to compute _count_neighbor and _isolated_node
|
|
7340
|
+
# for the unconstrained second position anyway. That does
|
|
7341
|
+
# require additional constraints as seen below, though.
|
|
7342
|
+
#
|
|
7343
|
+
# It's not clear to this author that there is a more clever
|
|
7344
|
+
# way to do this, given that in preferential attachment,
|
|
7345
|
+
# constraining one position implies no constraint on the
|
|
7346
|
+
# other position, unlike in, e.g., common neighbor?
|
|
7347
|
+
|
|
7348
|
+
# Constituents of non-isolated-nodes rule.
|
|
7349
|
+
non_isolated_rule_uv_constraint = [node_subset_from(node_u)]
|
|
7350
|
+
count_neighbor_u_rel = self._count_neighbor
|
|
7351
|
+
count_neighbor_v_rel = self._count_neighbor
|
|
7352
|
+
|
|
7353
|
+
# Constituents of u-isolated rule.
|
|
7354
|
+
isolated_u_rel = self._isolated_node
|
|
7355
|
+
isolated_u_rule_uv_constraint = [
|
|
7356
|
+
node_subset_from(node_u),
|
|
7357
|
+
self.Node(node_v)
|
|
7358
|
+
]
|
|
7359
|
+
|
|
7360
|
+
# Constituents of v-isolated rule.
|
|
7361
|
+
isolated_v_rel = self._isolated_node
|
|
7362
|
+
isolated_v_rule_uv_constraint = [node_subset_from(node_u)]
|
|
7363
|
+
|
|
7364
|
+
# Handle the `from_`/`to` case.
|
|
7365
|
+
elif node_subset_from is not None and node_subset_to is not None:
|
|
7366
|
+
# Check for object identity optimization.
|
|
7367
|
+
if node_subset_from is node_subset_to:
|
|
7368
|
+
# Constituents of non-isolated-nodes rule.
|
|
7369
|
+
non_isolated_rule_uv_constraint = []
|
|
7370
|
+
count_neighbor_u_rel = self._count_neighbor_of(node_subset_from)
|
|
7371
|
+
count_neighbor_v_rel = count_neighbor_u_rel
|
|
7372
|
+
|
|
7373
|
+
# Constituents of u-isolated rule.
|
|
7374
|
+
isolated_u_rel = self._isolated_node_of(node_subset_from)
|
|
7375
|
+
isolated_u_rule_uv_constraint = [node_subset_to(node_v)]
|
|
7376
|
+
|
|
7377
|
+
# Constituents of v-isolated rule.
|
|
7378
|
+
isolated_v_rel = isolated_u_rel
|
|
7379
|
+
isolated_v_rule_uv_constraint = [node_subset_from(node_u)]
|
|
7380
|
+
else:
|
|
7381
|
+
# Constituents of non-isolated-nodes rule.
|
|
7382
|
+
non_isolated_rule_uv_constraint = []
|
|
7383
|
+
count_neighbor_u_rel = self._count_neighbor_of(node_subset_from)
|
|
7384
|
+
count_neighbor_v_rel = self._count_neighbor_of(node_subset_to)
|
|
7385
|
+
|
|
7386
|
+
# Constituents of u-isolated rule.
|
|
7387
|
+
isolated_u_rel = self._isolated_node_of(node_subset_from)
|
|
7388
|
+
isolated_u_rule_uv_constraint = [node_subset_to(node_v)]
|
|
7389
|
+
|
|
7390
|
+
# Constituents of v-isolated rule.
|
|
7391
|
+
isolated_v_rel = self._isolated_node_of(node_subset_to)
|
|
7392
|
+
isolated_v_rule_uv_constraint = [node_subset_from(node_u)]
|
|
7393
|
+
|
|
7394
|
+
|
|
7395
|
+
# Handle the `full` case.
|
|
7396
|
+
else:
|
|
7397
|
+
# Constituents of non-isolated-nodes rule.
|
|
7398
|
+
non_isolated_rule_uv_constraint = []
|
|
7399
|
+
count_neighbor_u_rel = self._count_neighbor
|
|
7400
|
+
count_neighbor_v_rel = self._count_neighbor
|
|
7401
|
+
|
|
7402
|
+
# Constituents of u-isolated rule.
|
|
7403
|
+
isolated_u_rel = self._isolated_node
|
|
7404
|
+
isolated_u_rule_uv_constraint = [self.Node(node_v)]
|
|
7405
|
+
|
|
7406
|
+
# Constituents of v-isolated rule.
|
|
7407
|
+
isolated_v_rel = self._isolated_node
|
|
7408
|
+
isolated_v_rule_uv_constraint = [self.Node(node_u)]
|
|
7409
|
+
|
|
7410
|
+
# Define shared logic, which has three cases.
|
|
7411
|
+
count_u, count_v = Integer.ref(), Integer.ref()
|
|
7412
|
+
|
|
7413
|
+
# Case where node u is isolated, and node v is any node (respecting constraints): score 0.
|
|
6757
7414
|
where(
|
|
6758
|
-
|
|
6759
|
-
|
|
7415
|
+
isolated_u_rel(node_u),
|
|
7416
|
+
*isolated_u_rule_uv_constraint,
|
|
6760
7417
|
).define(_preferential_attachment_rel(node_u, node_v, 0))
|
|
6761
7418
|
|
|
6762
|
-
# Case where node u is any node, and node v is isolated: score 0.
|
|
7419
|
+
# Case where node u is any node (respecting constraints), and node v is isolated: score 0.
|
|
6763
7420
|
where(
|
|
6764
|
-
|
|
6765
|
-
|
|
7421
|
+
*isolated_v_rule_uv_constraint,
|
|
7422
|
+
isolated_v_rel(node_v)
|
|
6766
7423
|
).define(_preferential_attachment_rel(node_u, node_v, 0))
|
|
6767
7424
|
|
|
6768
7425
|
# Case where neither node is isolated: score is count_neighbor[u] * count_neighbor[v].
|
|
6769
7426
|
where(
|
|
6770
|
-
|
|
6771
|
-
|
|
7427
|
+
*non_isolated_rule_uv_constraint,
|
|
7428
|
+
count_neighbor_u_rel(node_u, count_u),
|
|
7429
|
+
count_neighbor_v_rel(node_v, count_v)
|
|
6772
7430
|
).define(_preferential_attachment_rel(node_u, node_v, count_u * count_v))
|
|
6773
7431
|
|
|
6774
7432
|
return _preferential_attachment_rel
|
|
6775
7433
|
|
|
7434
|
+
|
|
6776
7435
|
@cached_property
|
|
6777
7436
|
def _isolated_node(self):
|
|
7437
|
+
"""Lazily define and cache the self._isolated_node relationship."""
|
|
7438
|
+
return self._create_isolated_node_relationship()
|
|
7439
|
+
|
|
7440
|
+
def _isolated_node_of(self, node_subset: Relationship):
|
|
6778
7441
|
"""
|
|
6779
|
-
|
|
6780
|
-
|
|
7442
|
+
Create an _isolated_node relationship constrained to the subset of nodes
|
|
7443
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
7444
|
+
specific to the callsite.
|
|
7445
|
+
"""
|
|
7446
|
+
return self._create_isolated_node_relationship(node_subset=node_subset)
|
|
7447
|
+
|
|
7448
|
+
def _create_isolated_node_relationship(
|
|
7449
|
+
self,
|
|
7450
|
+
*,
|
|
7451
|
+
node_subset: Optional[Relationship] = None,
|
|
7452
|
+
):
|
|
7453
|
+
"""
|
|
7454
|
+
Create _isolated_node relationship, optionally constrained by
|
|
7455
|
+
the provided node subset.
|
|
6781
7456
|
"""
|
|
6782
7457
|
_isolated_node_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} is isolated")
|
|
6783
7458
|
|
|
6784
7459
|
neighbor_node = self.Node.ref()
|
|
7460
|
+
if node_subset is not None:
|
|
7461
|
+
neighbor_rel = self._neighbor_of(node_subset)
|
|
7462
|
+
node_constraint = node_subset(self.Node)
|
|
7463
|
+
else:
|
|
7464
|
+
neighbor_rel = self._neighbor
|
|
7465
|
+
node_constraint = self.Node
|
|
7466
|
+
|
|
6785
7467
|
where(
|
|
6786
|
-
|
|
6787
|
-
not_(
|
|
7468
|
+
node_constraint,
|
|
7469
|
+
not_(neighbor_rel(self.Node, neighbor_node))
|
|
6788
7470
|
).define(_isolated_node_rel(self.Node))
|
|
6789
7471
|
|
|
6790
7472
|
return _isolated_node_rel
|