relationalai 0.12.1__py3-none-any.whl → 0.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- relationalai/clients/direct_access_client.py +5 -0
- relationalai/clients/snowflake.py +31 -8
- relationalai/clients/use_index_poller.py +24 -7
- relationalai/experimental/solvers.py +296 -93
- relationalai/semantics/internal/internal.py +8 -8
- relationalai/semantics/lqp/executor.py +36 -3
- relationalai/semantics/lqp/model2lqp.py +3 -2
- relationalai/semantics/lqp/rewrite/extract_common.py +4 -2
- relationalai/semantics/metamodel/builtins.py +6 -6
- relationalai/semantics/metamodel/dependency.py +40 -20
- relationalai/semantics/metamodel/helpers.py +7 -6
- relationalai/semantics/metamodel/rewrite/extract_nested_logicals.py +1 -4
- relationalai/semantics/reasoners/graph/core.py +731 -95
- relationalai/semantics/rel/executor.py +11 -2
- relationalai/semantics/std/math.py +2 -2
- {relationalai-0.12.1.dist-info → relationalai-0.12.3.dist-info}/METADATA +1 -1
- {relationalai-0.12.1.dist-info → relationalai-0.12.3.dist-info}/RECORD +20 -20
- {relationalai-0.12.1.dist-info → relationalai-0.12.3.dist-info}/WHEEL +0 -0
- {relationalai-0.12.1.dist-info → relationalai-0.12.3.dist-info}/entry_points.txt +0 -0
- {relationalai-0.12.1.dist-info → relationalai-0.12.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1332,21 +1332,6 @@ class Graph():
|
|
|
1332
1332
|
# presently in use by the `cosine_similarity` and
|
|
1333
1333
|
# `jaccard_similarity` relationships.
|
|
1334
1334
|
|
|
1335
|
-
def _count_common_outneighbor_fragment(self, node_u, node_v):
|
|
1336
|
-
"""
|
|
1337
|
-
Helper for cosine_similarity and jaccard_similarity that returns a fragment
|
|
1338
|
-
that counts the common outneighbors of given nodes `node_u` and `node_v`.
|
|
1339
|
-
"""
|
|
1340
|
-
common_outneighbor_node = self.Node.ref()
|
|
1341
|
-
return (
|
|
1342
|
-
count(common_outneighbor_node)
|
|
1343
|
-
.per(node_u, node_v)
|
|
1344
|
-
.where(
|
|
1345
|
-
self._outneighbor(node_u, common_outneighbor_node),
|
|
1346
|
-
self._outneighbor(node_v, common_outneighbor_node),
|
|
1347
|
-
)
|
|
1348
|
-
)
|
|
1349
|
-
|
|
1350
1335
|
def _wu_dot_wv_fragment(self, node_u, node_v):
|
|
1351
1336
|
"""
|
|
1352
1337
|
Helper for cosine_similarity that returns a fragment that produces an
|
|
@@ -5592,18 +5577,71 @@ class Graph():
|
|
|
5592
5577
|
|
|
5593
5578
|
|
|
5594
5579
|
@include_in_docs
|
|
5595
|
-
def jaccard_similarity(
|
|
5596
|
-
|
|
5580
|
+
def jaccard_similarity(
|
|
5581
|
+
self,
|
|
5582
|
+
*,
|
|
5583
|
+
full: Optional[bool] = None,
|
|
5584
|
+
from_: Optional[Relationship] = None,
|
|
5585
|
+
to: Optional[Relationship] = None,
|
|
5586
|
+
between: Optional[Relationship] = None,
|
|
5587
|
+
):
|
|
5588
|
+
"""Returns a ternary relationship containing
|
|
5589
|
+
the Jaccard similarity for pairs of nodes.
|
|
5597
5590
|
|
|
5598
5591
|
The Jaccard similarity is a measure between two nodes that ranges from
|
|
5599
5592
|
0.0 to 1.0, where higher values indicate greater similarity.
|
|
5600
5593
|
|
|
5594
|
+
Parameters
|
|
5595
|
+
----------
|
|
5596
|
+
full : bool, optional
|
|
5597
|
+
If ``True``, computes the Jaccard similarity for all pairs
|
|
5598
|
+
of nodes in the graph. This computation can be expensive for large graphs,
|
|
5599
|
+
as the result can scale quadratically in the number of nodes. Mutually exclusive
|
|
5600
|
+
with other parameters.
|
|
5601
|
+
Default is ``None``.
|
|
5602
|
+
from_ : Relationship, optional
|
|
5603
|
+
A unary relationship containing a subset of the graph's nodes. When
|
|
5604
|
+
provided, constrains the domain of the Jaccard similarity computation: only
|
|
5605
|
+
Jaccard similarity scores for node pairs where the first node is
|
|
5606
|
+
in this relationship are computed and returned. Mutually exclusive with
|
|
5607
|
+
``full`` and ``between``.
|
|
5608
|
+
Default is ``None``.
|
|
5609
|
+
to : Relationship, optional
|
|
5610
|
+
A unary relationship containing a subset of the graph's nodes. Can only
|
|
5611
|
+
be used together with the ``from_`` parameter. When provided with ``from_``,
|
|
5612
|
+
constrains the domain of the Jaccard similarity computation: only
|
|
5613
|
+
Jaccard similarity scores for node pairs where the first node is
|
|
5614
|
+
in ``from_`` and the second node is in ``to`` are computed and returned.
|
|
5615
|
+
Default is ``None``.
|
|
5616
|
+
between : Relationship, optional
|
|
5617
|
+
A binary relationship containing pairs of nodes. When provided,
|
|
5618
|
+
constrains the domain of the Jaccard similarity computation: only
|
|
5619
|
+
Jaccard similarity scores for the specific node pairs in
|
|
5620
|
+
this relationship are computed and returned. Mutually exclusive
|
|
5621
|
+
with other parameters.
|
|
5622
|
+
Default is ``None``.
|
|
5623
|
+
|
|
5601
5624
|
Returns
|
|
5602
5625
|
-------
|
|
5603
5626
|
Relationship
|
|
5604
5627
|
A ternary relationship where each tuple represents a pair of nodes
|
|
5605
5628
|
and their Jaccard similarity.
|
|
5606
5629
|
|
|
5630
|
+
Raises
|
|
5631
|
+
------
|
|
5632
|
+
ValueError
|
|
5633
|
+
If ``full`` is provided with any other parameter.
|
|
5634
|
+
If ``between`` is provided with any other parameter.
|
|
5635
|
+
If ``from_`` is provided with any parameter other than ``to``.
|
|
5636
|
+
If none of ``full``, ``from_``, or ``between`` is provided.
|
|
5637
|
+
If ``full`` is not ``True`` or ``None``.
|
|
5638
|
+
AssertionError
|
|
5639
|
+
If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
|
|
5640
|
+
If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
|
|
5641
|
+
If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
|
|
5642
|
+
If ``from_`` or ``to`` is not a unary relationship.
|
|
5643
|
+
If ``between`` is not a binary relationship.
|
|
5644
|
+
|
|
5607
5645
|
Relationship Schema
|
|
5608
5646
|
-------------------
|
|
5609
5647
|
``jaccard_similarity(node_u, node_v, score)``
|
|
@@ -5652,6 +5690,40 @@ class Graph():
|
|
|
5652
5690
|
The weighted Jaccard similarity between node 1 and 2 is then:
|
|
5653
5691
|
`0.46 / (1.6 + 1.6 + 1.4) = 0.1`.
|
|
5654
5692
|
|
|
5693
|
+
Edge weights are assumed to be non-negative, so the neighborhood
|
|
5694
|
+
vectors contain only non-negative elements. Therefore, the Jaccard
|
|
5695
|
+
similarity score is always between 0.0 and 1.0, inclusive.
|
|
5696
|
+
|
|
5697
|
+
The ``jaccard_similarity(full=True)`` method computes and caches
|
|
5698
|
+
the full Jaccard similarity relationship for all pairs of nodes,
|
|
5699
|
+
providing efficient reuse across multiple calls. This can be expensive
|
|
5700
|
+
as the result can contain O(|V|²) tuples.
|
|
5701
|
+
|
|
5702
|
+
Calling ``jaccard_similarity()`` without arguments raises a ``ValueError``,
|
|
5703
|
+
to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
|
|
5704
|
+
|
|
5705
|
+
In contrast, ``jaccard_similarity(from_=subset)`` constrains the computation to
|
|
5706
|
+
tuples with the first position in the passed-in ``subset``. The result is
|
|
5707
|
+
not cached; it is specific to the call site. When a significant fraction of
|
|
5708
|
+
the Jaccard similarity relation is needed across a program,
|
|
5709
|
+
``jaccard_similarity(full=True)`` is typically more efficient. Use
|
|
5710
|
+
``jaccard_similarity(from_=subset)`` only when small subsets of
|
|
5711
|
+
the Jaccard similarity relationship are needed
|
|
5712
|
+
collectively across the program.
|
|
5713
|
+
|
|
5714
|
+
The ``to`` parameter can be used together with ``from_`` to further
|
|
5715
|
+
constrain the computation: ``jaccard_similarity(from_=subset_a, to=subset_b)``
|
|
5716
|
+
computes Jaccard similarity scores only for node pairs where the first node is in
|
|
5717
|
+
``subset_a`` and the second node is in ``subset_b``. (Since ``jaccard_similarity``
|
|
5718
|
+
is symmetric in its first two positions, using ``to`` without ``from_`` would
|
|
5719
|
+
be functionally redundant, and is not allowed.)
|
|
5720
|
+
|
|
5721
|
+
The ``between`` parameter provides another way to constrain the computation.
|
|
5722
|
+
Unlike ``from_`` and ``to``, which allow you to independently constrain the first
|
|
5723
|
+
and second positions in ``jaccard_similarity`` tuples to sets of nodes, ``between``
|
|
5724
|
+
allows you to constrain the first and second positions, jointly, to specific pairs
|
|
5725
|
+
of nodes.
|
|
5726
|
+
|
|
5655
5727
|
Examples
|
|
5656
5728
|
--------
|
|
5657
5729
|
**Unweighted Graph Examples**
|
|
@@ -5673,8 +5745,8 @@ class Graph():
|
|
|
5673
5745
|
... Edge.new(src=n4, dst=n3),
|
|
5674
5746
|
... )
|
|
5675
5747
|
>>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
|
|
5676
|
-
>>>
|
|
5677
|
-
>>> select(score).where(
|
|
5748
|
+
>>> jaccard_similarity = graph.jaccard_similarity(full=True)
|
|
5749
|
+
>>> select(score).where(jaccard_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
|
|
5678
5750
|
▰▰▰▰ Setup complete
|
|
5679
5751
|
score
|
|
5680
5752
|
0 0.25
|
|
@@ -5696,8 +5768,8 @@ class Graph():
|
|
|
5696
5768
|
... Edge.new(src=n4, dst=n3),
|
|
5697
5769
|
... )
|
|
5698
5770
|
>>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
|
|
5699
|
-
>>>
|
|
5700
|
-
>>> select(score).where(
|
|
5771
|
+
>>> jaccard_similarity = graph.jaccard_similarity(full=True)
|
|
5772
|
+
>>> select(score).where(jaccard_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
|
|
5701
5773
|
▰▰▰▰ Setup complete
|
|
5702
5774
|
score
|
|
5703
5775
|
0 0.5
|
|
@@ -5724,12 +5796,57 @@ class Graph():
|
|
|
5724
5796
|
>>>
|
|
5725
5797
|
>>> # 3. Select the weighted Jaccard similarity for the pair (1, 2)
|
|
5726
5798
|
>>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
|
|
5727
|
-
>>>
|
|
5728
|
-
>>> select(score).where(
|
|
5799
|
+
>>> jaccard_similarity = graph.jaccard_similarity(full=True)
|
|
5800
|
+
>>> select(score).where(jaccard_similarity(u, v, score), u.id == 1, v.id == 2).inspect()
|
|
5729
5801
|
▰▰▰▰ Setup complete
|
|
5730
5802
|
score
|
|
5731
5803
|
0 0.1
|
|
5732
5804
|
|
|
5805
|
+
**Domain Constraint Examples**
|
|
5806
|
+
|
|
5807
|
+
>>> # Use 'from_' parameter to constrain the set of nodes for the first position
|
|
5808
|
+
>>> # Using the same undirected unweighted graph from above
|
|
5809
|
+
>>> from relationalai.semantics import where
|
|
5810
|
+
>>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
|
|
5811
|
+
>>> node = Node.ref()
|
|
5812
|
+
>>> where(node.id == 2).define(subset(node))
|
|
5813
|
+
>>>
|
|
5814
|
+
>>> # Get Jaccard similarity scores only for pairs where first node is in subset
|
|
5815
|
+
>>> constrained_jaccard_similarity = graph.jaccard_similarity(from_=subset)
|
|
5816
|
+
>>> select(u.id, v.id, score).where(constrained_jaccard_similarity(u, v, score)).inspect()
|
|
5817
|
+
▰▰▰▰ Setup complete
|
|
5818
|
+
id id2 score
|
|
5819
|
+
0 2 2 1.00
|
|
5820
|
+
1 2 3 0.50
|
|
5821
|
+
2 2 4 0.25
|
|
5822
|
+
|
|
5823
|
+
>>> # Use both 'from_' and 'to' parameters to constrain both positions
|
|
5824
|
+
>>> from_subset = model.Relationship(f"{{node:{Node}}} is in from_subset")
|
|
5825
|
+
>>> to_subset = model.Relationship(f"{{node:{Node}}} is in to_subset")
|
|
5826
|
+
>>> where(node.id == 2).define(from_subset(node))
|
|
5827
|
+
>>> where(node.id == 4).define(to_subset(node))
|
|
5828
|
+
>>>
|
|
5829
|
+
>>> # Get Jaccard similarity scores only where first node is in from_subset and second node is in to_subset
|
|
5830
|
+
>>> constrained_jaccard_similarity = graph.jaccard_similarity(from_=from_subset, to=to_subset)
|
|
5831
|
+
>>> select(u.id, v.id, score).where(constrained_jaccard_similarity(u, v, score)).inspect()
|
|
5832
|
+
▰▰▰▰ Setup complete
|
|
5833
|
+
id id2 score
|
|
5834
|
+
0 2 4 0.25
|
|
5835
|
+
|
|
5836
|
+
>>> # Use 'between' parameter to constrain to specific pairs of nodes
|
|
5837
|
+
>>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
|
|
5838
|
+
>>> node_a, node_b = Node.ref(), Node.ref()
|
|
5839
|
+
>>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
|
|
5840
|
+
>>> where(node_a.id == 3, node_b.id == 4).define(pairs(node_a, node_b))
|
|
5841
|
+
>>>
|
|
5842
|
+
>>> # Get Jaccard similarity scores only for the specific pairs (2, 4) and (3, 4)
|
|
5843
|
+
>>> constrained_jaccard_similarity = graph.jaccard_similarity(between=pairs)
|
|
5844
|
+
>>> select(u.id, v.id, score).where(constrained_jaccard_similarity(u, v, score)).inspect()
|
|
5845
|
+
▰▰▰▰ Setup complete
|
|
5846
|
+
id id2 score
|
|
5847
|
+
0 2 4 0.25
|
|
5848
|
+
1 3 4 0.50
|
|
5849
|
+
|
|
5733
5850
|
References
|
|
5734
5851
|
----------
|
|
5735
5852
|
Frigo M, Cruciani E, Coudert D, Deriche R, Natale E, Deslauriers-Gauthier S.
|
|
@@ -5738,57 +5855,242 @@ class Graph():
|
|
|
5738
5855
|
doi: 10.1162/netn_a_00199. PMID: 34746624; PMCID: PMC8567827.
|
|
5739
5856
|
|
|
5740
5857
|
"""
|
|
5741
|
-
|
|
5742
|
-
|
|
5743
|
-
|
|
5744
|
-
"of all pairs of nodes of the graph. To provide better control over "
|
|
5745
|
-
"the computed subset, `jaccard_similarity`'s interface will soon "
|
|
5746
|
-
"need to change."
|
|
5747
|
-
),
|
|
5748
|
-
FutureWarning,
|
|
5749
|
-
stacklevel=2
|
|
5858
|
+
# Validate domain constraint parameters.
|
|
5859
|
+
self._validate_domain_constraint_parameters(
|
|
5860
|
+
'jaccard_similarity', full, from_, to, between
|
|
5750
5861
|
)
|
|
5862
|
+
|
|
5863
|
+
# At this point, exactly one of `full`, `from_`, or `between`
|
|
5864
|
+
# has been provided, and if `to` is provided, `from_` is also provided.
|
|
5865
|
+
|
|
5866
|
+
# Handle `between`.
|
|
5867
|
+
if between is not None:
|
|
5868
|
+
self._validate_pair_subset_parameter(between)
|
|
5869
|
+
return self._jaccard_similarity_between(between)
|
|
5870
|
+
|
|
5871
|
+
# Handle `from_` (and potentially `to`).
|
|
5872
|
+
if from_ is not None:
|
|
5873
|
+
self._validate_node_subset_parameter('from_', from_)
|
|
5874
|
+
if to is not None:
|
|
5875
|
+
self._validate_node_subset_parameter('to', to)
|
|
5876
|
+
return self._jaccard_similarity_from_to(from_, to)
|
|
5877
|
+
return self._jaccard_similarity_from(from_)
|
|
5878
|
+
|
|
5879
|
+
# Handle `full`.
|
|
5751
5880
|
return self._jaccard_similarity
|
|
5752
5881
|
|
|
5753
5882
|
@cached_property
|
|
5754
5883
|
def _jaccard_similarity(self):
|
|
5755
|
-
"""Lazily define and cache the
|
|
5756
|
-
_jaccard_similarity_rel = self.
|
|
5884
|
+
"""Lazily define and cache the full jaccard_similarity relationship."""
|
|
5885
|
+
_jaccard_similarity_rel = self._create_jaccard_similarity_relationship()
|
|
5757
5886
|
_jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity"))
|
|
5887
|
+
return _jaccard_similarity_rel
|
|
5758
5888
|
|
|
5759
|
-
|
|
5760
|
-
|
|
5761
|
-
|
|
5762
|
-
|
|
5763
|
-
|
|
5764
|
-
|
|
5765
|
-
|
|
5766
|
-
|
|
5767
|
-
|
|
5768
|
-
|
|
5889
|
+
def _jaccard_similarity_from(self, node_subset_from: Relationship):
|
|
5890
|
+
"""
|
|
5891
|
+
Create a jaccard_similarity relationship, with the first position in each
|
|
5892
|
+
tuple constrained to be in the given subset of nodes. Note this relationship
|
|
5893
|
+
is not cached; it is specific to the callsite.
|
|
5894
|
+
"""
|
|
5895
|
+
_jaccard_similarity_rel = self._create_jaccard_similarity_relationship(
|
|
5896
|
+
node_subset_from=node_subset_from
|
|
5897
|
+
)
|
|
5898
|
+
_jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity_from"))
|
|
5899
|
+
return _jaccard_similarity_rel
|
|
5900
|
+
|
|
5901
|
+
def _jaccard_similarity_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
|
|
5902
|
+
"""
|
|
5903
|
+
Create a jaccard_similarity relationship, with the first position in each
|
|
5904
|
+
tuple constrained to be in `node_subset_from`, and the second position in
|
|
5905
|
+
each tuple constrained to be in `node_subset_to`. Note this relationship
|
|
5906
|
+
is not cached; it is specific to the callsite.
|
|
5907
|
+
"""
|
|
5908
|
+
_jaccard_similarity_rel = self._create_jaccard_similarity_relationship(
|
|
5909
|
+
node_subset_from=node_subset_from,
|
|
5910
|
+
node_subset_to=node_subset_to
|
|
5911
|
+
)
|
|
5912
|
+
_jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity_from_to"))
|
|
5913
|
+
return _jaccard_similarity_rel
|
|
5914
|
+
|
|
5915
|
+
def _jaccard_similarity_between(self, pair_subset_between: Relationship):
|
|
5916
|
+
"""
|
|
5917
|
+
Create a jaccard_similarity relationship, with the first and second position
|
|
5918
|
+
in each tuple jointly constrained to be in the given set of pairs
|
|
5919
|
+
of nodes. Note this relationship is not cached;
|
|
5920
|
+
it is specific to the callsite.
|
|
5921
|
+
"""
|
|
5922
|
+
_jaccard_similarity_rel = self._create_jaccard_similarity_relationship(
|
|
5923
|
+
pair_subset_between=pair_subset_between
|
|
5924
|
+
)
|
|
5925
|
+
_jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity_between"))
|
|
5926
|
+
return _jaccard_similarity_rel
|
|
5927
|
+
|
|
5928
|
+
def _create_jaccard_similarity_relationship(
|
|
5929
|
+
self,
|
|
5930
|
+
*,
|
|
5931
|
+
node_subset_from: Optional[Relationship] = None,
|
|
5932
|
+
node_subset_to: Optional[Relationship] = None,
|
|
5933
|
+
pair_subset_between: Optional[Relationship] = None,
|
|
5934
|
+
):
|
|
5935
|
+
"""
|
|
5936
|
+
Create jaccard_similarity relationship, optionally constrained by
|
|
5937
|
+
the provided node subsets or pair subset.
|
|
5938
|
+
"""
|
|
5939
|
+
_jaccard_similarity_rel = self._model.Relationship(
|
|
5940
|
+
f"{{node_u:{self._NodeConceptStr}}} has a Jaccard similarity to "
|
|
5941
|
+
f"{{node_v:{self._NodeConceptStr}}} of {{score:Float}}"
|
|
5942
|
+
)
|
|
5943
|
+
|
|
5944
|
+
# Branch by case to select appropriate count_outneighbor,
|
|
5945
|
+
# outneighbor, and weighted_outdegree relationships, and build
|
|
5946
|
+
# appropriate constraints on the domain of the nodes.
|
|
5947
|
+
node_u, node_v = self.Node.ref(), self.Node.ref()
|
|
5948
|
+
|
|
5949
|
+
# TODO: Optimization opportunity. In a number of branches below,
|
|
5950
|
+
# we compute _count_outneighbor_of, which transitively computes
|
|
5951
|
+
# _outneighbor_of, and then compute _outneighbor_of directly;
|
|
5952
|
+
# the present code structure makes this a developer-time-efficient
|
|
5953
|
+
# way to get this off the ground, but of course involves redundant
|
|
5954
|
+
# work. In future this redundant work could be eliminated.
|
|
5955
|
+
|
|
5956
|
+
# Handle the `between` case.
|
|
5957
|
+
if pair_subset_between is not None:
|
|
5958
|
+
# Extract first-position and second-position nodes.
|
|
5959
|
+
first_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
|
|
5960
|
+
second_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
|
|
5961
|
+
node_x, node_y = self.Node.ref(), self.Node.ref()
|
|
5962
|
+
where(
|
|
5963
|
+
pair_subset_between(node_x, node_y)
|
|
5964
|
+
).define(
|
|
5965
|
+
first_position_subset(node_x),
|
|
5966
|
+
second_position_subset(node_y)
|
|
5967
|
+
)
|
|
5968
|
+
|
|
5969
|
+
if not self.weighted:
|
|
5970
|
+
count_outneighbor_u_rel = self._count_outneighbor_of(first_position_subset)
|
|
5971
|
+
count_outneighbor_v_rel = self._count_outneighbor_of(second_position_subset)
|
|
5972
|
+
outneighbor_u_rel = self._outneighbor_of(first_position_subset)
|
|
5973
|
+
outneighbor_v_rel = self._outneighbor_of(second_position_subset)
|
|
5974
|
+
else: # self.weighted
|
|
5975
|
+
weighted_outdegree_u_rel = self._weighted_outdegree_of(first_position_subset)
|
|
5976
|
+
weighted_outdegree_v_rel = self._weighted_outdegree_of(second_position_subset)
|
|
5977
|
+
|
|
5978
|
+
node_constraints = [pair_subset_between(node_u, node_v)]
|
|
5979
|
+
|
|
5980
|
+
# Handle the `from_` case.
|
|
5981
|
+
elif node_subset_from is not None and node_subset_to is None:
|
|
5982
|
+
if not self.weighted:
|
|
5983
|
+
count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
|
|
5984
|
+
count_outneighbor_v_rel = self._count_outneighbor
|
|
5985
|
+
outneighbor_u_rel = self._outneighbor_of(node_subset_from)
|
|
5986
|
+
outneighbor_v_rel = self._outneighbor
|
|
5987
|
+
else: # self.weighted
|
|
5988
|
+
weighted_outdegree_u_rel = self._weighted_outdegree_of(node_subset_from)
|
|
5989
|
+
weighted_outdegree_v_rel = self._weighted_outdegree
|
|
5990
|
+
|
|
5991
|
+
# TODO: Implement depth-two traversal strategy for better performance.
|
|
5992
|
+
# See similar comments on related similarity metrics.
|
|
5993
|
+
|
|
5994
|
+
node_constraints = [node_subset_from(node_u)]
|
|
5995
|
+
|
|
5996
|
+
# Handle the `from_`/`to` case.
|
|
5997
|
+
elif node_subset_from is not None and node_subset_to is not None:
|
|
5998
|
+
# Check for object identity optimization.
|
|
5999
|
+
if node_subset_from is node_subset_to:
|
|
6000
|
+
if not self.weighted:
|
|
6001
|
+
count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
|
|
6002
|
+
count_outneighbor_v_rel = count_outneighbor_u_rel
|
|
6003
|
+
outneighbor_u_rel = self._outneighbor_of(node_subset_from)
|
|
6004
|
+
outneighbor_v_rel = outneighbor_u_rel
|
|
6005
|
+
else: # self.weighted
|
|
6006
|
+
weighted_outdegree_u_rel = self._weighted_outdegree_of(node_subset_from)
|
|
6007
|
+
weighted_outdegree_v_rel = weighted_outdegree_u_rel
|
|
6008
|
+
else:
|
|
6009
|
+
if not self.weighted:
|
|
6010
|
+
count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
|
|
6011
|
+
count_outneighbor_v_rel = self._count_outneighbor_of(node_subset_to)
|
|
6012
|
+
outneighbor_u_rel = self._outneighbor_of(node_subset_from)
|
|
6013
|
+
outneighbor_v_rel = self._outneighbor_of(node_subset_to)
|
|
6014
|
+
else: # self.weighted
|
|
6015
|
+
weighted_outdegree_u_rel = self._weighted_outdegree_of(node_subset_from)
|
|
6016
|
+
weighted_outdegree_v_rel = self._weighted_outdegree_of(node_subset_to)
|
|
6017
|
+
|
|
6018
|
+
node_constraints = [node_subset_from(node_u), node_subset_to(node_v)]
|
|
6019
|
+
|
|
6020
|
+
# Handle the `full` case.
|
|
5769
6021
|
else:
|
|
5770
|
-
|
|
6022
|
+
if not self.weighted:
|
|
6023
|
+
count_outneighbor_u_rel = self._count_outneighbor
|
|
6024
|
+
count_outneighbor_v_rel = self._count_outneighbor
|
|
6025
|
+
outneighbor_u_rel = self._outneighbor
|
|
6026
|
+
outneighbor_v_rel = self._outneighbor
|
|
6027
|
+
else: # self.weighted
|
|
6028
|
+
weighted_outdegree_u_rel = self._weighted_outdegree
|
|
6029
|
+
weighted_outdegree_v_rel = self._weighted_outdegree
|
|
5771
6030
|
|
|
6031
|
+
node_constraints = []
|
|
6032
|
+
|
|
6033
|
+
# Define Jaccard similarity logic for weighted and unweighted cases.
|
|
6034
|
+
if not self.weighted:
|
|
6035
|
+
num_u_outneigbor, num_v_outneigbor = Integer.ref(), Integer.ref()
|
|
6036
|
+
common_outneighbor_node = self.Node.ref()
|
|
6037
|
+
num_union_outneighbors = Integer.ref()
|
|
6038
|
+
score = Float.ref()
|
|
6039
|
+
|
|
6040
|
+
where(
|
|
6041
|
+
*node_constraints,
|
|
6042
|
+
count_outneighbor_u_rel(node_u, num_u_outneigbor), # type: ignore[possibly-unbound]
|
|
6043
|
+
count_outneighbor_v_rel(node_v, num_v_outneigbor), # type: ignore[possibly-unbound]
|
|
6044
|
+
num_common_outneighbor := count(common_outneighbor_node).per(node_u, node_v).where(
|
|
6045
|
+
outneighbor_u_rel(node_u, common_outneighbor_node), # type: ignore[possibly-unbound]
|
|
6046
|
+
outneighbor_v_rel(node_v, common_outneighbor_node), # type: ignore[possibly-unbound]
|
|
6047
|
+
),
|
|
6048
|
+
num_union_outneighbors := num_u_outneigbor + num_v_outneigbor - num_common_outneighbor,
|
|
6049
|
+
score := num_common_outneighbor / num_union_outneighbors,
|
|
6050
|
+
).define(
|
|
6051
|
+
_jaccard_similarity_rel(node_u, node_v, score)
|
|
6052
|
+
)
|
|
6053
|
+
else:
|
|
5772
6054
|
# (1) The numerator: For every node `k` in the graph, find the minimum weight of
|
|
5773
6055
|
# the out-edges from `u` and `v` to `k`, and sum those minimum weights.
|
|
5774
6056
|
|
|
5775
6057
|
# Note that for any node `k` that is not a common out-neighbor of nodes `u` and `v`,
|
|
5776
6058
|
# the minimum weight of the out-edges from `u` and `v` to `k` is zero/empty,
|
|
5777
6059
|
# so the sum here reduces to a sum over the common out-neighbors of `u` and `v`.
|
|
5778
|
-
min_weight_to_common_outneighbor = self._model.Relationship(
|
|
6060
|
+
min_weight_to_common_outneighbor = self._model.Relationship(
|
|
6061
|
+
f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
|
|
6062
|
+
f"have common outneighbor {{node_k:{self._NodeConceptStr}}} "
|
|
6063
|
+
f"with minimum weight {{minweight:Float}}"
|
|
6064
|
+
)
|
|
5779
6065
|
|
|
5780
|
-
|
|
5781
|
-
w = union(
|
|
5782
|
-
|
|
5783
|
-
|
|
5784
|
-
|
|
5785
|
-
|
|
6066
|
+
node_k, w1, w2 = self.Node.ref(), Float.ref(), Float.ref()
|
|
6067
|
+
w = union(
|
|
6068
|
+
where(self._weight(node_u, node_k, w1)).select(w1),
|
|
6069
|
+
where(self._weight(node_v, node_k, w2)).select(w2)
|
|
6070
|
+
)
|
|
6071
|
+
where(
|
|
6072
|
+
*node_constraints,
|
|
6073
|
+
self._edge(node_u, node_k),
|
|
6074
|
+
self._edge(node_v, node_k)
|
|
6075
|
+
).define(
|
|
6076
|
+
min_weight_to_common_outneighbor(
|
|
6077
|
+
node_u, node_v, node_k, min(w).per(node_u, node_v, node_k)
|
|
6078
|
+
)
|
|
6079
|
+
)
|
|
5786
6080
|
|
|
5787
|
-
sum_of_min_weights_to_common_outneighbors = self._model.Relationship(
|
|
6081
|
+
sum_of_min_weights_to_common_outneighbors = self._model.Relationship(
|
|
6082
|
+
f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
|
|
6083
|
+
f"have a sum of minweights of {{minsum:Float}}"
|
|
6084
|
+
)
|
|
5788
6085
|
|
|
5789
6086
|
minweight = Float.ref()
|
|
5790
|
-
where(
|
|
5791
|
-
|
|
6087
|
+
where(
|
|
6088
|
+
min_weight_to_common_outneighbor(node_u, node_v, node_k, minweight)
|
|
6089
|
+
).define(
|
|
6090
|
+
sum_of_min_weights_to_common_outneighbors(
|
|
6091
|
+
node_u, node_v, sum(node_k, minweight).per(node_u, node_v)
|
|
6092
|
+
)
|
|
6093
|
+
)
|
|
5792
6094
|
|
|
5793
6095
|
# (2) The denominator: For every node `k` in the graph, find the maximum weight of
|
|
5794
6096
|
# the out-edges from `u` and `v` to `k`, and sum those maximum weights.
|
|
@@ -5827,20 +6129,31 @@ class Graph():
|
|
|
5827
6129
|
# self._weighted_outdegree(u) +
|
|
5828
6130
|
# self._weighted_outdegree(v) -
|
|
5829
6131
|
# _sum_of_min_weights_to_common_outneighbors(u, v)
|
|
5830
|
-
sum_of_max_weights_to_other_nodes = self._model.Relationship(
|
|
6132
|
+
sum_of_max_weights_to_other_nodes = self._model.Relationship(
|
|
6133
|
+
f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
|
|
6134
|
+
f"have a maxsum of {{maxsum:Float}}"
|
|
6135
|
+
)
|
|
5831
6136
|
|
|
5832
6137
|
u_outdegree, v_outdegree, maxsum, minsum = Float.ref(), Float.ref(), Float.ref(), Float.ref()
|
|
5833
|
-
where(
|
|
5834
|
-
|
|
5835
|
-
|
|
5836
|
-
|
|
5837
|
-
|
|
6138
|
+
where(
|
|
6139
|
+
*node_constraints,
|
|
6140
|
+
weighted_outdegree_u_rel(node_u, u_outdegree), # type: ignore[possibly-unbound]
|
|
6141
|
+
weighted_outdegree_v_rel(node_v, v_outdegree), # type: ignore[possibly-unbound]
|
|
6142
|
+
sum_of_min_weights_to_common_outneighbors(node_u, node_v, minsum),
|
|
6143
|
+
maxsum == u_outdegree + v_outdegree - minsum
|
|
6144
|
+
).define(
|
|
6145
|
+
sum_of_max_weights_to_other_nodes(node_u, node_v, maxsum)
|
|
6146
|
+
)
|
|
5838
6147
|
|
|
6148
|
+
# Combination of (1) and (2) to produce score.
|
|
5839
6149
|
score = Float.ref()
|
|
5840
|
-
where(
|
|
5841
|
-
|
|
5842
|
-
|
|
5843
|
-
|
|
6150
|
+
where(
|
|
6151
|
+
sum_of_min_weights_to_common_outneighbors(node_u, node_v, minsum),
|
|
6152
|
+
sum_of_max_weights_to_other_nodes(node_u, node_v, maxsum),
|
|
6153
|
+
score == minsum / maxsum
|
|
6154
|
+
).define(
|
|
6155
|
+
_jaccard_similarity_rel(node_u, node_v, score)
|
|
6156
|
+
)
|
|
5844
6157
|
|
|
5845
6158
|
return _jaccard_similarity_rel
|
|
5846
6159
|
|
|
@@ -6662,19 +6975,72 @@ class Graph():
|
|
|
6662
6975
|
|
|
6663
6976
|
|
|
6664
6977
|
@include_in_docs
|
|
6665
|
-
def preferential_attachment(
|
|
6666
|
-
|
|
6978
|
+
def preferential_attachment(
|
|
6979
|
+
self,
|
|
6980
|
+
*,
|
|
6981
|
+
full: Optional[bool] = None,
|
|
6982
|
+
from_: Optional[Relationship] = None,
|
|
6983
|
+
to: Optional[Relationship] = None,
|
|
6984
|
+
between: Optional[Relationship] = None,
|
|
6985
|
+
):
|
|
6986
|
+
"""Returns a ternary relationship containing
|
|
6987
|
+
the preferential attachment score for pairs of nodes.
|
|
6667
6988
|
|
|
6668
6989
|
The preferential attachment score between two nodes `u` and `v` is the
|
|
6669
6990
|
number of nodes adjacent to `u` multiplied by the number of nodes
|
|
6670
6991
|
adjacent to `v`.
|
|
6671
6992
|
|
|
6993
|
+
Parameters
|
|
6994
|
+
----------
|
|
6995
|
+
full : bool, optional
|
|
6996
|
+
If ``True``, computes the preferential attachment score for all pairs
|
|
6997
|
+
of nodes in the graph. This computation can be expensive for large graphs,
|
|
6998
|
+
as the result can scale quadratically in the number of nodes. Mutually exclusive
|
|
6999
|
+
with other parameters.
|
|
7000
|
+
Default is ``None``.
|
|
7001
|
+
from_ : Relationship, optional
|
|
7002
|
+
A unary relationship containing a subset of the graph's nodes. When
|
|
7003
|
+
provided, constrains the domain of the preferential attachment computation: only
|
|
7004
|
+
preferential attachment scores for node pairs where the first node is
|
|
7005
|
+
in this relationship are computed and returned. Mutually exclusive with
|
|
7006
|
+
``full`` and ``between``.
|
|
7007
|
+
Default is ``None``.
|
|
7008
|
+
to : Relationship, optional
|
|
7009
|
+
A unary relationship containing a subset of the graph's nodes. Can only
|
|
7010
|
+
be used together with the ``from_`` parameter. When provided with ``from_``,
|
|
7011
|
+
constrains the domain of the preferential attachment computation: only
|
|
7012
|
+
preferential attachment scores for node pairs where the first node is
|
|
7013
|
+
in ``from_`` and the second node is in ``to`` are computed and returned.
|
|
7014
|
+
Default is ``None``.
|
|
7015
|
+
between : Relationship, optional
|
|
7016
|
+
A binary relationship containing pairs of nodes. When provided,
|
|
7017
|
+
constrains the domain of the preferential attachment computation: only
|
|
7018
|
+
preferential attachment scores for the specific node pairs in
|
|
7019
|
+
this relationship are computed and returned. Mutually exclusive
|
|
7020
|
+
with other parameters.
|
|
7021
|
+
Default is ``None``.
|
|
7022
|
+
|
|
6672
7023
|
Returns
|
|
6673
7024
|
-------
|
|
6674
7025
|
Relationship
|
|
6675
7026
|
A ternary relationship where each tuple represents a pair of nodes
|
|
6676
7027
|
and their preferential attachment score.
|
|
6677
7028
|
|
|
7029
|
+
Raises
|
|
7030
|
+
------
|
|
7031
|
+
ValueError
|
|
7032
|
+
If ``full`` is provided with any other parameter.
|
|
7033
|
+
If ``between`` is provided with any other parameter.
|
|
7034
|
+
If ``from_`` is provided with any parameter other than ``to``.
|
|
7035
|
+
If none of ``full``, ``from_``, or ``between`` is provided.
|
|
7036
|
+
If ``full`` is not ``True`` or ``None``.
|
|
7037
|
+
AssertionError
|
|
7038
|
+
If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
|
|
7039
|
+
If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
|
|
7040
|
+
If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
|
|
7041
|
+
If ``from_`` or ``to`` is not a unary relationship.
|
|
7042
|
+
If ``between`` is not a binary relationship.
|
|
7043
|
+
|
|
6678
7044
|
Relationship Schema
|
|
6679
7045
|
-------------------
|
|
6680
7046
|
``preferential_attachment(node_u, node_v, score)``
|
|
@@ -6691,6 +7057,38 @@ class Graph():
|
|
|
6691
7057
|
| Directed | Yes | |
|
|
6692
7058
|
| Weighted | Yes | Weights are ignored. |
|
|
6693
7059
|
|
|
7060
|
+
Notes
|
|
7061
|
+
-----
|
|
7062
|
+
The ``preferential_attachment(full=True)`` method computes and caches
|
|
7063
|
+
the full preferential attachment relationship for all pairs of nodes,
|
|
7064
|
+
providing efficient reuse across multiple calls. This can be expensive
|
|
7065
|
+
as the result contains O(|V|²) tuples.
|
|
7066
|
+
|
|
7067
|
+
Calling ``preferential_attachment()`` without arguments raises a ``ValueError``,
|
|
7068
|
+
to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
|
|
7069
|
+
|
|
7070
|
+
In contrast, ``preferential_attachment(from_=subset)`` constrains the computation to
|
|
7071
|
+
tuples with the first position in the passed-in ``subset``. The result is
|
|
7072
|
+
not cached; it is specific to the call site. When a significant fraction of
|
|
7073
|
+
the preferential attachment relation is needed across a program,
|
|
7074
|
+
``preferential_attachment(full=True)`` is typically more efficient. Use
|
|
7075
|
+
``preferential_attachment(from_=subset)`` only when small subsets of
|
|
7076
|
+
the preferential attachment relationship are needed
|
|
7077
|
+
collectively across the program.
|
|
7078
|
+
|
|
7079
|
+
The ``to`` parameter can be used together with ``from_`` to further
|
|
7080
|
+
constrain the computation: ``preferential_attachment(from_=subset_a, to=subset_b)``
|
|
7081
|
+
computes preferential attachment scores only for node pairs where the first node is in
|
|
7082
|
+
``subset_a`` and the second node is in ``subset_b``. (Since ``preferential_attachment``
|
|
7083
|
+
is symmetric in its first two positions, using ``to`` without ``from_`` would
|
|
7084
|
+
be functionally redundant, and is not allowed.)
|
|
7085
|
+
|
|
7086
|
+
The ``between`` parameter provides another way to constrain the computation.
|
|
7087
|
+
Unlike ``from_`` and ``to``, which allow you to independently constrain the first
|
|
7088
|
+
and second positions in ``preferential_attachment`` tuples to sets of nodes, ``between``
|
|
7089
|
+
allows you to constrain the first and second positions, jointly, to specific pairs
|
|
7090
|
+
of nodes.
|
|
7091
|
+
|
|
6694
7092
|
Examples
|
|
6695
7093
|
--------
|
|
6696
7094
|
>>> from relationalai.semantics import Model, define, select, Integer
|
|
@@ -6712,10 +7110,10 @@ class Graph():
|
|
|
6712
7110
|
... Edge.new(src=n4, dst=n3),
|
|
6713
7111
|
... )
|
|
6714
7112
|
>>>
|
|
6715
|
-
>>> # 3. Select the preferential attachment
|
|
7113
|
+
>>> # 3. Select the preferential attachment scores from the full relationship
|
|
6716
7114
|
>>> u, v = Node.ref("u"), Node.ref("v")
|
|
6717
7115
|
>>> score = Integer.ref("score")
|
|
6718
|
-
>>> preferential_attachment = graph.preferential_attachment()
|
|
7116
|
+
>>> preferential_attachment = graph.preferential_attachment(full=True)
|
|
6719
7117
|
>>> select(
|
|
6720
7118
|
... u.id, v.id, score,
|
|
6721
7119
|
... ).where(
|
|
@@ -6727,64 +7125,302 @@ class Graph():
|
|
|
6727
7125
|
id id2 score
|
|
6728
7126
|
0 1 3 3
|
|
6729
7127
|
|
|
7128
|
+
>>> # 4. Use 'from_' parameter to constrain the set of nodes for the first position
|
|
7129
|
+
>>> # Define a subset containing only node 1
|
|
7130
|
+
>>> from relationalai.semantics import where
|
|
7131
|
+
>>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
|
|
7132
|
+
>>> node = Node.ref()
|
|
7133
|
+
>>> where(node.id == 1).define(subset(node))
|
|
7134
|
+
>>>
|
|
7135
|
+
>>> # Get preferential attachment scores only for pairs where first node is in subset
|
|
7136
|
+
>>> constrained_preferential_attachment = graph.preferential_attachment(from_=subset)
|
|
7137
|
+
>>> select(u.id, v.id, score).where(constrained_preferential_attachment(u, v, score)).inspect()
|
|
7138
|
+
▰▰▰▰ Setup complete
|
|
7139
|
+
id id2 score
|
|
7140
|
+
0 1 1 1
|
|
7141
|
+
1 1 2 3
|
|
7142
|
+
2 1 3 3
|
|
7143
|
+
3 1 4 2
|
|
7144
|
+
|
|
7145
|
+
>>> # 5. Use both 'from_' and 'to' parameters to constrain both positions
|
|
7146
|
+
>>> from_subset = model.Relationship(f"{{node:{Node}}} is in from_subset")
|
|
7147
|
+
>>> to_subset = model.Relationship(f"{{node:{Node}}} is in to_subset")
|
|
7148
|
+
>>> where(node.id == 1).define(from_subset(node))
|
|
7149
|
+
>>> where(node.id == 3).define(to_subset(node))
|
|
7150
|
+
>>>
|
|
7151
|
+
>>> # Get preferential attachment scores only where first node is in from_subset and second node is in to_subset
|
|
7152
|
+
>>> constrained_preferential_attachment = graph.preferential_attachment(from_=from_subset, to=to_subset)
|
|
7153
|
+
>>> select(u.id, v.id, score).where(constrained_preferential_attachment(u, v, score)).inspect()
|
|
7154
|
+
▰▰▰▰ Setup complete
|
|
7155
|
+
id id2 score
|
|
7156
|
+
0 1 3 3
|
|
7157
|
+
|
|
7158
|
+
>>> # 6. Use 'between' parameter to constrain to specific pairs of nodes
|
|
7159
|
+
>>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
|
|
7160
|
+
>>> node_a, node_b = Node.ref(), Node.ref()
|
|
7161
|
+
>>> where(node_a.id == 1, node_b.id == 3).define(pairs(node_a, node_b))
|
|
7162
|
+
>>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
|
|
7163
|
+
>>>
|
|
7164
|
+
>>> # Get preferential attachment scores only for the specific pairs (1, 3) and (2, 4)
|
|
7165
|
+
>>> constrained_preferential_attachment = graph.preferential_attachment(between=pairs)
|
|
7166
|
+
>>> select(u.id, v.id, score).where(constrained_preferential_attachment(u, v, score)).inspect()
|
|
7167
|
+
▰▰▰▰ Setup complete
|
|
7168
|
+
id id2 score
|
|
7169
|
+
0 1 3 3
|
|
7170
|
+
1 2 4 6
|
|
7171
|
+
|
|
6730
7172
|
"""
|
|
6731
|
-
|
|
6732
|
-
|
|
6733
|
-
|
|
6734
|
-
"of all pairs of nodes of the graph. To provide better control over "
|
|
6735
|
-
"the computed subset, `preferential_attachment`'s interface will soon "
|
|
6736
|
-
"need to change."
|
|
6737
|
-
),
|
|
6738
|
-
FutureWarning,
|
|
6739
|
-
stacklevel=2
|
|
7173
|
+
# Validate domain constraint parameters.
|
|
7174
|
+
self._validate_domain_constraint_parameters(
|
|
7175
|
+
'preferential_attachment', full, from_, to, between
|
|
6740
7176
|
)
|
|
6741
7177
|
|
|
7178
|
+
# At this point, exactly one of `full`, `from_`, or `between`
|
|
7179
|
+
# has been provided, and if `to` is provided, `from_` is also provided.
|
|
7180
|
+
|
|
7181
|
+
# Handle `between`.
|
|
7182
|
+
if between is not None:
|
|
7183
|
+
self._validate_pair_subset_parameter(between)
|
|
7184
|
+
return self._preferential_attachment_between(between)
|
|
7185
|
+
|
|
7186
|
+
# Handle `from_` (and potentially `to`).
|
|
7187
|
+
if from_ is not None:
|
|
7188
|
+
self._validate_node_subset_parameter('from_', from_)
|
|
7189
|
+
if to is not None:
|
|
7190
|
+
self._validate_node_subset_parameter('to', to)
|
|
7191
|
+
return self._preferential_attachment_from_to(from_, to)
|
|
7192
|
+
return self._preferential_attachment_from(from_)
|
|
7193
|
+
|
|
7194
|
+
# Handle `full`.
|
|
6742
7195
|
return self._preferential_attachment
|
|
6743
7196
|
|
|
6744
7197
|
@cached_property
|
|
6745
7198
|
def _preferential_attachment(self):
|
|
6746
|
-
"""Lazily define and cache the
|
|
6747
|
-
_preferential_attachment_rel = self.
|
|
7199
|
+
"""Lazily define and cache the full preferential_attachment relationship."""
|
|
7200
|
+
_preferential_attachment_rel = self._create_preferential_attachment_relationship()
|
|
6748
7201
|
_preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment"))
|
|
7202
|
+
return _preferential_attachment_rel
|
|
7203
|
+
|
|
7204
|
+
def _preferential_attachment_from(self, node_subset_from: Relationship):
|
|
7205
|
+
"""
|
|
7206
|
+
Create a preferential_attachment relationship, with the first position in each
|
|
7207
|
+
tuple constrained to be in the given subset of nodes. Note this relationship
|
|
7208
|
+
is not cached; it is specific to the callsite.
|
|
7209
|
+
"""
|
|
7210
|
+
_preferential_attachment_rel = self._create_preferential_attachment_relationship(
|
|
7211
|
+
node_subset_from=node_subset_from
|
|
7212
|
+
)
|
|
7213
|
+
_preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment_from"))
|
|
7214
|
+
return _preferential_attachment_rel
|
|
7215
|
+
|
|
7216
|
+
def _preferential_attachment_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
|
|
7217
|
+
"""
|
|
7218
|
+
Create a preferential_attachment relationship, with the first position in each
|
|
7219
|
+
tuple constrained to be in `node_subset_from`, and the second position in
|
|
7220
|
+
each tuple constrained to be in `node_subset_to`. Note this relationship
|
|
7221
|
+
is not cached; it is specific to the callsite.
|
|
7222
|
+
"""
|
|
7223
|
+
_preferential_attachment_rel = self._create_preferential_attachment_relationship(
|
|
7224
|
+
node_subset_from=node_subset_from,
|
|
7225
|
+
node_subset_to=node_subset_to
|
|
7226
|
+
)
|
|
7227
|
+
_preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment_from_to"))
|
|
7228
|
+
return _preferential_attachment_rel
|
|
7229
|
+
|
|
7230
|
+
def _preferential_attachment_between(self, pair_subset_between: Relationship):
|
|
7231
|
+
"""
|
|
7232
|
+
Create a preferential_attachment relationship, with the first and second position
|
|
7233
|
+
in each tuple jointly constrained to be in the given set of pairs
|
|
7234
|
+
of nodes. Note this relationship is not cached;
|
|
7235
|
+
it is specific to the callsite.
|
|
7236
|
+
"""
|
|
7237
|
+
_preferential_attachment_rel = self._create_preferential_attachment_relationship(
|
|
7238
|
+
pair_subset_between=pair_subset_between
|
|
7239
|
+
)
|
|
7240
|
+
_preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment_between"))
|
|
7241
|
+
return _preferential_attachment_rel
|
|
6749
7242
|
|
|
7243
|
+
def _create_preferential_attachment_relationship(
|
|
7244
|
+
self,
|
|
7245
|
+
*,
|
|
7246
|
+
node_subset_from: Optional[Relationship] = None,
|
|
7247
|
+
node_subset_to: Optional[Relationship] = None,
|
|
7248
|
+
pair_subset_between: Optional[Relationship] = None,
|
|
7249
|
+
):
|
|
7250
|
+
"""
|
|
7251
|
+
Create a preferential_attachment relationship, optionally constrained by
|
|
7252
|
+
the provided node subsets or pair subset.
|
|
7253
|
+
"""
|
|
7254
|
+
_preferential_attachment_rel = self._model.Relationship(
|
|
7255
|
+
f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
|
|
7256
|
+
f"have preferential attachment score {{score:Integer}}"
|
|
7257
|
+
)
|
|
7258
|
+
|
|
7259
|
+
# Branch by case to select appropriate count_neighbor and isolated_node relationships,
|
|
7260
|
+
# and to define relevant constraints on the separate and joint domains of node_u and node_v.
|
|
6750
7261
|
node_u, node_v = self.Node.ref(), self.Node.ref()
|
|
6751
|
-
count_u, count_v = Integer.ref(), Integer.ref()
|
|
6752
7262
|
|
|
6753
|
-
#
|
|
6754
|
-
|
|
7263
|
+
# Handle the `between` case.
|
|
7264
|
+
if pair_subset_between is not None:
|
|
7265
|
+
# Collect nodes that appear in the subset by position.
|
|
7266
|
+
first_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
|
|
7267
|
+
second_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
|
|
7268
|
+
node_x, node_y = self.Node.ref(), self.Node.ref()
|
|
7269
|
+
where(
|
|
7270
|
+
pair_subset_between(node_x, node_y)
|
|
7271
|
+
).define(
|
|
7272
|
+
first_position_subset(node_x),
|
|
7273
|
+
second_position_subset(node_y)
|
|
7274
|
+
)
|
|
7275
|
+
|
|
7276
|
+
# Constituents of non-isolated-nodes rule.
|
|
7277
|
+
non_isolated_rule_uv_constraint = [pair_subset_between(node_u, node_v)]
|
|
7278
|
+
count_neighbor_u_rel = self._count_neighbor_of(first_position_subset)
|
|
7279
|
+
count_neighbor_v_rel = self._count_neighbor_of(second_position_subset)
|
|
7280
|
+
|
|
7281
|
+
# Constituents of u-isolated rule.
|
|
7282
|
+
isolated_u_rel = self._isolated_node_of(first_position_subset)
|
|
7283
|
+
isolated_u_rule_uv_constraint = [pair_subset_between(node_u, node_v)]
|
|
7284
|
+
|
|
7285
|
+
# Constituents of v-isolated rule.
|
|
7286
|
+
isolated_v_rel = self._isolated_node_of(second_position_subset)
|
|
7287
|
+
isolated_v_rule_uv_constraint = [pair_subset_between(node_u, node_v)]
|
|
7288
|
+
|
|
7289
|
+
# Handle the `from_` case.
|
|
7290
|
+
elif node_subset_from is not None and node_subset_to is None:
|
|
7291
|
+
# NOTE: It isn't necessary to compute _count_neighbor_of
|
|
7292
|
+
# and _isolated_node_of for node_subset_from, given
|
|
7293
|
+
# we have to compute _count_neighbor and _isolated_node
|
|
7294
|
+
# for the unconstrained second position anyway. That does
|
|
7295
|
+
# require additional constraints as seen below, though.
|
|
7296
|
+
#
|
|
7297
|
+
# It's not clear to this author that there is a more clever
|
|
7298
|
+
# way to do this, given that in preferential attachment,
|
|
7299
|
+
# constraining one position implies no constraint on the
|
|
7300
|
+
# other position, unlike in, e.g., common neighbor?
|
|
7301
|
+
|
|
7302
|
+
# Constituents of non-isolated-nodes rule.
|
|
7303
|
+
non_isolated_rule_uv_constraint = [node_subset_from(node_u)]
|
|
7304
|
+
count_neighbor_u_rel = self._count_neighbor
|
|
7305
|
+
count_neighbor_v_rel = self._count_neighbor
|
|
7306
|
+
|
|
7307
|
+
# Constituents of u-isolated rule.
|
|
7308
|
+
isolated_u_rel = self._isolated_node
|
|
7309
|
+
isolated_u_rule_uv_constraint = [
|
|
7310
|
+
node_subset_from(node_u),
|
|
7311
|
+
self.Node(node_v)
|
|
7312
|
+
]
|
|
7313
|
+
|
|
7314
|
+
# Constituents of v-isolated rule.
|
|
7315
|
+
isolated_v_rel = self._isolated_node
|
|
7316
|
+
isolated_v_rule_uv_constraint = [node_subset_from(node_u)]
|
|
6755
7317
|
|
|
6756
|
-
#
|
|
7318
|
+
# Handle the `from_`/`to` case.
|
|
7319
|
+
elif node_subset_from is not None and node_subset_to is not None:
|
|
7320
|
+
# Check for object identity optimization.
|
|
7321
|
+
if node_subset_from is node_subset_to:
|
|
7322
|
+
# Constituents of non-isolated-nodes rule.
|
|
7323
|
+
non_isolated_rule_uv_constraint = []
|
|
7324
|
+
count_neighbor_u_rel = self._count_neighbor_of(node_subset_from)
|
|
7325
|
+
count_neighbor_v_rel = count_neighbor_u_rel
|
|
7326
|
+
|
|
7327
|
+
# Constituents of u-isolated rule.
|
|
7328
|
+
isolated_u_rel = self._isolated_node_of(node_subset_from)
|
|
7329
|
+
isolated_u_rule_uv_constraint = [node_subset_to(node_v)]
|
|
7330
|
+
|
|
7331
|
+
# Constituents of v-isolated rule.
|
|
7332
|
+
isolated_v_rel = isolated_u_rel
|
|
7333
|
+
isolated_v_rule_uv_constraint = [node_subset_from(node_u)]
|
|
7334
|
+
else:
|
|
7335
|
+
# Constituents of non-isolated-nodes rule.
|
|
7336
|
+
non_isolated_rule_uv_constraint = []
|
|
7337
|
+
count_neighbor_u_rel = self._count_neighbor_of(node_subset_from)
|
|
7338
|
+
count_neighbor_v_rel = self._count_neighbor_of(node_subset_to)
|
|
7339
|
+
|
|
7340
|
+
# Constituents of u-isolated rule.
|
|
7341
|
+
isolated_u_rel = self._isolated_node_of(node_subset_from)
|
|
7342
|
+
isolated_u_rule_uv_constraint = [node_subset_to(node_v)]
|
|
7343
|
+
|
|
7344
|
+
# Constituents of v-isolated rule.
|
|
7345
|
+
isolated_v_rel = self._isolated_node_of(node_subset_to)
|
|
7346
|
+
isolated_v_rule_uv_constraint = [node_subset_from(node_u)]
|
|
7347
|
+
|
|
7348
|
+
|
|
7349
|
+
# Handle the `full` case.
|
|
7350
|
+
else:
|
|
7351
|
+
# Constituents of non-isolated-nodes rule.
|
|
7352
|
+
non_isolated_rule_uv_constraint = []
|
|
7353
|
+
count_neighbor_u_rel = self._count_neighbor
|
|
7354
|
+
count_neighbor_v_rel = self._count_neighbor
|
|
7355
|
+
|
|
7356
|
+
# Constituents of u-isolated rule.
|
|
7357
|
+
isolated_u_rel = self._isolated_node
|
|
7358
|
+
isolated_u_rule_uv_constraint = [self.Node(node_v)]
|
|
7359
|
+
|
|
7360
|
+
# Constituents of v-isolated rule.
|
|
7361
|
+
isolated_v_rel = self._isolated_node
|
|
7362
|
+
isolated_v_rule_uv_constraint = [self.Node(node_u)]
|
|
7363
|
+
|
|
7364
|
+
# Define shared logic, which has three cases.
|
|
7365
|
+
count_u, count_v = Integer.ref(), Integer.ref()
|
|
7366
|
+
|
|
7367
|
+
# Case where node u is isolated, and node v is any node (respecting constraints): score 0.
|
|
6757
7368
|
where(
|
|
6758
|
-
|
|
6759
|
-
|
|
7369
|
+
isolated_u_rel(node_u),
|
|
7370
|
+
*isolated_u_rule_uv_constraint,
|
|
6760
7371
|
).define(_preferential_attachment_rel(node_u, node_v, 0))
|
|
6761
7372
|
|
|
6762
|
-
# Case where node u is any node, and node v is isolated: score 0.
|
|
7373
|
+
# Case where node u is any node (respecting constraints), and node v is isolated: score 0.
|
|
6763
7374
|
where(
|
|
6764
|
-
|
|
6765
|
-
|
|
7375
|
+
*isolated_v_rule_uv_constraint,
|
|
7376
|
+
isolated_v_rel(node_v)
|
|
6766
7377
|
).define(_preferential_attachment_rel(node_u, node_v, 0))
|
|
6767
7378
|
|
|
6768
7379
|
# Case where neither node is isolated: score is count_neighbor[u] * count_neighbor[v].
|
|
6769
7380
|
where(
|
|
6770
|
-
|
|
6771
|
-
|
|
7381
|
+
*non_isolated_rule_uv_constraint,
|
|
7382
|
+
count_neighbor_u_rel(node_u, count_u),
|
|
7383
|
+
count_neighbor_v_rel(node_v, count_v)
|
|
6772
7384
|
).define(_preferential_attachment_rel(node_u, node_v, count_u * count_v))
|
|
6773
7385
|
|
|
6774
7386
|
return _preferential_attachment_rel
|
|
6775
7387
|
|
|
7388
|
+
|
|
6776
7389
|
@cached_property
|
|
6777
7390
|
def _isolated_node(self):
|
|
7391
|
+
"""Lazily define and cache the self._isolated_node relationship."""
|
|
7392
|
+
return self._create_isolated_node_relationship()
|
|
7393
|
+
|
|
7394
|
+
def _isolated_node_of(self, node_subset: Relationship):
|
|
6778
7395
|
"""
|
|
6779
|
-
|
|
6780
|
-
|
|
7396
|
+
Create an _isolated_node relationship constrained to the subset of nodes
|
|
7397
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
7398
|
+
specific to the callsite.
|
|
7399
|
+
"""
|
|
7400
|
+
return self._create_isolated_node_relationship(node_subset=node_subset)
|
|
7401
|
+
|
|
7402
|
+
def _create_isolated_node_relationship(
|
|
7403
|
+
self,
|
|
7404
|
+
*,
|
|
7405
|
+
node_subset: Optional[Relationship] = None,
|
|
7406
|
+
):
|
|
7407
|
+
"""
|
|
7408
|
+
Create an _isolated_node relationship, optionally constrained by
|
|
7409
|
+
the provided node subset.
|
|
6781
7410
|
"""
|
|
6782
7411
|
_isolated_node_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} is isolated")
|
|
6783
7412
|
|
|
6784
7413
|
neighbor_node = self.Node.ref()
|
|
7414
|
+
if node_subset is not None:
|
|
7415
|
+
neighbor_rel = self._neighbor_of(node_subset)
|
|
7416
|
+
node_constraint = node_subset(self.Node)
|
|
7417
|
+
else:
|
|
7418
|
+
neighbor_rel = self._neighbor
|
|
7419
|
+
node_constraint = self.Node
|
|
7420
|
+
|
|
6785
7421
|
where(
|
|
6786
|
-
|
|
6787
|
-
not_(
|
|
7422
|
+
node_constraint,
|
|
7423
|
+
not_(neighbor_rel(self.Node, neighbor_node))
|
|
6788
7424
|
).define(_isolated_node_rel(self.Node))
|
|
6789
7425
|
|
|
6790
7426
|
return _isolated_node_rel
|