relationalai 0.12.0__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1056,6 +1056,181 @@ class Graph():
1056
1056
 
1057
1057
  # End Visualization --------------------------------------------------------
1058
1058
 
1059
+ # The following three helper methods validate
1060
+ # `full`, `from_`, `to`, and `between`
1061
+ # parameters to public methods that accept them.
1062
+
1063
+ def _validate_domain_constraint_parameters(
1064
+ self,
1065
+ method_name: str,
1066
+ full: Optional[bool],
1067
+ from_: Optional[Relationship],
1068
+ to: Optional[Relationship],
1069
+ between: Optional[Relationship],
1070
+ ):
1071
+ """
1072
+ Validate the domain constraint parameters for methods that accept
1073
+ `full`, `from_`, `to`, and `between` parameters.
1074
+
1075
+ This helper method performs common validation logic that applies
1076
+ across multiple graph methods (e.g., common_neighbor, adamic_adar).
1077
+
1078
+ Parameters
1079
+ ----------
1080
+ method_name : str
1081
+ The name of the method being validated (for error messages).
1082
+ full : bool, optional
1083
+ The full parameter value.
1084
+ from_ : Relationship, optional
1085
+ The from_ parameter value.
1086
+ to : Relationship, optional
1087
+ The to parameter value.
1088
+ between : Relationship, optional
1089
+ The between parameter value.
1090
+
1091
+ Raises
1092
+ ------
1093
+ ValueError
1094
+ If parameter combinations are invalid.
1095
+ """
1096
+ # Confirm that `full` was not provided with any other parameter.
1097
+ if (
1098
+ full is not None
1099
+ and (
1100
+ from_ is not None or
1101
+ to is not None or
1102
+ between is not None
1103
+ )
1104
+ ):
1105
+ raise ValueError(
1106
+ "The 'full' parameter is mutually exclusive with 'from_', 'to', and 'between'. "
1107
+ f"Use 'full=True' to compute {method_name} for all node pairs, "
1108
+ "or use 'from_'/'to'/'between' to constrain computation to "
1109
+ "specific nodes or pairs."
1110
+ )
1111
+
1112
+ # Confirm that `between` was not provided with any other parameter.
1113
+ if (between is not None
1114
+ and (
1115
+ from_ is not None or
1116
+ to is not None
1117
+ # `full is None` is already guaranteed by the preceding check.
1118
+ )
1119
+ ):
1120
+ raise ValueError(
1121
+ "The 'between' parameter is mutually exclusive with 'from_' and 'to'. "
1122
+ "Use 'between' to constrain computation to specific node pairs, "
1123
+ "or use 'from_'/'to' to constrain by position."
1124
+ )
1125
+
1126
+ # Confirm that 'to' is only used with 'from_'.
1127
+ if to is not None and from_ is None:
1128
+ raise ValueError(
1129
+ "The 'to' parameter can only be used together with the 'from_' parameter. "
1130
+ f"The 'from_' parameter constrains the first position in {method_name} tuples, "
1131
+ f"while 'to' constrains the second position. Since {method_name} is symmetric "
1132
+ "in its first two positions, 'to' without 'from_' would be functionally redundant. "
1133
+ "Please either provide both 'from_' and 'to' parameters, or only 'from_'."
1134
+ )
1135
+
1136
+ # If no parameters are provided, raise an exception
1137
+ # to avoid unintentional, potentially expensive full computation.
1138
+ if (
1139
+ full is None and
1140
+ from_ is None and
1141
+ between is None
1142
+ ):
1143
+ raise ValueError(
1144
+ f"Computing {method_name} for all pairs of nodes can be expensive. "
1145
+ f"To compute the full {method_name} relationship, "
1146
+ f"please call `{method_name}(full=True)`. To constrain computation to specific nodes, "
1147
+ f"please use `{method_name}(from_=node_subset)`, "
1148
+ f"`{method_name}(from_=node_subset_a, to=node_subset_b)`, "
1149
+ f"or `{method_name}(between=node_pairs)`."
1150
+ )
1151
+
1152
+ # Validate that full is True (not just not None).
1153
+ # At this point, a non-None `full` implies that
1154
+ # no other parameters were provided.
1155
+ if full is not None and full is not True:
1156
+ raise ValueError(
1157
+ f"Invalid value (`{full}`) for 'full' parameter. Use `full=True` "
1158
+ f"to compute the full {method_name} relationship, or use 'from_', "
1159
+ "'from_' and 'to', or 'between' to constrain computation."
1160
+ )
1161
+
1162
+ def _validate_node_subset_parameter(
1163
+ self,
1164
+ parameter_name: str,
1165
+ node_subset_relation: Relationship,
1166
+ ):
1167
+ """
1168
+ Validate that a parameter identifying a subset of nodes of interest is
1169
+ a unary relationship, of nodes, attached to the same model
1170
+ that the graph is attached to.
1171
+ """
1172
+ # Validate that the parameter is a relationship.
1173
+ assert isinstance(node_subset_relation, Relationship), (
1174
+ f"The '{parameter_name}' parameter must be a `Relationship`, "
1175
+ f"but is a `{type(node_subset_relation).__name__}`."
1176
+ )
1177
+
1178
+ # Validate that the relationship is attached to the same model as the graph.
1179
+ assert node_subset_relation._model is self._model, (
1180
+ f"The given '{parameter_name}' relationship must "
1181
+ "be attached to the same model as the graph."
1182
+ )
1183
+
1184
+ # Validate that it's a unary relationship (has exactly one field).
1185
+ assert len(node_subset_relation._fields) == 1, (
1186
+ f"The '{parameter_name}' parameter must be a unary relationship, "
1187
+ f"but it has {len(node_subset_relation._fields)} fields."
1188
+ )
1189
+
1190
+ # Validate that the concept type matches the graph's Node concept.
1191
+ assert node_subset_relation._fields[0].type_str == self.Node._name, (
1192
+ f"The '{parameter_name}' relationship must be over "
1193
+ f"the graph's Node concept ('{self.Node._name}'), "
1194
+ f"but is over '{node_subset_relation._fields[0].type_str}'."
1195
+ )
1196
+
1197
+ # No parameter name argument at this time, as this pertains only to `between` for now.
1198
+ def _validate_pair_subset_parameter(self, pairs_relation):
1199
+ """
1200
+ Validate that a parameter identifying pairs of nodes of interest is
1201
+ a binary relationship, of pairs of nodes, attached to the same model
1202
+ that the graph is attached to.
1203
+ """
1204
+ # Validate that the parameter is a relationship.
1205
+ assert isinstance(pairs_relation, Relationship), (
1206
+ "The 'between' parameter must be a `Relationship`, "
1207
+ f"but is a `{type(pairs_relation).__name__}`."
1208
+ )
1209
+
1210
+ # Validate that the relationship is attached to the same model as the graph.
1211
+ assert pairs_relation._model is self._model, (
1212
+ "The given 'between' relationship must be "
1213
+ "attached to the same model as the graph."
1214
+ )
1215
+
1216
+ # Validate that it's a binary relationship (has exactly two fields).
1217
+ assert len(pairs_relation._fields) == 2, (
1218
+ "The 'between' parameter must be a binary relationship, "
1219
+ f"but it has {len(pairs_relation._fields)} fields."
1220
+ )
1221
+
1222
+ # Validate that both fields are typed as the graph's Node concept.
1223
+ assert pairs_relation._fields[0].type_str == self.Node._name, (
1224
+ "The 'between' relationship's first field must be "
1225
+ f"the graph's Node concept ('{self.Node._name}'), "
1226
+ f"but is '{pairs_relation._fields[0].type_str}'."
1227
+ )
1228
+ assert pairs_relation._fields[1].type_str == self.Node._name, (
1229
+ f"The 'between' relationship's second field must be "
1230
+ f"the graph's Node concept ('{self.Node._name}'), "
1231
+ f"but is '{pairs_relation._fields[1].type_str}'."
1232
+ )
1233
+
1059
1234
 
1060
1235
  # The following three `_count_[in,out]neighbor` relationships are
1061
1236
  # primarily for internal consumption. They differ from corresponding
@@ -1065,26 +1240,26 @@ class Graph():
1065
1240
  @cached_property
1066
1241
  def _count_neighbor(self):
1067
1242
  """Lazily define and cache the self._count_neighbor relationship."""
1068
- return self._create_count_neighbor_relationship(nodes_subset=None)
1243
+ return self._create_count_neighbor_relationship(node_subset=None)
1069
1244
 
1070
- def _count_neighbor_of(self, nodes_subset: Relationship):
1245
+ def _count_neighbor_of(self, node_subset: Relationship):
1071
1246
  """
1072
1247
  Create a _count_neighbor relationship constrained to the subset of nodes
1073
- in `nodes_subset`. Note this relationship is not cached; it is
1248
+ in `node_subset`. Note this relationship is not cached; it is
1074
1249
  specific to the callsite.
1075
1250
  """
1076
- return self._create_count_neighbor_relationship(nodes_subset=nodes_subset)
1251
+ return self._create_count_neighbor_relationship(node_subset=node_subset)
1077
1252
 
1078
- def _create_count_neighbor_relationship(self, *, nodes_subset: Optional[Relationship]):
1253
+ def _create_count_neighbor_relationship(self, *, node_subset: Optional[Relationship]):
1079
1254
  _count_neighbor_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has neighbor count {{count:Integer}}")
1080
1255
 
1081
1256
  # Choose the appropriate neighbor relationship based on whether we have constraints
1082
- if nodes_subset is None:
1257
+ if node_subset is None:
1083
1258
  # No constraint - use cached neighbor relationship
1084
1259
  neighbor_rel = self._neighbor
1085
1260
  else:
1086
1261
  # Constrained to nodes in the subset - use constrained neighbor relationship
1087
- neighbor_rel = self._neighbor_of(nodes_subset)
1262
+ neighbor_rel = self._neighbor_of(node_subset)
1088
1263
 
1089
1264
  # Apply the same counting logic for both cases
1090
1265
  src, dst = self.Node.ref(), self.Node.ref()
@@ -1095,26 +1270,26 @@ class Graph():
1095
1270
  @cached_property
1096
1271
  def _count_inneighbor(self):
1097
1272
  """Lazily define and cache the self._count_inneighbor relationship."""
1098
- return self._create_count_inneighbor_relationship(nodes_subset=None)
1273
+ return self._create_count_inneighbor_relationship(node_subset=None)
1099
1274
 
1100
- def _count_inneighbor_of(self, nodes_subset: Relationship):
1275
+ def _count_inneighbor_of(self, node_subset: Relationship):
1101
1276
  """
1102
1277
  Create a _count_inneighbor relationship constrained to the subset of nodes
1103
- in `nodes_subset`. Note this relationship is not cached; it is
1278
+ in `node_subset`. Note this relationship is not cached; it is
1104
1279
  specific to the callsite.
1105
1280
  """
1106
- return self._create_count_inneighbor_relationship(nodes_subset=nodes_subset)
1281
+ return self._create_count_inneighbor_relationship(node_subset=node_subset)
1107
1282
 
1108
- def _create_count_inneighbor_relationship(self, *, nodes_subset: Optional[Relationship]):
1283
+ def _create_count_inneighbor_relationship(self, *, node_subset: Optional[Relationship]):
1109
1284
  _count_inneighbor_rel = self._model.Relationship(f"{{dst:{self._NodeConceptStr}}} has inneighbor count {{count:Integer}}")
1110
1285
 
1111
1286
  # Choose the appropriate inneighbor relationship based on whether we have constraints
1112
- if nodes_subset is None:
1287
+ if node_subset is None:
1113
1288
  # No constraint - use cached inneighbor relationship
1114
1289
  inneighbor_rel = self._inneighbor
1115
1290
  else:
1116
1291
  # Constrained to nodes in the subset - use constrained inneighbor relationship
1117
- inneighbor_rel = self._inneighbor_of(nodes_subset)
1292
+ inneighbor_rel = self._inneighbor_of(node_subset)
1118
1293
 
1119
1294
  # Apply the same counting logic for both cases
1120
1295
  dst, src = self.Node.ref(), self.Node.ref()
@@ -1125,26 +1300,26 @@ class Graph():
1125
1300
  @cached_property
1126
1301
  def _count_outneighbor(self):
1127
1302
  """Lazily define and cache the self._count_outneighbor relationship."""
1128
- return self._create_count_outneighbor_relationship(nodes_subset=None)
1303
+ return self._create_count_outneighbor_relationship(node_subset=None)
1129
1304
 
1130
- def _count_outneighbor_of(self, nodes_subset: Relationship):
1305
+ def _count_outneighbor_of(self, node_subset: Relationship):
1131
1306
  """
1132
1307
  Create a _count_outneighbor relationship constrained to the subset of nodes
1133
- in `nodes_subset`. Note this relationship is not cached; it is
1308
+ in `node_subset`. Note this relationship is not cached; it is
1134
1309
  specific to the callsite.
1135
1310
  """
1136
- return self._create_count_outneighbor_relationship(nodes_subset=nodes_subset)
1311
+ return self._create_count_outneighbor_relationship(node_subset=node_subset)
1137
1312
 
1138
- def _create_count_outneighbor_relationship(self, *, nodes_subset: Optional[Relationship]):
1313
+ def _create_count_outneighbor_relationship(self, *, node_subset: Optional[Relationship]):
1139
1314
  _count_outneighbor_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has outneighbor count {{count:Integer}}")
1140
1315
 
1141
1316
  # Choose the appropriate outneighbor relationship based on whether we have constraints
1142
- if nodes_subset is None:
1317
+ if node_subset is None:
1143
1318
  # No constraint - use cached outneighbor relationship
1144
1319
  outneighbor_rel = self._outneighbor
1145
1320
  else:
1146
1321
  # Constrained to nodes in the subset - use constrained outneighbor relationship
1147
- outneighbor_rel = self._outneighbor_of(nodes_subset)
1322
+ outneighbor_rel = self._outneighbor_of(node_subset)
1148
1323
 
1149
1324
  # Apply the same counting logic for both cases
1150
1325
  src, dst = self.Node.ref(), self.Node.ref()
@@ -1437,56 +1612,27 @@ class Graph():
1437
1612
  return self._neighbor
1438
1613
  else:
1439
1614
  # Validate the 'of' parameter
1440
- self._validate_node_subset_parameter(of)
1615
+ self._validate_node_subset_parameter('of', of)
1441
1616
  return self._neighbor_of(of)
1442
1617
 
1443
- def _validate_node_subset_parameter(self, of_relation):
1444
- """
1445
- Validate that a parameter identifying a subset of nodes of interest is
1446
- is a unary relationship containing nodes that is attached to
1447
- the same model that the graph is attached to.
1448
- """
1449
- # Validate that the parameter is a relationship.
1450
- assert isinstance(of_relation, Relationship), (
1451
- "The 'of' parameter must be a `Relationship`, "
1452
- f"but is a `{type(of_relation).__name__}`."
1453
- )
1454
-
1455
- # Validate that the relationship is attached to the same model as the graph.
1456
- assert of_relation._model is self._model, (
1457
- "The given 'of' relationship must be attached to the same model as the graph."
1458
- )
1459
-
1460
- # Validate that it's a unary relationship (has exactly one field).
1461
- assert len(of_relation._fields) == 1, (
1462
- "The 'of' parameter must be a unary relationship, "
1463
- f"but it has {len(of_relation._fields)} fields."
1464
- )
1465
-
1466
- # Validate that the concept type matches the graph's Node concept.
1467
- assert of_relation._fields[0].type_str == self.Node._name, (
1468
- f"The 'of' relationship must be over the graph's Node concept ('{self.Node._name}'), "
1469
- f"but is over '{of_relation._fields[0].type_str}'."
1470
- )
1471
-
1472
1618
  @cached_property
1473
1619
  def _neighbor(self):
1474
1620
  """Lazily define and cache the self._neighbor relationship."""
1475
- _neighbor_rel = self._create_neighbor_relationship(nodes_subset=None)
1621
+ _neighbor_rel = self._create_neighbor_relationship(node_subset=None)
1476
1622
  _neighbor_rel.annotate(annotations.track("graphs", "neighbor"))
1477
1623
  return _neighbor_rel
1478
1624
 
1479
- def _neighbor_of(self, nodes_subset: Relationship):
1625
+ def _neighbor_of(self, node_subset: Relationship):
1480
1626
  """
1481
1627
  Create a neighbor relationship constrained to the subset of nodes
1482
- in `nodes_subset`. Note this relationship is not cached; it is
1628
+ in `node_subset`. Note this relationship is not cached; it is
1483
1629
  specific to the callsite.
1484
1630
  """
1485
- _neighbor_rel = self._create_neighbor_relationship(nodes_subset=nodes_subset)
1631
+ _neighbor_rel = self._create_neighbor_relationship(node_subset=node_subset)
1486
1632
  _neighbor_rel.annotate(annotations.track("graphs", "neighbor_of"))
1487
1633
  return _neighbor_rel
1488
1634
 
1489
- def _create_neighbor_relationship(self, *, nodes_subset: Optional[Relationship]):
1635
+ def _create_neighbor_relationship(self, *, node_subset: Optional[Relationship]):
1490
1636
  _neighbor_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has neighbor {{dst:{self._NodeConceptStr}}}")
1491
1637
  src, dst = self.Node.ref(), self.Node.ref()
1492
1638
 
@@ -1497,14 +1643,14 @@ class Graph():
1497
1643
  # Capture out-neighbors.
1498
1644
  where(
1499
1645
  self._edge(src, dst),
1500
- *([nodes_subset(src)] if nodes_subset else [])
1646
+ *([node_subset(src)] if node_subset else [])
1501
1647
  ).define(
1502
1648
  _neighbor_rel(src, dst)
1503
1649
  )
1504
1650
  # Capture in-neighbors.
1505
1651
  where(
1506
1652
  self._edge(src, dst),
1507
- *([nodes_subset(dst)] if nodes_subset else [])
1653
+ *([node_subset(dst)] if node_subset else [])
1508
1654
  ).define(
1509
1655
  _neighbor_rel(dst, src)
1510
1656
  )
@@ -1513,7 +1659,7 @@ class Graph():
1513
1659
  # so a single rule suffices to capture all neighbors.
1514
1660
  where(
1515
1661
  self._edge(src, dst),
1516
- *([nodes_subset(src)] if nodes_subset else [])
1662
+ *([node_subset(src)] if node_subset else [])
1517
1663
  ).define(
1518
1664
  _neighbor_rel(src, dst)
1519
1665
  )
@@ -1628,27 +1774,27 @@ class Graph():
1628
1774
  return self._inneighbor
1629
1775
  else:
1630
1776
  # Validate the 'of' parameter
1631
- self._validate_node_subset_parameter(of)
1777
+ self._validate_node_subset_parameter('of', of)
1632
1778
  return self._inneighbor_of(of)
1633
1779
 
1634
1780
  @cached_property
1635
1781
  def _inneighbor(self):
1636
1782
  """Lazily define and cache the self._inneighbor relationship."""
1637
- _inneighbor_rel = self._create_inneighbor_relationship(nodes_subset=None)
1783
+ _inneighbor_rel = self._create_inneighbor_relationship(node_subset=None)
1638
1784
  _inneighbor_rel.annotate(annotations.track("graphs", "inneighbor"))
1639
1785
  return _inneighbor_rel
1640
1786
 
1641
- def _inneighbor_of(self, nodes_subset: Relationship):
1787
+ def _inneighbor_of(self, node_subset: Relationship):
1642
1788
  """
1643
1789
  Create an inneighbor relationship constrained to the subset of nodes
1644
- in `nodes_subset`. Note this relationship is not cached; it is
1790
+ in `node_subset`. Note this relationship is not cached; it is
1645
1791
  specific to the callsite.
1646
1792
  """
1647
- _inneighbor_rel = self._create_inneighbor_relationship(nodes_subset=nodes_subset)
1793
+ _inneighbor_rel = self._create_inneighbor_relationship(node_subset=node_subset)
1648
1794
  _inneighbor_rel.annotate(annotations.track("graphs", "inneighbor_of"))
1649
1795
  return _inneighbor_rel
1650
1796
 
1651
- def _create_inneighbor_relationship(self, *, nodes_subset: Optional[Relationship]):
1797
+ def _create_inneighbor_relationship(self, *, node_subset: Optional[Relationship]):
1652
1798
  _inneighbor_rel = self._model.Relationship(f"{{dst:{self._NodeConceptStr}}} has inneighbor {{src:{self._NodeConceptStr}}}")
1653
1799
  src, dst = self.Node.ref(), self.Node.ref()
1654
1800
 
@@ -1657,7 +1803,7 @@ class Graph():
1657
1803
  # have an edge to the destination nodes in our subset.
1658
1804
  where(
1659
1805
  self._edge(src, dst),
1660
- *([nodes_subset(dst)] if nodes_subset else [])
1806
+ *([node_subset(dst)] if node_subset else [])
1661
1807
  ).define(
1662
1808
  _inneighbor_rel(dst, src)
1663
1809
  )
@@ -1666,7 +1812,7 @@ class Graph():
1666
1812
  # so neighbors and in-neighbors are the same.
1667
1813
  where(
1668
1814
  self._edge(src, dst),
1669
- *([nodes_subset(dst)] if nodes_subset else [])
1815
+ *([node_subset(dst)] if node_subset else [])
1670
1816
  ).define(
1671
1817
  _inneighbor_rel(dst, src)
1672
1818
  )
@@ -1783,27 +1929,27 @@ class Graph():
1783
1929
  return self._outneighbor
1784
1930
  else:
1785
1931
  # Validate the 'of' parameter
1786
- self._validate_node_subset_parameter(of)
1932
+ self._validate_node_subset_parameter('of', of)
1787
1933
  return self._outneighbor_of(of)
1788
1934
 
1789
1935
  @cached_property
1790
1936
  def _outneighbor(self):
1791
1937
  """Lazily define and cache the self._outneighbor relationship."""
1792
- _outneighbor_rel = self._create_outneighbor_relationship(nodes_subset=None)
1938
+ _outneighbor_rel = self._create_outneighbor_relationship(node_subset=None)
1793
1939
  _outneighbor_rel.annotate(annotations.track("graphs", "outneighbor"))
1794
1940
  return _outneighbor_rel
1795
1941
 
1796
- def _outneighbor_of(self, nodes_subset: Relationship):
1942
+ def _outneighbor_of(self, node_subset: Relationship):
1797
1943
  """
1798
1944
  Create an outneighbor relationship constrained to the subset of nodes
1799
- in `nodes_subset`. Note this relationship is not cached; it is
1945
+ in `node_subset`. Note this relationship is not cached; it is
1800
1946
  specific to the callsite.
1801
1947
  """
1802
- _outneighbor_rel = self._create_outneighbor_relationship(nodes_subset=nodes_subset)
1948
+ _outneighbor_rel = self._create_outneighbor_relationship(node_subset=node_subset)
1803
1949
  _outneighbor_rel.annotate(annotations.track("graphs", "outneighbor_of"))
1804
1950
  return _outneighbor_rel
1805
1951
 
1806
- def _create_outneighbor_relationship(self, *, nodes_subset: Optional[Relationship]):
1952
+ def _create_outneighbor_relationship(self, *, node_subset: Optional[Relationship]):
1807
1953
  _outneighbor_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has outneighbor {{dst:{self._NodeConceptStr}}}")
1808
1954
  src, dst = self.Node.ref(), self.Node.ref()
1809
1955
 
@@ -1812,7 +1958,7 @@ class Graph():
1812
1958
  # have an edge from the source nodes in our subset.
1813
1959
  where(
1814
1960
  self._edge(src, dst),
1815
- *([nodes_subset(src)] if nodes_subset else [])
1961
+ *([node_subset(src)] if node_subset else [])
1816
1962
  ).define(
1817
1963
  _outneighbor_rel(src, dst)
1818
1964
  )
@@ -1821,7 +1967,7 @@ class Graph():
1821
1967
  # so neighbors and out-neighbors are the same.
1822
1968
  where(
1823
1969
  self._edge(src, dst),
1824
- *([nodes_subset(src)] if nodes_subset else [])
1970
+ *([node_subset(src)] if node_subset else [])
1825
1971
  ).define(
1826
1972
  _outneighbor_rel(src, dst)
1827
1973
  )
@@ -1830,18 +1976,67 @@ class Graph():
1830
1976
 
1831
1977
 
1832
1978
  @include_in_docs
1833
- def common_neighbor(self):
1834
- """Returns a ternary relationship of all common neighbor triplets.
1979
+ def common_neighbor(self,
1980
+ *,
1981
+ full: Optional[bool] = None,
1982
+ from_: Optional[Relationship] = None,
1983
+ to: Optional[Relationship] = None,
1984
+ between: Optional[Relationship] = None,
1985
+ ):
1986
+ """Returns a ternary relationship of common neighbor triplets.
1835
1987
 
1836
1988
  A node `w` is a common neighbor of a pair of nodes `u` and `v` if
1837
1989
  `w` is a neighbor of both `u` and `v`.
1838
1990
 
1991
+ Parameters
1992
+ ----------
1993
+ full : bool, optional
1994
+ If ``True``, computes common neighbors for all pairs of nodes in
1995
+ the graph. This computation can be expensive for large graphs, as the
1996
+ result can scale quadratically in the number of edges or cubically in
1997
+ the number of nodes. Mutually exclusive with other parameters.
1998
+ Default is ``None``.
1999
+ from_ : Relationship, optional
2000
+ A unary relationship containing a subset of the graph's nodes. When
2001
+ provided, constrains the domain of the common neighbor computation: only
2002
+ common neighbors of node pairs where the first node is in this relationship
2003
+ are computed and returned. Mutually exclusive with ``full`` and ``between``.
2004
+ Default is ``None``.
2005
+ to : Relationship, optional
2006
+ A unary relationship containing a subset of the graph's nodes. Can only
2007
+ be used together with the ``from_`` parameter. When provided with ``from_``,
2008
+ constrains the domain of the common neighbor computation: only common
2009
+ neighbors of node pairs where the first node is in ``from_`` and the
2010
+ second node is in ``to`` are computed and returned.
2011
+ Default is ``None``.
2012
+ between : Relationship, optional
2013
+ A binary relationship containing pairs of nodes. When provided,
2014
+ constrains the domain of the common neighbor computation: only common
2015
+ neighbors for the specific node pairs in this relationship are computed
2016
+ and returned. Mutually exclusive with other parameters.
2017
+ Default is ``None``.
2018
+
1839
2019
  Returns
1840
2020
  -------
1841
2021
  Relationship
1842
2022
  A ternary relationship where each tuple represents a pair of nodes
1843
2023
  and one of their common neighbors.
1844
2024
 
2025
+ Raises
2026
+ ------
2027
+ ValueError
2028
+ If ``full`` is provided with any other parameter.
2029
+ If ``between`` is provided with any other parameter.
2030
+ If ``from_`` is provided with any parameter other than ``to``, or ``to`` is provided without ``from_``.
2031
+ If none of ``full``, ``from_``, or ``between`` is provided.
2032
+ If ``full`` is not ``True`` or ``None``.
2033
+ AssertionError
2034
+ If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
2035
+ If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
2036
+ If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
2037
+ If ``from_`` or ``to`` is not a unary relationship.
2038
+ If ``between`` is not a binary relationship.
2039
+
1845
2040
  Relationship Schema
1846
2041
  -------------------
1847
2042
  ``common_neighbor(node_u, node_v, common_neighbor_node)``
@@ -1858,6 +2053,37 @@ class Graph():
1858
2053
  | Directed | Yes | |
1859
2054
  | Weighted | Yes | Weights are ignored. |
1860
2055
 
2056
+ Notes
2057
+ -----
2058
+ The ``common_neighbor(full=True)`` method computes and caches the full common
2059
+ neighbor relationship for all pairs of nodes, providing efficient reuse across
2060
+ multiple calls. This can be expensive as the result can contain O(|E|²) or
2061
+ O(|V|³) tuples depending on graph density.
2062
+
2063
+ Calling ``common_neighbor()`` without arguments raises a ``ValueError``,
2064
+ to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
2065
+
2066
+ In contrast, ``common_neighbor(from_=subset)`` constrains the computation to
2067
+ tuples with the first position in the passed-in ``subset``. The result is
2068
+ not cached; it is specific to the call site. When a significant fraction of
2069
+ the common neighbor relation is needed across a program, ``common_neighbor(full=True)``
2070
+ is typically more efficient. Use ``common_neighbor(from_=subset)`` only
2071
+ when small subsets of the common neighbor relationship are needed
2072
+ collectively across the program.
2073
+
2074
+ The ``to`` parameter can be used together with ``from_`` to further
2075
+ constrain the computation: ``common_neighbor(from_=subset_a, to=subset_b)``
2076
+ computes common neighbors only for node pairs where the first node is in
2077
+ ``subset_a`` and the second node is in ``subset_b``. (Since ``common_neighbor``
2078
+ is symmetric in its first two positions, using ``to`` without ``from_`` would
2079
+ be functionally redundant, and is not allowed.)
2080
+
2081
+ The ``between`` parameter provides another way to constrain the computation:
2082
+ Unlike ``from_`` and ``to``, which allow you to independently constrain the first
2083
+ and second positions in ``common_neighbor`` tuples to sets of nodes, ``between``
2084
+ allows you to constrain the first and second positions, jointly, to specific pairs
2085
+ of nodes.
2086
+
1861
2087
  Examples
1862
2088
  --------
1863
2089
  >>> from relationalai.semantics import Model, define, select
@@ -1881,7 +2107,7 @@ class Graph():
1881
2107
  >>>
1882
2108
  >>> # 3. Select the IDs from the common_neighbor relationship and inspect
1883
2109
  >>> u, v, w = Node.ref("u"), Node.ref("v"), Node.ref("w")
1884
- >>> common_neighbor = graph.common_neighbor()
2110
+ >>> common_neighbor = graph.common_neighbor(full=True)
1885
2111
  >>> select(
1886
2112
  ... u.id, v.id, w.id
1887
2113
  ... ).where(
@@ -1913,27 +2139,230 @@ class Graph():
1913
2139
  21 4 4 2
1914
2140
  22 4 4 3
1915
2141
 
2142
+ >>> # 4. Use 'from_' parameter to constrain the set of nodes to compute common neighbors for
2143
+ >>> # Define a subset containing only node 1
2144
+ >>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
2145
+ >>> node = Node.ref()
2146
+ >>> where(node.id == 1).define(subset(node))
2147
+ >>>
2148
+ >>> # Get common neighbors only for pairs where first node is in subset
2149
+ >>> constrained_common_neighbor = graph.common_neighbor(from_=subset)
2150
+ >>> select(u.id, v.id, w.id).where(constrained_common_neighbor(u, v, w)).inspect()
2151
+ ▰▰▰▰ Setup complete
2152
+ id id2 id3
2153
+ 0 1 1 2
2154
+ 1 1 3 2
2155
+ 2 1 4 2
2156
+
2157
+ >>> # 5. Use both 'from_' and 'to' parameters to constrain the first two positions
2158
+ >>> subset_a = model.Relationship(f"{{node:{Node}}} is in subset_a")
2159
+ >>> subset_b = model.Relationship(f"{{node:{Node}}} is in subset_b")
2160
+ >>> where(node.id == 1).define(subset_a(node))
2161
+ >>> where(node.id == 3).define(subset_b(node))
2162
+ >>>
2163
+ >>> # Get common neighbors only where the first node is in subset_a and the second node is in subset_b
2164
+ >>> constrained_common_neighbor = graph.common_neighbor(from_=subset_a, to=subset_b)
2165
+ >>> select(u.id, v.id, w.id).where(constrained_common_neighbor(u, v, w)).inspect()
2166
+ ▰▰▰▰ Setup complete
2167
+ id id2 id3
2168
+ 0 1 3 2
2169
+
2170
+ >>> # 6. Use 'between' parameter to constrain to specific pairs of nodes
2171
+ >>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
2172
+ >>> node_a, node_b = Node.ref(), Node.ref()
2173
+ >>> where(node_a.id == 1, node_b.id == 3).define(pairs(node_a, node_b))
2174
+ >>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
2175
+ >>>
2176
+ >>> # Get common neighbors only for the specific pairs (1, 3) and (2, 4)
2177
+ >>> constrained_common_neighbor = graph.common_neighbor(between=pairs)
2178
+ >>> select(u.id, v.id, w.id).where(constrained_common_neighbor(u, v, w)).inspect()
2179
+ ▰▰▰▰ Setup complete
2180
+ id id2 id3
2181
+ 0 1 3 2
2182
+ 1 2 4 3
2183
+
1916
2184
  """
1917
- warnings.warn(
1918
- (
1919
- "`common_neighbor` presently always computes common neighbors "
1920
- "for all pairs of nodes in the graph. To provide better control "
1921
- "over the computed subset, `common_neighbor`'s interface "
1922
- "will soon need to change."
1923
- ),
1924
- FutureWarning,
1925
- stacklevel=2
2185
+ # Validate domain constraint parameters.
2186
+ self._validate_domain_constraint_parameters(
2187
+ 'common_neighbor', full, from_, to, between
1926
2188
  )
2189
+
2190
+ # At this point, exactly one of `full`, `from_`, or `between`
2191
+ # has been provided, and if `to` is provided, `from_` is also provided.
2192
+
2193
+ # Handle `between`.
2194
+ if between is not None:
2195
+ self._validate_pair_subset_parameter(between)
2196
+ return self._common_neighbor_between(between)
2197
+
2198
+ # Handle `from_` (and potentially `to`).
2199
+ if from_ is not None:
2200
+ self._validate_node_subset_parameter('from_', from_)
2201
+ if to is not None:
2202
+ self._validate_node_subset_parameter('to', to)
2203
+ return self._common_neighbor_from_to(from_, to)
2204
+ return self._common_neighbor_from(from_)
2205
+
2206
+ # Handle `full`.
1927
2207
  return self._common_neighbor
1928
2208
 
1929
2209
  @cached_property
1930
2210
  def _common_neighbor(self):
1931
- """Lazily define and cache the self._common_neighbor relationship."""
1932
- _common_neighbor_rel = self._model.Relationship(f"{{node_a:{self._NodeConceptStr}}} and {{node_b:{self._NodeConceptStr}}} have common neighbor {{node_c:{self._NodeConceptStr}}}")
2211
+ """Lazily define and cache the full common_neighbor relationship."""
2212
+ _common_neighbor_rel = self._create_common_neighbor_relationship()
1933
2213
  _common_neighbor_rel.annotate(annotations.track("graphs", "common_neighbor"))
2214
+ return _common_neighbor_rel
2215
+
2216
+ def _common_neighbor_from(self, node_subset_from: Relationship):
2217
+ """
2218
+ Create a common_neighbor relationship, with the first position in each
2219
+ tuple constrained to be in the given subset of nodes. Note this relationship
2220
+ is not cached; it is specific to the callsite.
2221
+ """
2222
+ _common_neighbor_rel = self._create_common_neighbor_relationship(
2223
+ node_subset_from=node_subset_from
2224
+ )
2225
+ _common_neighbor_rel.annotate(annotations.track("graphs", "common_neighbor_from"))
2226
+ return _common_neighbor_rel
2227
+
2228
+ def _common_neighbor_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
2229
+ """
2230
+ Create a common_neighbor relationship, with the first position in each
2231
+ tuple constrained to be in `node_subset_from`, and the second position in
2232
+ each tuple constrained to be in `node_subset_to`. Note this relationship
2233
+ is not cached; it is specific to the callsite.
2234
+ """
2235
+ _common_neighbor_rel = self._create_common_neighbor_relationship(
2236
+ node_subset_from=node_subset_from,
2237
+ node_subset_to=node_subset_to
2238
+ )
2239
+ _common_neighbor_rel.annotate(annotations.track("graphs", "common_neighbor_from_to"))
2240
+ return _common_neighbor_rel
2241
+
2242
+ def _common_neighbor_between(self, pair_subset: Relationship):
2243
+ """
2244
+ Create a common_neighbor relationship, with the first and second position
2245
+ in each tuple jointly constrained to be in the given set of pairs
2246
+ of nodes. Note this relationship is not cached;
2247
+ it is specific to the callsite.
2248
+ """
2249
+ _common_neighbor_rel = self._create_common_neighbor_relationship(
2250
+ pair_subset_between=pair_subset
2251
+ )
2252
+ _common_neighbor_rel.annotate(annotations.track("graphs", "common_neighbor_between"))
2253
+ return _common_neighbor_rel
2254
+
2255
+ def _create_common_neighbor_relationship(
2256
+ self,
2257
+ *,
2258
+ node_subset_from: Optional[Relationship] = None,
2259
+ node_subset_to: Optional[Relationship] = None,
2260
+ pair_subset_between: Optional[Relationship] = None,
2261
+ ):
2262
+ """
2263
+ Create common_neighbor relationship, optionally constrained by the provided
2264
+ node subsets or pair subset.
2265
+ """
2266
+ _common_neighbor_rel = self._model.Relationship(
2267
+ f"{{node_a:{self._NodeConceptStr}}} and {{node_b:{self._NodeConceptStr}}} "
2268
+ f"have common neighbor {{neighbor_node:{self._NodeConceptStr}}}"
2269
+ )
2270
+ node_a, node_b, neighbor_node = self.Node.ref(), self.Node.ref(), self.Node.ref()
2271
+
2272
+ # Handle the `between` case.
2273
+ if pair_subset_between is not None:
2274
+ # Extract all nodes that appear in any position of the pairs relationship
2275
+ # into a unary relation that we can use to constrain the neighbor computation.
2276
+ nodes_in_pairs = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} is in pairs subset")
2277
+ node_x, node_y = self.Node.ref(), self.Node.ref()
2278
+ where(
2279
+ pair_subset_between(node_x, node_y)
2280
+ ).define(
2281
+ nodes_in_pairs(node_x),
2282
+ nodes_in_pairs(node_y)
2283
+ )
2284
+
2285
+ # Create a neighbor relation constrained to the nodes that appear in the pairs.
2286
+ neighbor_rel = self._neighbor_of(nodes_in_pairs)
2287
+ neighbor_a_rel = neighbor_rel
2288
+ neighbor_b_rel = neighbor_rel
2289
+
2290
+ # The constraint fragment ensures we only compute common neighbors for the
2291
+ # specific pairs provided, not for all combinations of nodes in those pairs.
2292
+ node_constraint = [pair_subset_between(node_a, node_b)]
2293
+
2294
+ # Handle the `from_` case.
2295
+ elif node_subset_from is not None and node_subset_to is None:
2296
+ # Note that in this case we must compute all of `_neighbor` anyway,
2297
+ # as the second position in each tuple is unconstrained. Given that,
2298
+ # computing `_neighbor_of` for `node_subset_from` to constrain the
2299
+ # first position that way would be less efficient than using
2300
+ # `_neighbor` and joining the relevant variable with `node_subset_from`.
2301
+ neighbor_a_rel = self._neighbor
2302
+ neighbor_b_rel = self._neighbor
2303
+ node_constraint = [node_subset_from(node_a)]
2304
+ # TODO: Nice observation from @rygao: We can instead implement this
2305
+ # as a depth-2 traversal starting from `node_subset_from`. Candidate code:
2306
+
2307
+ # neighbor_a_rel = self._neighbor_of(node_subset_from)
2308
+ #
2309
+ # domain_w = Relationship(f"{{node:{self._NodeConceptStr}}} is the domain of `w` in `common_neighbor(u, v, w)`")
2310
+ # node_x, node_y = self.Node.ref(), self.Node.ref()
2311
+ # where(neighbor_a_rel(node_x, node_y)).define(domain_w(node_y))
2312
+ # neighbor_b_rel = self._neighbor_of(domain_w)
2313
+ #
2314
+ # node_constraint = []
2315
+ #
2316
+ # # `neighbor_b_rel` is domain-constrained in its first position, so its
2317
+ # # args must be reversed below; this relies on the symmetry of `neighbor`.
2318
+ # where(
2319
+ # *node_constraint,
2320
+ # neighbor_a_rel(node_a, neighbor_node),
2321
+ # neighbor_b_rel(neighbor_node, node_b)
2322
+ # ).define(_common_neighbor_rel(node_a, node_b, neighbor_node))
2323
+
2324
+ # Handle the `from_`/`to` case.
2325
+ elif node_subset_from is not None and node_subset_to is not None:
2326
+ # There are two cases:
2327
+ #
2328
+ # NOTE: For both of the following branches, spiritually we are applying
2329
+ # `node_constraint = [node_subset_from(node_a), node_subset_to(node_b)]`,
2330
+ # but these are already enforced by the use of the constrained
2331
+ # `_neighbor_of` relationships, so we don't need to include them
2332
+ # again in `node_constraint`.
2333
+ if node_subset_from is node_subset_to:
2334
+ # If `node_subset_from` and `node_subset_to` are object-identical,
2335
+ # we can compute `_neighbor_of` once, use it for both positions,
2336
+ # and apply no further constraint.
2337
+ neighbor_rel = self._neighbor_of(node_subset_from)
2338
+ neighbor_a_rel = neighbor_rel
2339
+ neighbor_b_rel = neighbor_rel
2340
+ node_constraint = []
2341
+ else:
2342
+ # Otherwise, we have two options: 1) compute `_neighbor_of` twice,
2343
+ # once for each node subset; or 2) compute `_neighbor_of` once, over
2344
+ # the union of both subsets, and apply constraints to each position.
2345
+ # Which of these is more efficient depends on the detailed nature
2346
+ # of the subsets, which we don't have knowledge of here. Here
2347
+ # we choose the simpler/cleaner of the two options (1) as such:
2348
+ neighbor_a_rel = self._neighbor_of(node_subset_from)
2349
+ neighbor_b_rel = self._neighbor_of(node_subset_to)
2350
+ node_constraint = []
2351
+
2352
+ # Handle the `full` case.
2353
+ else:
2354
+ neighbor_a_rel = self._neighbor
2355
+ neighbor_b_rel = self._neighbor
2356
+ node_constraint = []
2357
+
2358
+ # Define the common neighbor relationship using the neighbor relations and
2359
+ # constraints determined above. This logic is shared across all constraint types.
2360
+ where(
2361
+ *node_constraint,
2362
+ neighbor_a_rel(node_a, neighbor_node),
2363
+ neighbor_b_rel(node_b, neighbor_node)
2364
+ ).define(_common_neighbor_rel(node_a, node_b, neighbor_node))
1934
2365
 
1935
- node_a, node_b, node_c = self.Node.ref(), self.Node.ref(), self.Node.ref()
1936
- where(self._neighbor(node_a, node_c), self._neighbor(node_b, node_c)).define(_common_neighbor_rel(node_a, node_b, node_c))
1937
2366
  return _common_neighbor_rel
1938
2367
 
1939
2368
 
@@ -2084,37 +2513,37 @@ class Graph():
2084
2513
  return self._degree
2085
2514
  else:
2086
2515
  # Validate the 'of' parameter
2087
- self._validate_node_subset_parameter(of)
2516
+ self._validate_node_subset_parameter('of', of)
2088
2517
  return self._degree_of(of)
2089
2518
 
2090
2519
  @cached_property
2091
2520
  def _degree(self):
2092
2521
  """Lazily define and cache the self._degree relationship."""
2093
- _degree_rel = self._create_degree_relationship(nodes_subset=None)
2522
+ _degree_rel = self._create_degree_relationship(node_subset=None)
2094
2523
  _degree_rel.annotate(annotations.track("graphs", "degree"))
2095
2524
  return _degree_rel
2096
2525
 
2097
- def _degree_of(self, nodes_subset: Relationship):
2526
+ def _degree_of(self, node_subset: Relationship):
2098
2527
  """
2099
2528
  Create a degree relationship constrained to the subset of nodes
2100
- in `nodes_subset`. Note this relationship is not cached; it is
2529
+ in `node_subset`. Note this relationship is not cached; it is
2101
2530
  specific to the callsite.
2102
2531
  """
2103
- _degree_rel = self._create_degree_relationship(nodes_subset=nodes_subset)
2532
+ _degree_rel = self._create_degree_relationship(node_subset=node_subset)
2104
2533
  _degree_rel.annotate(annotations.track("graphs", "degree_of"))
2105
2534
  return _degree_rel
2106
2535
 
2107
- def _create_degree_relationship(self, *, nodes_subset: Optional[Relationship]):
2536
+ def _create_degree_relationship(self, *, node_subset: Optional[Relationship]):
2108
2537
  _degree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has degree {{count:Integer}}")
2109
2538
 
2110
2539
  if self.directed:
2111
2540
  # For directed graphs, degree is the sum of indegree and outdegree.
2112
- if nodes_subset is None:
2541
+ if node_subset is None:
2113
2542
  indegree_rel = self._indegree
2114
2543
  outdegree_rel = self._outdegree
2115
2544
  else:
2116
- indegree_rel = self._indegree_of(nodes_subset)
2117
- outdegree_rel = self._outdegree_of(nodes_subset)
2545
+ indegree_rel = self._indegree_of(node_subset)
2546
+ outdegree_rel = self._outdegree_of(node_subset)
2118
2547
 
2119
2548
  incount, outcount = Integer.ref(), Integer.ref()
2120
2549
  where(
@@ -2123,12 +2552,12 @@ class Graph():
2123
2552
  ).define(_degree_rel(self.Node, incount + outcount))
2124
2553
  else:
2125
2554
  # For undirected graphs, degree is the count of neighbors.
2126
- if nodes_subset is None:
2555
+ if node_subset is None:
2127
2556
  node_set = self.Node
2128
2557
  count_neighbor_rel = self._count_neighbor
2129
2558
  else:
2130
- node_set = nodes_subset
2131
- count_neighbor_rel = self._count_neighbor_of(nodes_subset)
2559
+ node_set = node_subset
2560
+ count_neighbor_rel = self._count_neighbor_of(node_subset)
2132
2561
 
2133
2562
  where(
2134
2563
  node_set(self.Node), # Necessary given the match on the following line.
@@ -2279,38 +2708,38 @@ class Graph():
2279
2708
  return self._indegree
2280
2709
  else:
2281
2710
  # Validate the 'of' parameter
2282
- self._validate_node_subset_parameter(of)
2711
+ self._validate_node_subset_parameter('of', of)
2283
2712
  return self._indegree_of(of)
2284
2713
 
2285
2714
  @cached_property
2286
2715
  def _indegree(self):
2287
2716
  """Lazily define and cache the self._indegree relationship."""
2288
- _indegree_rel = self._create_indegree_relationship(nodes_subset=None)
2717
+ _indegree_rel = self._create_indegree_relationship(node_subset=None)
2289
2718
  _indegree_rel.annotate(annotations.track("graphs", "indegree"))
2290
2719
  return _indegree_rel
2291
2720
 
2292
- def _indegree_of(self, nodes_subset: Relationship):
2721
+ def _indegree_of(self, node_subset: Relationship):
2293
2722
  """
2294
2723
  Create an indegree relationship constrained to the subset of nodes
2295
- in `nodes_subset`. Note this relationship is not cached; it is
2724
+ in `node_subset`. Note this relationship is not cached; it is
2296
2725
  specific to the callsite.
2297
2726
  """
2298
- _indegree_rel = self._create_indegree_relationship(nodes_subset=nodes_subset)
2727
+ _indegree_rel = self._create_indegree_relationship(node_subset=node_subset)
2299
2728
  _indegree_rel.annotate(annotations.track("graphs", "indegree_of"))
2300
2729
  return _indegree_rel
2301
2730
 
2302
- def _create_indegree_relationship(self, *, nodes_subset: Optional[Relationship]):
2731
+ def _create_indegree_relationship(self, *, node_subset: Optional[Relationship]):
2303
2732
  _indegree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has indegree {{count:Integer}}")
2304
2733
 
2305
2734
  # Choose the appropriate count_inneighbor relationship and node set
2306
- if nodes_subset is None:
2735
+ if node_subset is None:
2307
2736
  # No constraint - use cached count_inneighbor relationship and all nodes
2308
2737
  count_inneighbor_rel = self._count_inneighbor
2309
2738
  node_set = self.Node
2310
2739
  else:
2311
2740
  # Constrained to nodes in the subset - use constrained count_inneighbor relationship
2312
- count_inneighbor_rel = self._count_inneighbor_of(nodes_subset)
2313
- node_set = nodes_subset
2741
+ count_inneighbor_rel = self._count_inneighbor_of(node_subset)
2742
+ node_set = node_subset
2314
2743
 
2315
2744
  # Apply the same indegree logic for both cases
2316
2745
  where(
@@ -2463,38 +2892,38 @@ class Graph():
2463
2892
  return self._outdegree
2464
2893
  else:
2465
2894
  # Validate the 'of' parameter
2466
- self._validate_node_subset_parameter(of)
2895
+ self._validate_node_subset_parameter('of', of)
2467
2896
  return self._outdegree_of(of)
2468
2897
 
2469
2898
  @cached_property
2470
2899
  def _outdegree(self):
2471
2900
  """Lazily define and cache the self._outdegree relationship."""
2472
- _outdegree_rel = self._create_outdegree_relationship(nodes_subset=None)
2901
+ _outdegree_rel = self._create_outdegree_relationship(node_subset=None)
2473
2902
  _outdegree_rel.annotate(annotations.track("graphs", "outdegree"))
2474
2903
  return _outdegree_rel
2475
2904
 
2476
- def _outdegree_of(self, nodes_subset: Relationship):
2905
+ def _outdegree_of(self, node_subset: Relationship):
2477
2906
  """
2478
2907
  Create an outdegree relationship constrained to the subset of nodes
2479
- in `nodes_subset`. Note this relationship is not cached; it is
2908
+ in `node_subset`. Note this relationship is not cached; it is
2480
2909
  specific to the callsite.
2481
2910
  """
2482
- _outdegree_rel = self._create_outdegree_relationship(nodes_subset=nodes_subset)
2911
+ _outdegree_rel = self._create_outdegree_relationship(node_subset=node_subset)
2483
2912
  _outdegree_rel.annotate(annotations.track("graphs", "outdegree_of"))
2484
2913
  return _outdegree_rel
2485
2914
 
2486
- def _create_outdegree_relationship(self, *, nodes_subset: Optional[Relationship]):
2915
+ def _create_outdegree_relationship(self, *, node_subset: Optional[Relationship]):
2487
2916
  _outdegree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has outdegree {{count:Integer}}")
2488
2917
 
2489
2918
  # Choose the appropriate count_outneighbor relationship and node set
2490
- if nodes_subset is None:
2919
+ if node_subset is None:
2491
2920
  # No constraint - use cached count_outneighbor relationship and all nodes
2492
2921
  count_outneighbor_rel = self._count_outneighbor
2493
2922
  node_set = self.Node
2494
2923
  else:
2495
2924
  # Constrained to nodes in the subset - use constrained count_outneighbor relationship
2496
- count_outneighbor_rel = self._count_outneighbor_of(nodes_subset)
2497
- node_set = nodes_subset
2925
+ count_outneighbor_rel = self._count_outneighbor_of(node_subset)
2926
+ node_set = node_subset
2498
2927
 
2499
2928
  # Apply the same outdegree logic for both cases
2500
2929
  where(
@@ -2612,37 +3041,37 @@ class Graph():
2612
3041
  return self._weighted_degree
2613
3042
  else:
2614
3043
  # Validate the 'of' parameter
2615
- self._validate_node_subset_parameter(of)
3044
+ self._validate_node_subset_parameter('of', of)
2616
3045
  return self._weighted_degree_of(of)
2617
3046
 
2618
3047
  @cached_property
2619
3048
  def _weighted_degree(self):
2620
3049
  """Lazily define and cache the self._weighted_degree relationship."""
2621
- _weighted_degree_rel = self._create_weighted_degree_relationship(nodes_subset=None)
3050
+ _weighted_degree_rel = self._create_weighted_degree_relationship(node_subset=None)
2622
3051
  _weighted_degree_rel.annotate(annotations.track("graphs", "weighted_degree"))
2623
3052
  return _weighted_degree_rel
2624
3053
 
2625
- def _weighted_degree_of(self, nodes_subset: Relationship):
3054
+ def _weighted_degree_of(self, node_subset: Relationship):
2626
3055
  """
2627
3056
  Create a weighted degree relationship constrained to the subset of nodes
2628
- in `nodes_subset`. Note this relationship is not cached; it is
3057
+ in `node_subset`. Note this relationship is not cached; it is
2629
3058
  specific to the callsite.
2630
3059
  """
2631
- _weighted_degree_rel = self._create_weighted_degree_relationship(nodes_subset=nodes_subset)
3060
+ _weighted_degree_rel = self._create_weighted_degree_relationship(node_subset=node_subset)
2632
3061
  _weighted_degree_rel.annotate(annotations.track("graphs", "weighted_degree_of"))
2633
3062
  return _weighted_degree_rel
2634
3063
 
2635
- def _create_weighted_degree_relationship(self, *, nodes_subset: Optional[Relationship]):
3064
+ def _create_weighted_degree_relationship(self, *, node_subset: Optional[Relationship]):
2636
3065
  _weighted_degree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has weighted degree {{weight:Float}}")
2637
3066
 
2638
3067
  if self.directed:
2639
3068
  # For directed graphs, weighted degree is the sum of weighted indegree and weighted outdegree.
2640
- if nodes_subset is None:
3069
+ if node_subset is None:
2641
3070
  weighted_indegree_rel = self._weighted_indegree
2642
3071
  weighted_outdegree_rel = self._weighted_outdegree
2643
3072
  else:
2644
- weighted_indegree_rel = self._weighted_indegree_of(nodes_subset)
2645
- weighted_outdegree_rel = self._weighted_outdegree_of(nodes_subset)
3073
+ weighted_indegree_rel = self._weighted_indegree_of(node_subset)
3074
+ weighted_outdegree_rel = self._weighted_outdegree_of(node_subset)
2646
3075
 
2647
3076
  inweight, outweight = Float.ref(), Float.ref()
2648
3077
  where(
@@ -2651,12 +3080,12 @@ class Graph():
2651
3080
  ).define(_weighted_degree_rel(self.Node, inweight + outweight))
2652
3081
  elif not self.directed:
2653
3082
  # Choose the appropriate node set
2654
- if nodes_subset is None:
3083
+ if node_subset is None:
2655
3084
  # No constraint - use all nodes
2656
3085
  node_set = self.Node
2657
3086
  else:
2658
3087
  # Constrained to nodes in the subset
2659
- node_set = nodes_subset
3088
+ node_set = node_subset
2660
3089
 
2661
3090
  dst, weight = self.Node.ref(), Float.ref()
2662
3091
  where(
@@ -2772,36 +3201,36 @@ class Graph():
2772
3201
  return self._weighted_indegree
2773
3202
  else:
2774
3203
  # Validate the 'of' parameter
2775
- self._validate_node_subset_parameter(of)
3204
+ self._validate_node_subset_parameter('of', of)
2776
3205
  return self._weighted_indegree_of(of)
2777
3206
 
2778
3207
  @cached_property
2779
3208
  def _weighted_indegree(self):
2780
3209
  """Lazily define and cache the self._weighted_indegree relationship."""
2781
- _weighted_indegree_rel = self._create_weighted_indegree_relationship(nodes_subset=None)
3210
+ _weighted_indegree_rel = self._create_weighted_indegree_relationship(node_subset=None)
2782
3211
  _weighted_indegree_rel.annotate(annotations.track("graphs", "weighted_indegree"))
2783
3212
  return _weighted_indegree_rel
2784
3213
 
2785
- def _weighted_indegree_of(self, nodes_subset: Relationship):
3214
+ def _weighted_indegree_of(self, node_subset: Relationship):
2786
3215
  """
2787
3216
  Create a weighted indegree relationship constrained to the subset of nodes
2788
- in `nodes_subset`. Note this relationship is not cached; it is
3217
+ in `node_subset`. Note this relationship is not cached; it is
2789
3218
  specific to the callsite.
2790
3219
  """
2791
- _weighted_indegree_rel = self._create_weighted_indegree_relationship(nodes_subset=nodes_subset)
3220
+ _weighted_indegree_rel = self._create_weighted_indegree_relationship(node_subset=node_subset)
2792
3221
  _weighted_indegree_rel.annotate(annotations.track("graphs", "weighted_indegree_of"))
2793
3222
  return _weighted_indegree_rel
2794
3223
 
2795
- def _create_weighted_indegree_relationship(self, *, nodes_subset: Optional[Relationship]):
3224
+ def _create_weighted_indegree_relationship(self, *, node_subset: Optional[Relationship]):
2796
3225
  _weighted_indegree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has weighted indegree {{weight:Float}}")
2797
3226
 
2798
3227
  # Choose the appropriate node set
2799
- if nodes_subset is None:
3228
+ if node_subset is None:
2800
3229
  # No constraint - use all nodes
2801
3230
  node_set = self.Node
2802
3231
  else:
2803
3232
  # Constrained to nodes in the subset
2804
- node_set = nodes_subset
3233
+ node_set = node_subset
2805
3234
  # TODO: In a future cleanup pass, replace `node_set` with a `node_constraint`
2806
3235
  # that replaces the `node_set(self.Node)` in the where clause below,
2807
3236
  # and generates only `self.Node` (rather than `self.Node(self.Node)`)
@@ -2924,36 +3353,36 @@ class Graph():
2924
3353
  return self._weighted_outdegree
2925
3354
  else:
2926
3355
  # Validate the 'of' parameter
2927
- self._validate_node_subset_parameter(of)
3356
+ self._validate_node_subset_parameter('of', of)
2928
3357
  return self._weighted_outdegree_of(of)
2929
3358
 
2930
3359
  @cached_property
2931
3360
  def _weighted_outdegree(self):
2932
3361
  """Lazily define and cache the self._weighted_outdegree relationship."""
2933
- _weighted_outdegree_rel = self._create_weighted_outdegree_relationship(nodes_subset=None)
3362
+ _weighted_outdegree_rel = self._create_weighted_outdegree_relationship(node_subset=None)
2934
3363
  _weighted_outdegree_rel.annotate(annotations.track("graphs", "weighted_outdegree"))
2935
3364
  return _weighted_outdegree_rel
2936
3365
 
2937
- def _weighted_outdegree_of(self, nodes_subset: Relationship):
3366
+ def _weighted_outdegree_of(self, node_subset: Relationship):
2938
3367
  """
2939
3368
  Create a weighted outdegree relationship constrained to the subset of nodes
2940
- in `nodes_subset`. Note this relationship is not cached; it is
3369
+ in `node_subset`. Note this relationship is not cached; it is
2941
3370
  specific to the callsite.
2942
3371
  """
2943
- _weighted_outdegree_rel = self._create_weighted_outdegree_relationship(nodes_subset=nodes_subset)
3372
+ _weighted_outdegree_rel = self._create_weighted_outdegree_relationship(node_subset=node_subset)
2944
3373
  _weighted_outdegree_rel.annotate(annotations.track("graphs", "weighted_outdegree_of"))
2945
3374
  return _weighted_outdegree_rel
2946
3375
 
2947
- def _create_weighted_outdegree_relationship(self, *, nodes_subset: Optional[Relationship]):
3376
+ def _create_weighted_outdegree_relationship(self, *, node_subset: Optional[Relationship]):
2948
3377
  _weighted_outdegree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has weighted outdegree {{weight:Float}}")
2949
3378
 
2950
3379
  # Choose the appropriate node set
2951
- if nodes_subset is None:
3380
+ if node_subset is None:
2952
3381
  # No constraint - use all nodes
2953
3382
  node_set = self.Node
2954
3383
  else:
2955
3384
  # Constrained to nodes in the subset
2956
- node_set = nodes_subset
3385
+ node_set = node_subset
2957
3386
 
2958
3387
  # Apply the weighted outdegree logic for both cases
2959
3388
  dst, outweight = self.Node.ref(), Float.ref()
@@ -3103,36 +3532,36 @@ class Graph():
3103
3532
  return self._degree_centrality
3104
3533
  else:
3105
3534
  # Validate the 'of' parameter
3106
- self._validate_node_subset_parameter(of)
3535
+ self._validate_node_subset_parameter('of', of)
3107
3536
  return self._degree_centrality_of(of)
3108
3537
 
3109
3538
  @cached_property
3110
3539
  def _degree_centrality(self):
3111
3540
  """Lazily define and cache the self._degree_centrality relationship."""
3112
- _degree_centrality_rel = self._create_degree_centrality_relationship(nodes_subset=None)
3541
+ _degree_centrality_rel = self._create_degree_centrality_relationship(node_subset=None)
3113
3542
  _degree_centrality_rel.annotate(annotations.track("graphs", "degree_centrality"))
3114
3543
  return _degree_centrality_rel
3115
3544
 
3116
- def _degree_centrality_of(self, nodes_subset: Relationship):
3545
+ def _degree_centrality_of(self, node_subset: Relationship):
3117
3546
  """
3118
3547
  Create a degree centrality relationship constrained to the subset of nodes
3119
- in `nodes_subset`. Note this relationship is not cached; it is
3548
+ in `node_subset`. Note this relationship is not cached; it is
3120
3549
  specific to the callsite.
3121
3550
  """
3122
- _degree_centrality_rel = self._create_degree_centrality_relationship(nodes_subset=nodes_subset)
3551
+ _degree_centrality_rel = self._create_degree_centrality_relationship(node_subset=node_subset)
3123
3552
  _degree_centrality_rel.annotate(annotations.track("graphs", "degree_centrality_of"))
3124
3553
  return _degree_centrality_rel
3125
3554
 
3126
- def _create_degree_centrality_relationship(self, *, nodes_subset: Optional[Relationship]):
3555
+ def _create_degree_centrality_relationship(self, *, node_subset: Optional[Relationship]):
3127
3556
  """Create a degree centrality relationship, optionally constrained to a subset of nodes."""
3128
3557
  _degree_centrality_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has {{degree_centrality:Float}}")
3129
3558
 
3130
- if nodes_subset is None:
3559
+ if node_subset is None:
3131
3560
  degree_rel = self._degree
3132
3561
  node_constraint = [] # No constraint on nodes.
3133
3562
  else:
3134
- degree_rel = self._degree_of(nodes_subset)
3135
- node_constraint = [nodes_subset(self.Node)] # Nodes constrained to given subset.
3563
+ degree_rel = self._degree_of(node_subset)
3564
+ node_constraint = [node_subset(self.Node)] # Nodes constrained to given subset.
3136
3565
 
3137
3566
  degree = Integer.ref()
3138
3567
 
@@ -3154,10 +3583,10 @@ class Graph():
3154
3583
  # General case, i.e. with more than one node.
3155
3584
  if self.weighted:
3156
3585
  maybe_weighted_degree = Float.ref()
3157
- if nodes_subset is None:
3586
+ if node_subset is None:
3158
3587
  maybe_weighted_degree_rel = self._weighted_degree
3159
3588
  else:
3160
- maybe_weighted_degree_rel = self._weighted_degree_of(nodes_subset)
3589
+ maybe_weighted_degree_rel = self._weighted_degree_of(node_subset)
3161
3590
  else: # not self.weighted
3162
3591
  maybe_weighted_degree = Integer.ref()
3163
3592
  maybe_weighted_degree_rel = degree_rel
@@ -4015,35 +4444,35 @@ class Graph():
4015
4444
 
4016
4445
  """
4017
4446
  if of is not None:
4018
- self._validate_node_subset_parameter(of)
4447
+ self._validate_node_subset_parameter('of', of)
4019
4448
  return self._triangle_count_of(of)
4020
4449
  return self._triangle_count
4021
4450
 
4022
4451
  @cached_property
4023
4452
  def _triangle_count(self):
4024
4453
  """Lazily define and cache the self._triangle_count relationship."""
4025
- _triangle_count_rel = self._create_triangle_count_relationship(nodes_subset=None)
4454
+ _triangle_count_rel = self._create_triangle_count_relationship(node_subset=None)
4026
4455
  _triangle_count_rel.annotate(annotations.track("graphs", "triangle_count"))
4027
4456
  return _triangle_count_rel
4028
4457
 
4029
- def _triangle_count_of(self, nodes_subset: Relationship):
4458
+ def _triangle_count_of(self, node_subset: Relationship):
4030
4459
  """
4031
4460
  Create a triangle count relationship constrained to the subset of nodes
4032
- in `nodes_subset`. Note this relationship is not cached; it is
4461
+ in `node_subset`. Note this relationship is not cached; it is
4033
4462
  specific to the callsite.
4034
4463
  """
4035
- _triangle_count_rel = self._create_triangle_count_relationship(nodes_subset=nodes_subset)
4464
+ _triangle_count_rel = self._create_triangle_count_relationship(node_subset=node_subset)
4036
4465
  _triangle_count_rel.annotate(annotations.track("graphs", "triangle_count_of"))
4037
4466
  return _triangle_count_rel
4038
4467
 
4039
- def _create_triangle_count_relationship(self, *, nodes_subset: Optional[Relationship]):
4468
+ def _create_triangle_count_relationship(self, *, node_subset: Optional[Relationship]):
4040
4469
  """Create a triangle count relationship, optionally constrained to a subset of nodes."""
4041
4470
  _triangle_count_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} belongs to {{count:Integer}} triangles")
4042
4471
 
4043
- if nodes_subset is None:
4472
+ if node_subset is None:
4044
4473
  node_constraint = self.Node # No constraint on nodes.
4045
4474
  else:
4046
- node_constraint = nodes_subset(self.Node) # Nodes constrained to given subset.
4475
+ node_constraint = node_subset(self.Node) # Nodes constrained to given subset.
4047
4476
 
4048
4477
  where(
4049
4478
  node_constraint,
@@ -4293,41 +4722,41 @@ class Graph():
4293
4722
  )
4294
4723
 
4295
4724
  if of is not None:
4296
- self._validate_node_subset_parameter(of)
4725
+ self._validate_node_subset_parameter('of', of)
4297
4726
  return self._local_clustering_coefficient_of(of)
4298
4727
  return self._local_clustering_coefficient
4299
4728
 
4300
4729
  @cached_property
4301
4730
  def _local_clustering_coefficient(self):
4302
4731
  """Lazily define and cache the self._local_clustering_coefficient relationship."""
4303
- _local_clustering_coefficient_rel = self._create_local_clustering_coefficient_relationship(nodes_subset=None)
4732
+ _local_clustering_coefficient_rel = self._create_local_clustering_coefficient_relationship(node_subset=None)
4304
4733
  _local_clustering_coefficient_rel.annotate(annotations.track("graphs", "local_clustering_coefficient"))
4305
4734
  return _local_clustering_coefficient_rel
4306
4735
 
4307
- def _local_clustering_coefficient_of(self, nodes_subset: Relationship):
4736
+ def _local_clustering_coefficient_of(self, node_subset: Relationship):
4308
4737
  """
4309
4738
  Create a local clustering coefficient relationship constrained to the subset of nodes
4310
- in `nodes_subset`. Note this relationship is not cached; it is
4739
+ in `node_subset`. Note this relationship is not cached; it is
4311
4740
  specific to the callsite.
4312
4741
  """
4313
- _local_clustering_coefficient_rel = self._create_local_clustering_coefficient_relationship(nodes_subset=nodes_subset)
4742
+ _local_clustering_coefficient_rel = self._create_local_clustering_coefficient_relationship(node_subset=node_subset)
4314
4743
  _local_clustering_coefficient_rel.annotate(annotations.track("graphs", "local_clustering_coefficient_of"))
4315
4744
  return _local_clustering_coefficient_rel
4316
4745
 
4317
- def _create_local_clustering_coefficient_relationship(self, *, nodes_subset: Optional[Relationship]):
4746
+ def _create_local_clustering_coefficient_relationship(self, *, node_subset: Optional[Relationship]):
4318
4747
  """Create a local clustering coefficient relationship, optionally constrained to a subset of nodes."""
4319
4748
  _local_clustering_coefficient_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has local clustering coefficient {{coefficient:Float}}")
4320
4749
 
4321
4750
  node = self.Node.ref()
4322
4751
 
4323
- if nodes_subset is None:
4752
+ if node_subset is None:
4324
4753
  degree_no_self_rel = self._degree_no_self
4325
4754
  triangle_count_rel = self._triangle_count
4326
4755
  node_constraint = node # No constraint on nodes.
4327
4756
  else:
4328
- degree_no_self_rel = self._degree_no_self_of(nodes_subset)
4329
- triangle_count_rel = self._triangle_count_of(nodes_subset)
4330
- node_constraint = nodes_subset(node) # Nodes constrained to given subset.
4757
+ degree_no_self_rel = self._degree_no_self_of(node_subset)
4758
+ triangle_count_rel = self._triangle_count_of(node_subset)
4759
+ node_constraint = node_subset(node) # Nodes constrained to given subset.
4331
4760
 
4332
4761
  degree_no_self = Integer.ref()
4333
4762
  triangle_count = Integer.ref()
@@ -4350,17 +4779,17 @@ class Graph():
4350
4779
  Lazily define and cache the self._degree_no_self relationship,
4351
4780
  a non-public helper for local_clustering_coefficient.
4352
4781
  """
4353
- return self._create_degree_no_self_relationship(nodes_subset=None)
4782
+ return self._create_degree_no_self_relationship(node_subset=None)
4354
4783
 
4355
- def _degree_no_self_of(self, nodes_subset: Relationship):
4784
+ def _degree_no_self_of(self, node_subset: Relationship):
4356
4785
  """
4357
4786
  Create a self-loop-exclusive degree relationship constrained to
4358
- the subset of nodes in `nodes_subset`. Note this relationship
4787
+ the subset of nodes in `node_subset`. Note this relationship
4359
4788
  is not cached; it is specific to the callsite.
4360
4789
  """
4361
- return self._create_degree_no_self_relationship(nodes_subset=nodes_subset)
4790
+ return self._create_degree_no_self_relationship(node_subset=node_subset)
4362
4791
 
4363
- def _create_degree_no_self_relationship(self, *, nodes_subset: Optional[Relationship]):
4792
+ def _create_degree_no_self_relationship(self, *, node_subset: Optional[Relationship]):
4364
4793
  """
4365
4794
  Create a self-loop-exclusive degree relationship,
4366
4795
  optionally constrained to a subset of nodes.
@@ -4369,10 +4798,10 @@ class Graph():
4369
4798
 
4370
4799
  node, neighbor = self.Node.ref(), self.Node.ref()
4371
4800
 
4372
- if nodes_subset is None:
4801
+ if node_subset is None:
4373
4802
  node_constraint = node # No constraint on nodes.
4374
4803
  else:
4375
- node_constraint = nodes_subset(node) # Nodes constrained to given subset.
4804
+ node_constraint = node_subset(node) # Nodes constrained to given subset.
4376
4805
 
4377
4806
  where(
4378
4807
  node_constraint,
@@ -5417,19 +5846,72 @@ class Graph():
5417
5846
 
5418
5847
 
5419
5848
  @include_in_docs
5420
- def cosine_similarity(self):
5421
- """Returns a ternary relationship containing the cosine similarity for all pairs of nodes.
5849
+ def cosine_similarity(
5850
+ self,
5851
+ *,
5852
+ full: Optional[bool] = None,
5853
+ from_: Optional[Relationship] = None,
5854
+ to: Optional[Relationship] = None,
5855
+ between: Optional[Relationship] = None,
5856
+ ):
5857
+ """Returns a ternary relationship containing
5858
+ the cosine similarity for pairs of nodes.
5422
5859
 
5423
5860
  The cosine similarity measures the similarity between two nodes based
5424
5861
  on the angle between their neighborhood vectors. The score ranges from
5425
5862
  0.0 to 1.0, inclusive, where 1.0 indicates identical sets of neighbors.
5426
5863
 
5864
+ Parameters
5865
+ ----------
5866
+ full : bool, optional
5867
+ If ``True``, computes the cosine similarity for all pairs
5868
+ of nodes in the graph. This computation can be expensive for large graphs,
5869
+ as the result can scale quadratically in the number of nodes. Mutually exclusive
5870
+ with other parameters.
5871
+ Default is ``None``.
5872
+ from_ : Relationship, optional
5873
+ A unary relationship containing a subset of the graph's nodes. When
5874
+ provided, constrains the domain of the cosine similarity computation: only
5875
+ cosine similarity scores for node pairs where the first node is
5876
+ in this relationship are computed and returned. Mutually exclusive with
5877
+ ``full`` and ``between``.
5878
+ Default is ``None``.
5879
+ to : Relationship, optional
5880
+ A unary relationship containing a subset of the graph's nodes. Can only
5881
+ be used together with the ``from_`` parameter. When provided with ``from_``,
5882
+ constrains the domain of the cosine similarity computation: only
5883
+ cosine similarity scores for node pairs where the first node is
5884
+ in ``from_`` and the second node is in ``to`` are computed and returned.
5885
+ Default is ``None``.
5886
+ between : Relationship, optional
5887
+ A binary relationship containing pairs of nodes. When provided,
5888
+ constrains the domain of the cosine similarity computation: only
5889
+ cosine similarity scores for the specific node pairs in
5890
+ this relationship are computed and returned. Mutually exclusive
5891
+ with other parameters.
5892
+ Default is ``None``.
5893
+
5427
5894
  Returns
5428
5895
  -------
5429
5896
  Relationship
5430
5897
  A ternary relationship where each tuple represents a pair of nodes
5431
5898
  and their cosine similarity.
5432
5899
 
5900
+ Raises
5901
+ ------
5902
+ ValueError
5903
+ If ``full`` is provided with any other parameter.
5904
+ If ``between`` is provided with any other parameter.
5905
+ If ``from_`` is provided with any parameter other than ``to``.
5906
+ If none of ``full``, ``from_``, or ``between`` is provided.
5907
+ If ``full`` is not ``True`` or ``None``.
5908
+ AssertionError
5909
+ If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
5910
+ If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
5911
+ If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
5912
+ If ``from_`` or ``to`` is not a unary relationship.
5913
+ If ``between`` is not a binary relationship.
5914
+
5433
5915
  Relationship Schema
5434
5916
  -------------------
5435
5917
  ``cosine_similarity(node_u, node_v, score)``
@@ -5462,6 +5944,36 @@ class Graph():
5462
5944
  vectors contain only non-negative elements. Therefore, the cosine
5463
5945
  similarity score is always between 0.0 and 1.0, inclusive.
5464
5946
 
5947
+ The ``cosine_similarity(full=True)`` method computes and caches
5948
+ the full cosine similarity relationship for all pairs of nodes,
5949
+ providing efficient reuse across multiple calls. This can be expensive
5950
+ as the result can contain O(|V|²) tuples.
5951
+
5952
+ Calling ``cosine_similarity()`` without arguments raises a ``ValueError``,
5953
+ to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
5954
+
5955
+ In contrast, ``cosine_similarity(from_=subset)`` constrains the computation to
5956
+ tuples with the first position in the passed-in ``subset``. The result is
5957
+ not cached; it is specific to the call site. When a significant fraction of
5958
+ the cosine similarity relation is needed across a program,
5959
+ ``cosine_similarity(full=True)`` is typically more efficient. Use
5960
+ ``cosine_similarity(from_=subset)`` only when small subsets of
5961
+ the cosine similarity relationship are needed
5962
+ collectively across the program.
5963
+
5964
+ The ``to`` parameter can be used together with ``from_`` to further
5965
+ constrain the computation: ``cosine_similarity(from_=subset_a, to=subset_b)``
5966
+ computes cosine similarity scores only for node pairs where the first node is in
5967
+ ``subset_a`` and the second node is in ``subset_b``. (Since ``cosine_similarity``
5968
+ is symmetric in its first two positions, using ``to`` without ``from_`` would
5969
+ be functionally redundant, and is not allowed.)
5970
+
5971
+ The ``between`` parameter provides another way to constrain the computation.
5972
+ Unlike ``from_`` and ``to``, which allow you to independently constrain the first
5973
+ and second positions in ``cosine_similarity`` tuples to sets of nodes, ``between``
5974
+ allows you to constrain the first and second positions, jointly, to specific pairs
5975
+ of nodes.
5976
+
5465
5977
  Examples
5466
5978
  --------
5467
5979
  **Unweighted Graph Examples**
@@ -5483,7 +5995,7 @@ class Graph():
5483
5995
  ... Edge.new(src=n4, dst=n3),
5484
5996
  ... )
5485
5997
  >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
5486
- >>> cosine_similarity = graph.cosine_similarity()
5998
+ >>> cosine_similarity = graph.cosine_similarity(full=True)
5487
5999
  >>> select(score).where(cosine_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
5488
6000
  ▰▰▰▰ Setup complete
5489
6001
  score
@@ -5506,7 +6018,7 @@ class Graph():
5506
6018
  ... Edge.new(src=n4, dst=n3),
5507
6019
  ... )
5508
6020
  >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
5509
- >>> cosine_similarity = graph.cosine_similarity()
6021
+ >>> cosine_similarity = graph.cosine_similarity(full=True)
5510
6022
  >>> select(score).where(cosine_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
5511
6023
  ▰▰▰▰ Setup complete
5512
6024
  score
@@ -5531,7 +6043,7 @@ class Graph():
5531
6043
  ... Edge.new(src=n14, dst=n13, weight=1.0),
5532
6044
  ... )
5533
6045
  >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
5534
- >>> cosine_similarity = graph.cosine_similarity()
6046
+ >>> cosine_similarity = graph.cosine_similarity(full=True)
5535
6047
  >>> select(score).where(cosine_similarity(u, v, score), u.id == 1, v.id == 2).inspect()
5536
6048
  ▰▰▰▰ Setup complete
5537
6049
  score
@@ -5553,49 +6065,246 @@ class Graph():
5553
6065
  ... Edge.new(src=n2, dst=n4, weight=5.0),
5554
6066
  ... )
5555
6067
  >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
5556
- >>> cosine_similarity = graph.cosine_similarity()
6068
+ >>> cosine_similarity = graph.cosine_similarity(full=True)
5557
6069
  >>> select(score).where(cosine_similarity(u, v, score), u.id == 1, v.id == 2).inspect()
5558
6070
  ▰▰▰▰ Setup complete
5559
6071
  score
5560
6072
  0 0.996241
5561
6073
 
6074
+ **Domain Constraint Examples**
6075
+
6076
+ >>> # Use 'from_' parameter to constrain the set of nodes for the first position
6077
+ >>> # Using the same undirected unweighted graph from above
6078
+ >>> from relationalai.semantics import where
6079
+ >>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
6080
+ >>> node = Node.ref()
6081
+ >>> where(node.id == 2).define(subset(node))
6082
+ >>>
6083
+ >>> # Get cosine similarity scores only for pairs where first node is in subset
6084
+ >>> constrained_cosine_similarity = graph.cosine_similarity(from_=subset)
6085
+ >>> select(u.id, v.id, score).where(constrained_cosine_similarity(u, v, score)).inspect()
6086
+ ▰▰▰▰ Setup complete
6087
+ id id2 score
6088
+ 0 2 2 1.000000
6089
+ 1 2 3 0.707107
6090
+ 2 2 4 0.408248
6091
+
6092
+ >>> # Use both 'from_' and 'to' parameters to constrain both positions
6093
+ >>> from_subset = model.Relationship(f"{{node:{Node}}} is in from_subset")
6094
+ >>> to_subset = model.Relationship(f"{{node:{Node}}} is in to_subset")
6095
+ >>> where(node.id == 2).define(from_subset(node))
6096
+ >>> where(node.id == 4).define(to_subset(node))
6097
+ >>>
6098
+ >>> # Get cosine similarity scores only where first node is in from_subset and second node is in to_subset
6099
+ >>> constrained_cosine_similarity = graph.cosine_similarity(from_=from_subset, to=to_subset)
6100
+ >>> select(u.id, v.id, score).where(constrained_cosine_similarity(u, v, score)).inspect()
6101
+ ▰▰▰▰ Setup complete
6102
+ id id2 score
6103
+ 0 2 4 0.408248
6104
+
6105
+ >>> # Use 'between' parameter to constrain to specific pairs of nodes
6106
+ >>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
6107
+ >>> node_a, node_b = Node.ref(), Node.ref()
6108
+ >>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
6109
+ >>> where(node_a.id == 3, node_b.id == 4).define(pairs(node_a, node_b))
6110
+ >>>
6111
+ >>> # Get cosine similarity scores only for the specific pairs (2, 4) and (3, 4)
6112
+ >>> constrained_cosine_similarity = graph.cosine_similarity(between=pairs)
6113
+ >>> select(u.id, v.id, score).where(constrained_cosine_similarity(u, v, score)).inspect()
6114
+ ▰▰▰▰ Setup complete
6115
+ id id2 score
6116
+ 0 2 4 0.408248
6117
+ 1 3 4 0.707107
6118
+
5562
6119
  """
5563
- warnings.warn(
5564
- (
5565
- "`cosine_similarity` presently always computes the similarity "
5566
- "of all pairs of nodes of the graph. To provide better control over "
5567
- "the computed subset, `cosine_similarity`'s interface will soon "
5568
- "need to change."
5569
- ),
5570
- FutureWarning,
5571
- stacklevel=2
6120
+ # Validate domain constraint parameters.
6121
+ self._validate_domain_constraint_parameters(
6122
+ 'cosine_similarity', full, from_, to, between
5572
6123
  )
5573
6124
 
6125
+ # At this point, exactly one of `full`, `from_`, or `between`
6126
+ # has been provided, and if `to` is provided, `from_` is also provided.
6127
+
6128
+ # Handle `between`.
6129
+ if between is not None:
6130
+ self._validate_pair_subset_parameter(between)
6131
+ return self._cosine_similarity_between(between)
6132
+
6133
+ # Handle `from_` (and potentially `to`).
6134
+ if from_ is not None:
6135
+ self._validate_node_subset_parameter('from_', from_)
6136
+ if to is not None:
6137
+ self._validate_node_subset_parameter('to', to)
6138
+ return self._cosine_similarity_from_to(from_, to)
6139
+ return self._cosine_similarity_from(from_)
6140
+
6141
+ # Handle `full`.
5574
6142
  return self._cosine_similarity
5575
6143
 
5576
6144
  @cached_property
5577
6145
  def _cosine_similarity(self):
5578
- """Lazily define and cache the self._cosine_similarity relationship."""
5579
- _cosine_similarity_rel = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} has a cosine similarity to {{node_v:{self._NodeConceptStr}}} of {{score:Float}}")
6146
+ """Lazily define and cache the full cosine_similarity relationship."""
6147
+ _cosine_similarity_rel = self._create_cosine_similarity_relationship()
5580
6148
  _cosine_similarity_rel.annotate(annotations.track("graphs", "cosine_similarity"))
6149
+ return _cosine_similarity_rel
5581
6150
 
6151
+ def _cosine_similarity_from(self, node_subset_from: Relationship):
6152
+ """
6153
+ Create a cosine_similarity relationship, with the first position in each
6154
+ tuple constrained to be in the given subset of nodes. Note this relationship
6155
+ is not cached; it is specific to the callsite.
6156
+ """
6157
+ _cosine_similarity_rel = self._create_cosine_similarity_relationship(
6158
+ node_subset_from=node_subset_from
6159
+ )
6160
+ _cosine_similarity_rel.annotate(annotations.track("graphs", "cosine_similarity_from"))
6161
+ return _cosine_similarity_rel
6162
+
6163
+ def _cosine_similarity_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
6164
+ """
6165
+ Create a cosine_similarity relationship, with the first position in each
6166
+ tuple constrained to be in `node_subset_from`, and the second position in
6167
+ each tuple constrained to be in `node_subset_to`. Note this relationship
6168
+ is not cached; it is specific to the callsite.
6169
+ """
6170
+ _cosine_similarity_rel = self._create_cosine_similarity_relationship(
6171
+ node_subset_from=node_subset_from,
6172
+ node_subset_to=node_subset_to
6173
+ )
6174
+ _cosine_similarity_rel.annotate(annotations.track("graphs", "cosine_similarity_from_to"))
6175
+ return _cosine_similarity_rel
6176
+
6177
+ def _cosine_similarity_between(self, pair_subset_between: Relationship):
6178
+ """
6179
+ Create a cosine_similarity relationship, with the first and second position
6180
+ in each tuple jointly constrained to be in the given set of pairs
6181
+ of nodes. Note this relationship is not cached;
6182
+ it is specific to the callsite.
6183
+ """
6184
+ _cosine_similarity_rel = self._create_cosine_similarity_relationship(
6185
+ pair_subset_between=pair_subset_between
6186
+ )
6187
+ _cosine_similarity_rel.annotate(annotations.track("graphs", "cosine_similarity_between"))
6188
+ return _cosine_similarity_rel
6189
+
6190
+ def _create_cosine_similarity_relationship(
6191
+ self,
6192
+ *,
6193
+ node_subset_from: Optional[Relationship] = None,
6194
+ node_subset_to: Optional[Relationship] = None,
6195
+ pair_subset_between: Optional[Relationship] = None,
6196
+ ):
6197
+ """
6198
+ Create a cosine_similarity relationship, optionally constrained by
6199
+ the provided node subsets or pair subset.
6200
+ """
6201
+ _cosine_similarity_rel = self._model.Relationship(
6202
+ f"{{node_u:{self._NodeConceptStr}}} has a cosine similarity to "
6203
+ f"{{node_v:{self._NodeConceptStr}}} of {{score:Float}}"
6204
+ )
6205
+
6206
+ # TODO: Optimization opportunity. In a number of branches below,
6207
+ # we compute _count_outneighbor_of, which transitively computes
6208
+ # _outneighbor_of, and then compute _outneighbor_of directly;
6209
+ # the present code structure makes this a developer-time-efficient
6210
+ # way to get this off the ground, but of course involves redundant
6211
+ # work. In future this redundant work could be eliminated.
6212
+
6213
+ # TODO: Optimization opportunity. In some of the cases below
6214
+ # (unweighted in particular), the node_constraint is redundant with
6215
+ # the constraints baked into the _count_outneighbor_of and
6216
+ # _outneighbor_of relationships. The join with node_constraint
6217
+ # could be eliminated in those cases. Possibly also relevant to
6218
+ # other domain-constrained relations.
6219
+
6220
+ # Branch by case to select appropriate count_outneighbor and
6221
+ # outneighbor relationships, and build appropriate constraints
6222
+ # on the domain of the nodes.
6223
+ node_u, node_v = self.Node.ref(), self.Node.ref()
6224
+
6225
+ # Handle the `between` case.
6226
+ if pair_subset_between is not None:
6227
+ # Extract first-position and second-position nodes.
6228
+ first_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
6229
+ second_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
6230
+ node_x, node_y = self.Node.ref(), self.Node.ref()
6231
+ where(
6232
+ pair_subset_between(node_x, node_y)
6233
+ ).define(
6234
+ first_position_subset(node_x),
6235
+ second_position_subset(node_y)
6236
+ )
6237
+
6238
+ count_outneighbor_u_rel = self._count_outneighbor_of(first_position_subset)
6239
+ count_outneighbor_v_rel = self._count_outneighbor_of(second_position_subset)
6240
+ outneighbor_u_rel = self._outneighbor_of(first_position_subset)
6241
+ outneighbor_v_rel = self._outneighbor_of(second_position_subset)
6242
+
6243
+ node_constraints = [pair_subset_between(node_u, node_v)]
6244
+
6245
+ # Handle the `from_` case.
6246
+ elif node_subset_from is not None and node_subset_to is None:
6247
+ count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
6248
+ count_outneighbor_v_rel = self._count_outneighbor
6249
+ outneighbor_u_rel = self._outneighbor_of(node_subset_from)
6250
+ outneighbor_v_rel = self._outneighbor
6251
+ # TODO: This case could be optimized via an analog of
6252
+ # the depth-2 traversal strategy suggested for the equivalent
6253
+ # case of common_neighbor, but for another day.
6254
+
6255
+ node_constraints = [node_subset_from(node_u)]
6256
+
6257
+ # Handle the `from_`/`to` case.
6258
+ elif node_subset_from is not None and node_subset_to is not None:
6259
+ # Check for object identity optimization.
6260
+ if node_subset_from is node_subset_to:
6261
+ count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
6262
+ count_outneighbor_v_rel = count_outneighbor_u_rel
6263
+ outneighbor_u_rel = self._outneighbor_of(node_subset_from)
6264
+ outneighbor_v_rel = outneighbor_u_rel
6265
+ else:
6266
+ count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
6267
+ count_outneighbor_v_rel = self._count_outneighbor_of(node_subset_to)
6268
+ outneighbor_u_rel = self._outneighbor_of(node_subset_from)
6269
+ outneighbor_v_rel = self._outneighbor_of(node_subset_to)
6270
+
6271
+ node_constraints = [node_subset_from(node_u), node_subset_to(node_v)]
6272
+
6273
+ # Handle the `full` case.
6274
+ else:
6275
+ count_outneighbor_u_rel = self._count_outneighbor
6276
+ count_outneighbor_v_rel = self._count_outneighbor
6277
+ outneighbor_u_rel = self._outneighbor
6278
+ outneighbor_v_rel = self._outneighbor
6279
+
6280
+ node_constraints = []
6281
+
6282
+ # Define cosine similarity logic for both weighted and unweighted cases.
5582
6283
  if not self.weighted:
5583
- node_u, node_v = self.Node.ref(), self.Node.ref()
5584
- count_outneighor_u, count_outneighor_v, score = Integer.ref(), Integer.ref(), Float.ref()
6284
+ # Unweighted case: use count of common outneighbors.
6285
+ count_outneighor_u, count_outneighor_v = Integer.ref(), Integer.ref()
6286
+ common_outneighbor_node = self.Node.ref()
6287
+ score = Float.ref()
5585
6288
 
5586
6289
  where(
5587
- self._count_outneighbor(node_u, count_outneighor_u),
5588
- self._count_outneighbor(node_v, count_outneighor_v),
5589
- c_common := self._count_common_outneighbor_fragment(node_u, node_v),
6290
+ *node_constraints,
6291
+ count_outneighbor_u_rel(node_u, count_outneighor_u),
6292
+ count_outneighbor_v_rel(node_v, count_outneighor_v),
6293
+ c_common := count(common_outneighbor_node).per(node_u, node_v).where(
6294
+ outneighbor_u_rel(node_u, common_outneighbor_node),
6295
+ outneighbor_v_rel(node_v, common_outneighbor_node),
6296
+ ),
5590
6297
  score := c_common / sqrt(count_outneighor_u * count_outneighor_v),
5591
6298
  ).define(
5592
6299
  _cosine_similarity_rel(node_u, node_v, score)
5593
6300
  )
5594
6301
  else:
5595
- node_u, node_v = self.Node.ref(), self.Node.ref()
6302
+ # Weighted case: use dot product and norms.
5596
6303
  node_uk, node_vk = self.Node.ref(), self.Node.ref()
5597
6304
  wu, wv = Float.ref(), Float.ref()
6305
+
5598
6306
  where(
6307
+ *node_constraints,
5599
6308
  squared_norm_wu := sum(node_uk, wu * wu).per(node_u).where(self._weight(node_u, node_uk, wu)),
5600
6309
  squared_norm_wv := sum(node_vk, wv * wv).per(node_v).where(self._weight(node_v, node_vk, wv)),
5601
6310
  wu_dot_wv := self._wu_dot_wv_fragment(node_u, node_v),
@@ -5608,19 +6317,69 @@ class Graph():
5608
6317
 
5609
6318
 
5610
6319
  @include_in_docs
5611
- def adamic_adar(self):
5612
- """Returns a ternary relationship containing the Adamic-Adar index for all pairs of nodes.
6320
+ def adamic_adar(
6321
+ self,
6322
+ *,
6323
+ full: Optional[bool] = None,
6324
+ from_: Optional[Relationship] = None,
6325
+ to: Optional[Relationship] = None,
6326
+ between: Optional[Relationship] = None,
6327
+ ):
6328
+ """Returns a ternary relationship containing the Adamic-Adar index for pairs of nodes.
5613
6329
 
5614
6330
  The Adamic-Adar index is a similarity measure between two nodes based
5615
6331
  on the amount of shared neighbors between them, giving more weight to
5616
6332
  common neighbors that are less connected.
5617
6333
 
6334
+ Parameters
6335
+ ----------
6336
+ full : bool, optional
6337
+ If ``True``, computes the Adamic-Adar index for all pairs of nodes in
6338
+ the graph. This computation can be expensive for large graphs, as
6339
+ dependencies can scale quadratically in the number of edges or cubically
6340
+ in the number of nodes. Mutually exclusive with other parameters.
6341
+ Default is ``None``.
6342
+ from_ : Relationship, optional
6343
+ A unary relationship containing a subset of the graph's nodes. When
6344
+ provided, constrains the domain of the Adamic-Adar computation: only
6345
+ Adamic-Adar indices for node pairs where the first node is in this relationship
6346
+ are computed and returned. Mutually exclusive with ``full`` and ``between``.
6347
+ Default is ``None``.
6348
+ to : Relationship, optional
6349
+ A unary relationship containing a subset of the graph's nodes. Can only
6350
+ be used together with the ``from_`` parameter. When provided with ``from_``,
6351
+ constrains the domain of the Adamic-Adar computation: only Adamic-Adar
6352
+ indices for node pairs where the first node is in ``from_`` and the
6353
+ second node is in ``to`` are computed and returned.
6354
+ Default is ``None``.
6355
+ between : Relationship, optional
6356
+ A binary relationship containing pairs of nodes. When provided,
6357
+ constrains the domain of the Adamic-Adar computation: only Adamic-Adar
6358
+ indices for the specific node pairs in this relationship are computed
6359
+ and returned. Mutually exclusive with other parameters.
6360
+ Default is ``None``.
6361
+
5618
6362
  Returns
5619
6363
  -------
5620
6364
  Relationship
5621
6365
  A ternary relationship where each tuple represents a pair of nodes
5622
6366
  and their Adamic-Adar index.
5623
6367
 
6368
+ Raises
6369
+ ------
6370
+ ValueError
6371
+ If ``full`` is provided with any other parameter.
6372
+ If ``between`` is provided with any other parameter.
6373
+ If ``from_`` is provided with any parameter other than ``to``.
6374
+ If none of ``full``, ``from_``, or ``between`` is provided.
6375
+ If ``full`` is not ``True`` or ``None``.
6376
+ AssertionError
6377
+ If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
6378
+ If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
6379
+ If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
6380
+ If ``from_`` or ``to`` is not a unary relationship.
6381
+ If ``between`` is not a binary relationship.
6382
+
5624
6383
  Relationship Schema
5625
6384
  -------------------
5626
6385
  ``adamic_adar(node_u, node_v, score)``
@@ -5644,9 +6403,38 @@ class Graph():
5644
6403
 
5645
6404
  AA(u,v) = Σ (1 / log(degree(w)))
5646
6405
 
6406
+ The ``adamic_adar(full=True)`` method computes and caches the full Adamic-Adar
6407
+ relationship for all pairs of nodes, providing efficient reuse across
6408
+ multiple calls. This can be expensive as dependencies can contain O(|E|²) or
6409
+ O(|V|³) tuples depending on graph density.
6410
+
6411
+ Calling ``adamic_adar()`` without arguments raises a ``ValueError``,
6412
+ to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
6413
+
6414
+ In contrast, ``adamic_adar(from_=subset)`` constrains the computation to
6415
+ tuples with the first position in the passed-in ``subset``. The result is
6416
+ not cached; it is specific to the call site. When a significant fraction of
6417
+ the Adamic-Adar relation is needed across a program, ``adamic_adar(full=True)``
6418
+ is typically more efficient. Use ``adamic_adar(from_=subset)`` only
6419
+ when small subsets of the Adamic-Adar relationship are needed
6420
+ collectively across the program.
6421
+
6422
+ The ``to`` parameter can be used together with ``from_`` to further
6423
+ constrain the computation: ``adamic_adar(from_=subset_a, to=subset_b)``
6424
+ computes Adamic-Adar indices only for node pairs where the first node is in
6425
+ ``subset_a`` and the second node is in ``subset_b``. (Since ``adamic_adar``
6426
+ is symmetric in its first two positions, using ``to`` without ``from_`` would
6427
+ be functionally redundant, and is not allowed.)
6428
+
6429
+ The ``between`` parameter provides another way to constrain the computation.
6430
+ Unlike ``from_`` and ``to``, which allow you to independently constrain the first
6431
+ and second positions in ``adamic_adar`` tuples to sets of nodes, ``between``
6432
+ allows you to constrain the first and second positions, jointly, to specific pairs
6433
+ of nodes.
6434
+
5647
6435
  Examples
5648
6436
  --------
5649
- >>> from relationalai.semantics import Model, define, select, Float
6437
+ >>> from relationalai.semantics import Model, define, select, where, Float
5650
6438
  >>> from relationalai.semantics.reasoners.graph import Graph
5651
6439
  >>>
5652
6440
  >>> # 1. Set up an undirected graph
@@ -5665,10 +6453,10 @@ class Graph():
5665
6453
  ... Edge.new(src=n4, dst=n3),
5666
6454
  ... )
5667
6455
  >>>
5668
- >>> # 3. Select the Adamic-Adar index for the pair (2, 4)
6456
+ >>> # 3. Select the Adamic-Adar indices from the full relationship
5669
6457
  >>> u, v = Node.ref("u"), Node.ref("v")
5670
6458
  >>> score = Float.ref("score")
5671
- >>> adamic_adar = graph.adamic_adar()
6459
+ >>> adamic_adar = graph.adamic_adar(full=True)
5672
6460
  >>> select(
5673
6461
  ... u.id, v.id, score,
5674
6462
  ... ).where(
@@ -5680,33 +6468,193 @@ class Graph():
5680
6468
  id id2 score
5681
6469
  0 2 4 0.910239
5682
6470
 
6471
+ >>> # 4. Use 'from_' parameter to constrain the set of nodes for the first position
6472
+ >>> # Define a subset containing only node 1
6473
+ >>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
6474
+ >>> node = Node.ref()
6475
+ >>> where(node.id == 1).define(subset(node))
6476
+ >>>
6477
+ >>> # Get Adamic-Adar indices only for pairs where first node is in subset
6478
+ >>> constrained_adamic_adar = graph.adamic_adar(from_=subset)
6479
+ >>> select(u.id, v.id, score).where(constrained_adamic_adar(u, v, score)).inspect()
6480
+ ▰▰▰▰ Setup complete
6481
+ id id2 score
6482
+ 0 1 1 2.885390
6483
+ 1 1 4 2.885390
6484
+
6485
+ >>> # 5. Use both 'from_' and 'to' parameters to constrain both positions
6486
+ >>> subset_a = model.Relationship(f"{{node:{Node}}} is in subset_a")
6487
+ >>> subset_b = model.Relationship(f"{{node:{Node}}} is in subset_b")
6488
+ >>> where(node.id == 1).define(subset_a(node))
6489
+ >>> where(node.id == 4).define(subset_b(node))
6490
+ >>>
6491
+ >>> # Get Adamic-Adar indices only where first node is in subset_a and second node is in subset_b
6492
+ >>> constrained_adamic_adar = graph.adamic_adar(from_=subset_a, to=subset_b)
6493
+ >>> select(u.id, v.id, score).where(constrained_adamic_adar(u, v, score)).inspect()
6494
+ ▰▰▰▰ Setup complete
6495
+ id id2 score
6496
+ 0 1 4 2.885390
6497
+
6498
+ >>> # 6. Use 'between' parameter to constrain to specific pairs of nodes
6499
+ >>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
6500
+ >>> node_a, node_b = Node.ref(), Node.ref()
6501
+ >>> where(node_a.id == 1, node_b.id == 4).define(pairs(node_a, node_b))
6502
+ >>> where(node_a.id == 2, node_b.id == 3).define(pairs(node_a, node_b))
6503
+ >>>
6504
+ >>> # Get Adamic-Adar indices only for the specific pairs (1, 4) and (2, 3)
6505
+ >>> constrained_adamic_adar = graph.adamic_adar(between=pairs)
6506
+ >>> select(u.id, v.id, score).where(constrained_adamic_adar(u, v, score)).inspect()
6507
+ ▰▰▰▰ Setup complete
6508
+ id id2 score
6509
+ 0 1 4 2.885390
6510
+ 1 2 3 1.442695
6511
+
5683
6512
  """
5684
- warnings.warn(
5685
- (
5686
- "`adamic_adar` presently always computes the similarity "
5687
- "of all pairs of nodes of the graph. To provide better control over "
5688
- "the computed subset, `adamic_adar`'s interface will soon "
5689
- "need to change."
5690
- ),
5691
- FutureWarning,
5692
- stacklevel=2
6513
+ # Validate domain constraint parameters.
6514
+ self._validate_domain_constraint_parameters(
6515
+ 'adamic_adar', full, from_, to, between
5693
6516
  )
5694
6517
 
6518
+ # At this point, exactly one of `full`, `from_`, or `between`
6519
+ # has been provided, and if `to` is provided, `from_` is also provided.
6520
+
6521
+ # Handle `between`.
6522
+ if between is not None:
6523
+ self._validate_pair_subset_parameter(between)
6524
+ return self._adamic_adar_between(between)
6525
+
6526
+ # Handle `from_` (and potentially `to`).
6527
+ if from_ is not None:
6528
+ self._validate_node_subset_parameter('from_', from_)
6529
+ if to is not None:
6530
+ self._validate_node_subset_parameter('to', to)
6531
+ return self._adamic_adar_from_to(from_, to)
6532
+ return self._adamic_adar_from(from_)
6533
+
6534
+ # Handle `full`.
5695
6535
  return self._adamic_adar
5696
6536
 
5697
6537
  @cached_property
5698
6538
  def _adamic_adar(self):
5699
- """Lazily define and cache the self._adamic_adar relationship."""
5700
- _adamic_adar_rel = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} have adamic adar score {{score:Float}}")
6539
+ """
+ Lazily define and cache the full (unconstrained) adamic_adar relationship.
+
+ The relationship is built by `_create_adamic_adar_relationship` called
+ with no subset arguments, then annotated with the
+ ``("graphs", "adamic_adar")`` tracking annotation. Because this is a
+ `cached_property`, the same relationship object is returned on every
+ subsequent access.
+ """
6540
+ _adamic_adar_rel = self._create_adamic_adar_relationship()
5701
6541
  _adamic_adar_rel.annotate(annotations.track("graphs", "adamic_adar"))
6542
+ return _adamic_adar_rel
6543
+
6544
+ def _adamic_adar_from(self, node_subset_from: Relationship):
6545
+ """
6546
+ Create an adamic_adar relationship, with the first position in each
6547
+ tuple constrained to be in the given subset of nodes. Note this relationship
6548
+ is not cached; a new one is created on every call, so it is specific to
+ the call site.
+
+ Parameters
+ ----------
+ node_subset_from : Relationship
+ The subset of nodes allowed in the first position. It is forwarded
+ unchanged as the `node_subset_from` keyword argument of
+ `_create_adamic_adar_relationship`; this method does not validate it.
+
+ Returns
+ -------
+ Relationship
+ The relationship produced by `_create_adamic_adar_relationship`,
+ annotated with the ``("graphs", "adamic_adar_from")`` tracking annotation.
6549
+ """
6550
+ _adamic_adar_rel = self._create_adamic_adar_relationship(
6551
+ node_subset_from=node_subset_from
6552
+ )
6553
+ _adamic_adar_rel.annotate(annotations.track("graphs", "adamic_adar_from"))
6554
+ return _adamic_adar_rel
6555
+
6556
+ def _adamic_adar_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
6557
+ """
6558
+ Create an adamic_adar relationship, with the first position in each
6559
+ tuple constrained to be in `node_subset_from`, and the second position in
6560
+ each tuple constrained to be in `node_subset_to`. Note this relationship
6561
+ is not cached; a new one is created on every call, so it is specific to
+ the call site.
+
+ Parameters
+ ----------
+ node_subset_from : Relationship
+ The subset of nodes allowed in the first position.
+ node_subset_to : Relationship
+ The subset of nodes allowed in the second position.
+
+ Both arguments are forwarded unchanged, by keyword, to
+ `_create_adamic_adar_relationship`; this method does not validate them.
+
+ Returns
+ -------
+ Relationship
+ The relationship produced by `_create_adamic_adar_relationship`,
+ annotated with the ``("graphs", "adamic_adar_from_to")`` tracking annotation.
6562
+ """
6563
+ _adamic_adar_rel = self._create_adamic_adar_relationship(
6564
+ node_subset_from=node_subset_from,
6565
+ node_subset_to=node_subset_to
6566
+ )
6567
+ _adamic_adar_rel.annotate(annotations.track("graphs", "adamic_adar_from_to"))
6568
+ return _adamic_adar_rel
6569
+
6570
+ def _adamic_adar_between(self, pair_subset_between: Relationship):
6571
+ """
6572
+ Create an adamic_adar relationship, with the first and second position
6573
+ in each tuple jointly constrained to be in the given set of pairs
6574
+ of nodes. Note this relationship is not cached; a new one is created
6575
+ on every call, so it is specific to the call site.
+
+ Parameters
+ ----------
+ pair_subset_between : Relationship
+ The set of node pairs to which the first two positions are jointly
+ constrained. It is forwarded unchanged as the `pair_subset_between`
+ keyword argument of `_create_adamic_adar_relationship`; this method
+ does not validate it.
+
+ Returns
+ -------
+ Relationship
+ The relationship produced by `_create_adamic_adar_relationship`,
+ annotated with the ``("graphs", "adamic_adar_between")`` tracking annotation.
6576
+ """
6577
+ _adamic_adar_rel = self._create_adamic_adar_relationship(
6578
+ pair_subset_between=pair_subset_between
6579
+ )
6580
+ _adamic_adar_rel.annotate(annotations.track("graphs", "adamic_adar_between"))
6581
+ return _adamic_adar_rel
5702
6582
 
6583
+ def _create_adamic_adar_relationship(
6584
+ self,
6585
+ *,
6586
+ node_subset_from: Optional[Relationship] = None,
6587
+ node_subset_to: Optional[Relationship] = None,
6588
+ pair_subset_between: Optional[Relationship] = None,
6589
+ ):
6590
+ """
6591
+ Create adamic_adar relationship, optionally constrained by the provided
6592
+ node subsets or pair subset.
6593
+ """
6594
+ _adamic_adar_rel = self._model.Relationship(
6595
+ f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
6596
+ f"have adamic adar score {{score:Float}}"
6597
+ )
6598
+
6599
+ # NOTE: Handling of the common_neighbor relation (`common_neighbor_rel`)
6600
+ # differs in each case, whereas handling of the count_neighbor relation
6601
+ # (`count_neighbor_rel`) is: a) the same among the constrained cases;
6602
+ # and b) different in the unconstrained case. As such we handle
6603
+ # `common_neighbor_rel` in the branches by case below, and handle
6604
+ # `count_neighbor_rel` in a separate constrained/unconstrained branch later.
6605
+
6606
+ # Handle the `between` case.
6607
+ if pair_subset_between is not None:
6608
+ # Get the appropriate common_neighbor relationship.
6609
+ common_neighbor_rel = self._common_neighbor_between(pair_subset_between)
6610
+
6611
+ # Handle the `from_` case.
6612
+ elif node_subset_from is not None and node_subset_to is None:
6613
+ # Get the appropriate common_neighbor relationship.
6614
+ common_neighbor_rel = self._common_neighbor_from(node_subset_from)
6615
+
6616
+ # Handle the `from_`/`to` case.
6617
+ elif node_subset_from is not None and node_subset_to is not None:
6618
+ common_neighbor_rel = self._common_neighbor_from_to(node_subset_from, node_subset_to)
6619
+ # Note that _common_neighbor_from_to handles optimization
6620
+ # when the from_ and to sets are object-identical.
6621
+
6622
+ # Handle the `full` case.
6623
+ else:
6624
+ # Use cached full relationship.
6625
+ common_neighbor_rel = self._common_neighbor
6626
+
6627
+ # Handle `count_neighbor_rel` for unconstrained versus constrained cases.
6628
+ if pair_subset_between is None and node_subset_from is None:
6629
+ # Unconstrained case.
6630
+ count_neighbor_rel = self._count_neighbor
6631
+
6632
+ else:
6633
+ # Constrained cases.
6634
+
6635
+ # Extract common neighbors that appear in
6636
+ # the constrained common_neighbor relationship.
6637
+ common_neighbors_subset = self._model.Relationship(
6638
+ f"{{node:{self._NodeConceptStr}}} is a relevant common neighbor"
6639
+ )
6640
+ node_x, node_y, neighbor_z = self.Node.ref(), self.Node.ref(), self.Node.ref()
6641
+ where(
6642
+ common_neighbor_rel(node_x, node_y, neighbor_z)
6643
+ ).define(
6644
+ common_neighbors_subset(neighbor_z)
6645
+ )
6646
+
6647
+ # From those common neighbors,
6648
+ # build a constrained count_neighbor relationship.
6649
+ count_neighbor_rel = self._count_neighbor_of(common_neighbors_subset)
6650
+
6651
+ # Define the Adamic-Adar aggregation using the selected relationships.
5703
6652
  node_u, node_v, common_neighbor = self.Node.ref(), self.Node.ref(), self.Node.ref()
5704
6653
  neighbor_count = Integer.ref()
5705
-
5706
6654
  where(
5707
6655
  _score := sum(common_neighbor, 1.0 / natural_log(neighbor_count)).per(node_u, node_v).where(
5708
- self._common_neighbor(node_u, node_v, common_neighbor),
5709
- self._count_neighbor(common_neighbor, neighbor_count),
6656
+ common_neighbor_rel(node_u, node_v, common_neighbor),
6657
+ count_neighbor_rel(common_neighbor, neighbor_count),
5710
6658
  )
5711
6659
  ).define(_adamic_adar_rel(node_u, node_v, _score))
5712
6660