relationalai 0.11.4__py3-none-any.whl → 0.12.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. relationalai/clients/config.py +7 -0
  2. relationalai/clients/direct_access_client.py +113 -0
  3. relationalai/clients/snowflake.py +263 -189
  4. relationalai/clients/types.py +4 -1
  5. relationalai/clients/use_index_poller.py +72 -48
  6. relationalai/clients/util.py +9 -0
  7. relationalai/dsl.py +1 -2
  8. relationalai/early_access/metamodel/rewrite/__init__.py +5 -3
  9. relationalai/early_access/rel/rewrite/__init__.py +1 -1
  10. relationalai/environments/snowbook.py +10 -1
  11. relationalai/errors.py +24 -3
  12. relationalai/semantics/internal/annotations.py +1 -0
  13. relationalai/semantics/internal/internal.py +22 -3
  14. relationalai/semantics/lqp/builtins.py +1 -0
  15. relationalai/semantics/lqp/executor.py +12 -4
  16. relationalai/semantics/lqp/model2lqp.py +1 -0
  17. relationalai/semantics/lqp/passes.py +3 -4
  18. relationalai/semantics/{rel → lqp}/rewrite/__init__.py +6 -0
  19. relationalai/semantics/metamodel/builtins.py +12 -1
  20. relationalai/semantics/metamodel/executor.py +2 -1
  21. relationalai/semantics/metamodel/rewrite/__init__.py +3 -9
  22. relationalai/semantics/metamodel/rewrite/flatten.py +8 -7
  23. relationalai/semantics/reasoners/graph/core.py +1356 -258
  24. relationalai/semantics/rel/builtins.py +5 -1
  25. relationalai/semantics/rel/compiler.py +3 -3
  26. relationalai/semantics/rel/executor.py +20 -11
  27. relationalai/semantics/sql/compiler.py +2 -3
  28. relationalai/semantics/sql/executor/duck_db.py +8 -4
  29. relationalai/semantics/sql/executor/snowflake.py +1 -1
  30. relationalai/tools/cli.py +17 -6
  31. relationalai/tools/cli_controls.py +334 -352
  32. relationalai/tools/constants.py +1 -0
  33. relationalai/tools/query_utils.py +27 -0
  34. relationalai/util/otel_configuration.py +1 -1
  35. {relationalai-0.11.4.dist-info → relationalai-0.12.1.dist-info}/METADATA +5 -4
  36. {relationalai-0.11.4.dist-info → relationalai-0.12.1.dist-info}/RECORD +45 -45
  37. relationalai/semantics/metamodel/rewrite/gc_nodes.py +0 -58
  38. relationalai/semantics/metamodel/rewrite/list_types.py +0 -109
  39. /relationalai/semantics/{rel → lqp}/rewrite/cdc.py +0 -0
  40. /relationalai/semantics/{rel → lqp}/rewrite/extract_common.py +0 -0
  41. /relationalai/semantics/{metamodel → lqp}/rewrite/extract_keys.py +0 -0
  42. /relationalai/semantics/{metamodel → lqp}/rewrite/fd_constraints.py +0 -0
  43. /relationalai/semantics/{rel → lqp}/rewrite/quantify_vars.py +0 -0
  44. /relationalai/semantics/{metamodel → lqp}/rewrite/splinter.py +0 -0
  45. {relationalai-0.11.4.dist-info → relationalai-0.12.1.dist-info}/WHEEL +0 -0
  46. {relationalai-0.11.4.dist-info → relationalai-0.12.1.dist-info}/entry_points.txt +0 -0
  47. {relationalai-0.11.4.dist-info → relationalai-0.12.1.dist-info}/licenses/LICENSE +0 -0
@@ -20,6 +20,7 @@ from relationalai.semantics import (
20
20
  count, sum, avg,
21
21
  )
22
22
  from relationalai.docutils import include_in_docs
23
+ from relationalai.semantics.internal import annotations
23
24
  from relationalai.semantics.std.math import abs, isnan, isinf, maximum, natural_log, sqrt
24
25
 
25
26
  Numeric = Union[int, float, Decimal]
@@ -1055,6 +1056,181 @@ class Graph():
1055
1056
 
1056
1057
  # End Visualization --------------------------------------------------------
1057
1058
 
1059
+ # The following three helper methods validate
1060
+ # `from_`, `to`, and `between`
1061
+ # parameters to public methods that accept them.
1062
+
1063
def _validate_domain_constraint_parameters(
    self,
    method_name: str,
    full: Optional[bool],
    from_: Optional[Relationship],
    to: Optional[Relationship],
    between: Optional[Relationship],
):
    """
    Check that `full`, `from_`, `to`, and `between` form a legal combination.

    Shared by the public graph methods that accept these four
    domain-constraint parameters (e.g., common_neighbor), so that
    they all reject the same invalid combinations with the same messages.

    Parameters
    ----------
    method_name : str
        Name of the public method on whose behalf the check runs;
        interpolated into the error messages.
    full : bool, optional
        The caller's `full` argument.
    from_ : Relationship, optional
        The caller's `from_` argument.
    to : Relationship, optional
        The caller's `to` argument.
    between : Relationship, optional
        The caller's `between` argument.

    Raises
    ------
    ValueError
        If `full` is combined with any other parameter, if `between` is
        combined with `from_` or `to`, if `to` is given without `from_`,
        if none of `full`, `from_`, `between` is given, or if `full` is
        given but is not exactly `True`.
    """
    # Only presence (not the value) matters for the combination checks.
    has_full = full is not None
    has_from = from_ is not None
    has_to = to is not None
    has_between = between is not None

    # `full` must stand alone.
    if has_full and (has_from or has_to or has_between):
        raise ValueError(
            "The 'full' parameter is mutually exclusive with 'from_', 'to', and 'between'. "
            f"Use 'full=True' to compute {method_name} for all node pairs, "
            "or use 'from_'/'to'/'between' to constrain computation to "
            "specific nodes or pairs."
        )

    # `between` must stand alone (the `full` case was rejected above).
    if has_between and (has_from or has_to):
        raise ValueError(
            "The 'between' parameter is mutually exclusive with 'from_' and 'to'. "
            "Use 'between' to constrain computation to specific node pairs, "
            "or use 'from_'/'to' to constrain by position."
        )

    # `to` is only meaningful as a refinement of `from_`.
    if has_to and not has_from:
        raise ValueError(
            "The 'to' parameter can only be used together with the 'from_' parameter. "
            f"The 'from_' parameter constrains the first position in {method_name} tuples, "
            f"while 'to' constrains the second position. Since {method_name} is symmetric "
            "in its first two positions, 'to' without 'from_' would be functionally redundant. "
            "Please either provide both 'from_' and 'to' parameters, or only 'from_'."
        )

    # Nothing was passed at all: refuse rather than silently running the
    # potentially expensive all-pairs computation.
    if not (has_full or has_from or has_between):
        raise ValueError(
            f"Computing {method_name} for all pairs of nodes can be expensive. "
            f"To compute the full {method_name} relationship, "
            f"please call `{method_name}(full=True)`. To constrain computation to specific nodes, "
            f"please use `{method_name}(from_=node_subset)`, "
            f"`{method_name}(from_=node_subset_a, to=node_subset_b)`, "
            f"or `{method_name}(between=node_pairs)`."
        )

    # At this point a provided `full` is the only argument; it must be
    # the literal `True`, not merely non-None.
    if has_full and full is not True:
        raise ValueError(
            f"Invalid value (`{full}`) for 'full' parameter. Use `full=True` "
            f"to compute the full {method_name} relationship, or use 'from_', "
            "'from_' and 'to', or 'between' to constrain computation."
        )
1161
+
1162
def _validate_node_subset_parameter(
    self,
    parameter_name: str,
    node_subset_relation: Relationship,
):
    """
    Validate that a parameter identifying a subset of nodes of interest
    is a unary relationship, of nodes, attached to the same model
    that the graph is attached to.

    Parameters
    ----------
    parameter_name : str
        Name of the public parameter being validated (e.g. 'of');
        interpolated into the error messages.
    node_subset_relation : Relationship
        The value the caller passed for that parameter.

    Raises
    ------
    AssertionError
        If the value is not a `Relationship`, is attached to a different
        model than the graph, does not have exactly one field, or its
        field is not typed as the graph's Node concept.
    """
    # NOTE: these checks deliberately raise `AssertionError` explicitly
    # instead of using `assert` statements. `assert` is compiled away under
    # `python -O`, which would silently disable validation of user input;
    # raising explicitly keeps the same exception type in every mode.

    # Validate that the parameter is a relationship.
    if not isinstance(node_subset_relation, Relationship):
        raise AssertionError(
            f"The '{parameter_name}' parameter must be a `Relationship`, "
            f"but is a `{type(node_subset_relation).__name__}`."
        )

    # Validate that the relationship is attached to the same model as the graph.
    if node_subset_relation._model is not self._model:
        raise AssertionError(
            f"The given '{parameter_name}' relationship must "
            "be attached to the same model as the graph."
        )

    # Validate that it's a unary relationship (has exactly one field).
    fields = node_subset_relation._fields
    if len(fields) != 1:
        raise AssertionError(
            f"The '{parameter_name}' parameter must be a unary relationship, "
            f"but it has {len(fields)} fields."
        )

    # Validate that the concept type matches the graph's Node concept.
    if fields[0].type_str != self.Node._name:
        raise AssertionError(
            f"The '{parameter_name}' relationship must be over "
            f"the graph's Node concept ('{self.Node._name}'), "
            f"but is over '{fields[0].type_str}'."
        )
1196
+
1197
+ # No parameter name at this time, as pertains only to `between` for now.
1198
def _validate_pair_subset_parameter(self, pairs_relation):
    """
    Validate that a parameter identifying pairs of nodes of interest is
    a binary relationship, of pairs of nodes, attached to the same model
    that the graph is attached to.

    Parameters
    ----------
    pairs_relation : Relationship
        The value the caller passed for the 'between' parameter.

    Raises
    ------
    AssertionError
        If the value is not a `Relationship`, is attached to a different
        model than the graph, does not have exactly two fields, or either
        field is not typed as the graph's Node concept.
    """
    # NOTE: these checks deliberately raise `AssertionError` explicitly
    # instead of using `assert` statements. `assert` is compiled away under
    # `python -O`, which would silently disable validation of user input;
    # raising explicitly keeps the same exception type in every mode.

    # Validate that the parameter is a relationship.
    if not isinstance(pairs_relation, Relationship):
        raise AssertionError(
            "The 'between' parameter must be a `Relationship`, "
            f"but is a `{type(pairs_relation).__name__}`."
        )

    # Validate that the relationship is attached to the same model as the graph.
    if pairs_relation._model is not self._model:
        raise AssertionError(
            "The given 'between' relationship must be "
            "attached to the same model as the graph."
        )

    # Validate that it's a binary relationship (has exactly two fields).
    fields = pairs_relation._fields
    if len(fields) != 2:
        raise AssertionError(
            "The 'between' parameter must be a binary relationship, "
            f"but it has {len(fields)} fields."
        )

    # Validate that both fields are typed as the graph's Node concept,
    # checking the first position before the second.
    for ordinal, field in zip(("first", "second"), fields):
        if field.type_str != self.Node._name:
            raise AssertionError(
                f"The 'between' relationship's {ordinal} field must be "
                f"the graph's Node concept ('{self.Node._name}'), "
                f"but is '{field.type_str}'."
            )
1233
+
1058
1234
 
1059
1235
  # The following three `_count_[in,out]neighbor` relationships are
1060
1236
  # primarily for internal consumption. They differ from corresponding
@@ -1064,26 +1240,26 @@ class Graph():
1064
1240
  @cached_property
1065
1241
  def _count_neighbor(self):
1066
1242
  """Lazily define and cache the self._count_neighbor relationship."""
1067
- return self._create_count_neighbor_relationship(nodes_subset=None)
1243
+ return self._create_count_neighbor_relationship(node_subset=None)
1068
1244
 
1069
- def _count_neighbor_of(self, nodes_subset: Relationship):
1245
+ def _count_neighbor_of(self, node_subset: Relationship):
1070
1246
  """
1071
1247
  Create a _count_neighbor relationship constrained to the subset of nodes
1072
- in `nodes_subset`. Note this relationship is not cached; it is
1248
+ in `node_subset`. Note this relationship is not cached; it is
1073
1249
  specific to the callsite.
1074
1250
  """
1075
- return self._create_count_neighbor_relationship(nodes_subset=nodes_subset)
1251
+ return self._create_count_neighbor_relationship(node_subset=node_subset)
1076
1252
 
1077
- def _create_count_neighbor_relationship(self, *, nodes_subset: Optional[Relationship]):
1253
+ def _create_count_neighbor_relationship(self, *, node_subset: Optional[Relationship]):
1078
1254
  _count_neighbor_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has neighbor count {{count:Integer}}")
1079
1255
 
1080
1256
  # Choose the appropriate neighbor relationship based on whether we have constraints
1081
- if nodes_subset is None:
1257
+ if node_subset is None:
1082
1258
  # No constraint - use cached neighbor relationship
1083
1259
  neighbor_rel = self._neighbor
1084
1260
  else:
1085
1261
  # Constrained to nodes in the subset - use constrained neighbor relationship
1086
- neighbor_rel = self._neighbor_of(nodes_subset)
1262
+ neighbor_rel = self._neighbor_of(node_subset)
1087
1263
 
1088
1264
  # Apply the same counting logic for both cases
1089
1265
  src, dst = self.Node.ref(), self.Node.ref()
@@ -1094,26 +1270,26 @@ class Graph():
1094
1270
  @cached_property
1095
1271
  def _count_inneighbor(self):
1096
1272
  """Lazily define and cache the self._count_inneighbor relationship."""
1097
- return self._create_count_inneighbor_relationship(nodes_subset=None)
1273
+ return self._create_count_inneighbor_relationship(node_subset=None)
1098
1274
 
1099
- def _count_inneighbor_of(self, nodes_subset: Relationship):
1275
+ def _count_inneighbor_of(self, node_subset: Relationship):
1100
1276
  """
1101
1277
  Create a _count_inneighbor relationship constrained to the subset of nodes
1102
- in `nodes_subset`. Note this relationship is not cached; it is
1278
+ in `node_subset`. Note this relationship is not cached; it is
1103
1279
  specific to the callsite.
1104
1280
  """
1105
- return self._create_count_inneighbor_relationship(nodes_subset=nodes_subset)
1281
+ return self._create_count_inneighbor_relationship(node_subset=node_subset)
1106
1282
 
1107
- def _create_count_inneighbor_relationship(self, *, nodes_subset: Optional[Relationship]):
1283
+ def _create_count_inneighbor_relationship(self, *, node_subset: Optional[Relationship]):
1108
1284
  _count_inneighbor_rel = self._model.Relationship(f"{{dst:{self._NodeConceptStr}}} has inneighbor count {{count:Integer}}")
1109
1285
 
1110
1286
  # Choose the appropriate inneighbor relationship based on whether we have constraints
1111
- if nodes_subset is None:
1287
+ if node_subset is None:
1112
1288
  # No constraint - use cached inneighbor relationship
1113
1289
  inneighbor_rel = self._inneighbor
1114
1290
  else:
1115
1291
  # Constrained to nodes in the subset - use constrained inneighbor relationship
1116
- inneighbor_rel = self._inneighbor_of(nodes_subset)
1292
+ inneighbor_rel = self._inneighbor_of(node_subset)
1117
1293
 
1118
1294
  # Apply the same counting logic for both cases
1119
1295
  dst, src = self.Node.ref(), self.Node.ref()
@@ -1124,26 +1300,26 @@ class Graph():
1124
1300
  @cached_property
1125
1301
  def _count_outneighbor(self):
1126
1302
  """Lazily define and cache the self._count_outneighbor relationship."""
1127
- return self._create_count_outneighbor_relationship(nodes_subset=None)
1303
+ return self._create_count_outneighbor_relationship(node_subset=None)
1128
1304
 
1129
- def _count_outneighbor_of(self, nodes_subset: Relationship):
1305
+ def _count_outneighbor_of(self, node_subset: Relationship):
1130
1306
  """
1131
1307
  Create a _count_outneighbor relationship constrained to the subset of nodes
1132
- in `nodes_subset`. Note this relationship is not cached; it is
1308
+ in `node_subset`. Note this relationship is not cached; it is
1133
1309
  specific to the callsite.
1134
1310
  """
1135
- return self._create_count_outneighbor_relationship(nodes_subset=nodes_subset)
1311
+ return self._create_count_outneighbor_relationship(node_subset=node_subset)
1136
1312
 
1137
- def _create_count_outneighbor_relationship(self, *, nodes_subset: Optional[Relationship]):
1313
+ def _create_count_outneighbor_relationship(self, *, node_subset: Optional[Relationship]):
1138
1314
  _count_outneighbor_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has outneighbor count {{count:Integer}}")
1139
1315
 
1140
1316
  # Choose the appropriate outneighbor relationship based on whether we have constraints
1141
- if nodes_subset is None:
1317
+ if node_subset is None:
1142
1318
  # No constraint - use cached outneighbor relationship
1143
1319
  outneighbor_rel = self._outneighbor
1144
1320
  else:
1145
1321
  # Constrained to nodes in the subset - use constrained outneighbor relationship
1146
- outneighbor_rel = self._outneighbor_of(nodes_subset)
1322
+ outneighbor_rel = self._outneighbor_of(node_subset)
1147
1323
 
1148
1324
  # Apply the same counting logic for both cases
1149
1325
  src, dst = self.Node.ref(), self.Node.ref()
@@ -1250,6 +1426,8 @@ class Graph():
1250
1426
  def _num_nodes(self):
1251
1427
  """Lazily define and cache the self._num_nodes relationship."""
1252
1428
  _num_nodes_rel = self._model.Relationship("The graph has {num_nodes:Integer} nodes")
1429
+ _num_nodes_rel.annotate(annotations.track("graphs", "num_nodes"))
1430
+
1253
1431
  define(_num_nodes_rel(count(self.Node) | 0))
1254
1432
  return _num_nodes_rel
1255
1433
 
@@ -1316,6 +1494,7 @@ class Graph():
1316
1494
  def _num_edges(self):
1317
1495
  """Lazily define and cache the self._num_edges relationship."""
1318
1496
  _num_edges_rel = self._model.Relationship("The graph has {num_edges:Integer} edges")
1497
+ _num_edges_rel.annotate(annotations.track("graphs", "num_edges"))
1319
1498
 
1320
1499
  src, dst = self.Node.ref(), self.Node.ref()
1321
1500
  if self.directed:
@@ -1433,52 +1612,27 @@ class Graph():
1433
1612
  return self._neighbor
1434
1613
  else:
1435
1614
  # Validate the 'of' parameter
1436
- self._validate_node_subset_parameter(of)
1615
+ self._validate_node_subset_parameter('of', of)
1437
1616
  return self._neighbor_of(of)
1438
1617
 
1439
- def _validate_node_subset_parameter(self, of_relation):
1440
- """
1441
- Validate that a parameter identifying a subset of nodes of interest is
1442
- is a unary relationship containing nodes that is attached to
1443
- the same model that the graph is attached to.
1444
- """
1445
- # Validate that the parameter is a relationship.
1446
- assert isinstance(of_relation, Relationship), (
1447
- "The 'of' parameter must be a `Relationship`, "
1448
- f"but is a `{type(of_relation).__name__}`."
1449
- )
1450
-
1451
- # Validate that the relationship is attached to the same model as the graph.
1452
- assert of_relation._model is self._model, (
1453
- "The given 'of' relationship must be attached to the same model as the graph."
1454
- )
1455
-
1456
- # Validate that it's a unary relationship (has exactly one field).
1457
- assert len(of_relation._fields) == 1, (
1458
- "The 'of' parameter must be a unary relationship, "
1459
- f"but it has {len(of_relation._fields)} fields."
1460
- )
1461
-
1462
- # Validate that the concept type matches the graph's Node concept.
1463
- assert of_relation._fields[0].type_str == self.Node._name, (
1464
- f"The 'of' relationship must be over the graph's Node concept ('{self.Node._name}'), "
1465
- f"but is over '{of_relation._fields[0].type_str}'."
1466
- )
1467
-
1468
1618
  @cached_property
1469
1619
  def _neighbor(self):
1470
1620
  """Lazily define and cache the self._neighbor relationship."""
1471
- return self._create_neighbor_relationship(nodes_subset=None)
1621
+ _neighbor_rel = self._create_neighbor_relationship(node_subset=None)
1622
+ _neighbor_rel.annotate(annotations.track("graphs", "neighbor"))
1623
+ return _neighbor_rel
1472
1624
 
1473
- def _neighbor_of(self, nodes_subset: Relationship):
1625
+ def _neighbor_of(self, node_subset: Relationship):
1474
1626
  """
1475
1627
  Create a neighbor relationship constrained to the subset of nodes
1476
- in `nodes_subset`. Note this relationship is not cached; it is
1628
+ in `node_subset`. Note this relationship is not cached; it is
1477
1629
  specific to the callsite.
1478
1630
  """
1479
- return self._create_neighbor_relationship(nodes_subset=nodes_subset)
1631
+ _neighbor_rel = self._create_neighbor_relationship(node_subset=node_subset)
1632
+ _neighbor_rel.annotate(annotations.track("graphs", "neighbor_of"))
1633
+ return _neighbor_rel
1480
1634
 
1481
- def _create_neighbor_relationship(self, *, nodes_subset: Optional[Relationship]):
1635
+ def _create_neighbor_relationship(self, *, node_subset: Optional[Relationship]):
1482
1636
  _neighbor_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has neighbor {{dst:{self._NodeConceptStr}}}")
1483
1637
  src, dst = self.Node.ref(), self.Node.ref()
1484
1638
 
@@ -1489,14 +1643,14 @@ class Graph():
1489
1643
  # Capture out-neighbors.
1490
1644
  where(
1491
1645
  self._edge(src, dst),
1492
- *([nodes_subset(src)] if nodes_subset else [])
1646
+ *([node_subset(src)] if node_subset else [])
1493
1647
  ).define(
1494
1648
  _neighbor_rel(src, dst)
1495
1649
  )
1496
1650
  # Capture in-neighbors.
1497
1651
  where(
1498
1652
  self._edge(src, dst),
1499
- *([nodes_subset(dst)] if nodes_subset else [])
1653
+ *([node_subset(dst)] if node_subset else [])
1500
1654
  ).define(
1501
1655
  _neighbor_rel(dst, src)
1502
1656
  )
@@ -1505,7 +1659,7 @@ class Graph():
1505
1659
  # so a single rule suffices to capture all neighbors.
1506
1660
  where(
1507
1661
  self._edge(src, dst),
1508
- *([nodes_subset(src)] if nodes_subset else [])
1662
+ *([node_subset(src)] if node_subset else [])
1509
1663
  ).define(
1510
1664
  _neighbor_rel(src, dst)
1511
1665
  )
@@ -1620,23 +1774,27 @@ class Graph():
1620
1774
  return self._inneighbor
1621
1775
  else:
1622
1776
  # Validate the 'of' parameter
1623
- self._validate_node_subset_parameter(of)
1777
+ self._validate_node_subset_parameter('of', of)
1624
1778
  return self._inneighbor_of(of)
1625
1779
 
1626
1780
  @cached_property
1627
1781
  def _inneighbor(self):
1628
1782
  """Lazily define and cache the self._inneighbor relationship."""
1629
- return self._create_inneighbor_relationship(nodes_subset=None)
1783
+ _inneighbor_rel = self._create_inneighbor_relationship(node_subset=None)
1784
+ _inneighbor_rel.annotate(annotations.track("graphs", "inneighbor"))
1785
+ return _inneighbor_rel
1630
1786
 
1631
- def _inneighbor_of(self, nodes_subset: Relationship):
1787
+ def _inneighbor_of(self, node_subset: Relationship):
1632
1788
  """
1633
1789
  Create an inneighbor relationship constrained to the subset of nodes
1634
- in `nodes_subset`. Note this relationship is not cached; it is
1790
+ in `node_subset`. Note this relationship is not cached; it is
1635
1791
  specific to the callsite.
1636
1792
  """
1637
- return self._create_inneighbor_relationship(nodes_subset=nodes_subset)
1793
+ _inneighbor_rel = self._create_inneighbor_relationship(node_subset=node_subset)
1794
+ _inneighbor_rel.annotate(annotations.track("graphs", "inneighbor_of"))
1795
+ return _inneighbor_rel
1638
1796
 
1639
- def _create_inneighbor_relationship(self, *, nodes_subset: Optional[Relationship]):
1797
+ def _create_inneighbor_relationship(self, *, node_subset: Optional[Relationship]):
1640
1798
  _inneighbor_rel = self._model.Relationship(f"{{dst:{self._NodeConceptStr}}} has inneighbor {{src:{self._NodeConceptStr}}}")
1641
1799
  src, dst = self.Node.ref(), self.Node.ref()
1642
1800
 
@@ -1645,7 +1803,7 @@ class Graph():
1645
1803
  # have an edge to the destination nodes in our subset.
1646
1804
  where(
1647
1805
  self._edge(src, dst),
1648
- *([nodes_subset(dst)] if nodes_subset else [])
1806
+ *([node_subset(dst)] if node_subset else [])
1649
1807
  ).define(
1650
1808
  _inneighbor_rel(dst, src)
1651
1809
  )
@@ -1654,7 +1812,7 @@ class Graph():
1654
1812
  # so neighbors and in-neighbors are the same.
1655
1813
  where(
1656
1814
  self._edge(src, dst),
1657
- *([nodes_subset(dst)] if nodes_subset else [])
1815
+ *([node_subset(dst)] if node_subset else [])
1658
1816
  ).define(
1659
1817
  _inneighbor_rel(dst, src)
1660
1818
  )
@@ -1771,23 +1929,27 @@ class Graph():
1771
1929
  return self._outneighbor
1772
1930
  else:
1773
1931
  # Validate the 'of' parameter
1774
- self._validate_node_subset_parameter(of)
1932
+ self._validate_node_subset_parameter('of', of)
1775
1933
  return self._outneighbor_of(of)
1776
1934
 
1777
1935
  @cached_property
1778
1936
  def _outneighbor(self):
1779
1937
  """Lazily define and cache the self._outneighbor relationship."""
1780
- return self._create_outneighbor_relationship(nodes_subset=None)
1938
+ _outneighbor_rel = self._create_outneighbor_relationship(node_subset=None)
1939
+ _outneighbor_rel.annotate(annotations.track("graphs", "outneighbor"))
1940
+ return _outneighbor_rel
1781
1941
 
1782
- def _outneighbor_of(self, nodes_subset: Relationship):
1942
+ def _outneighbor_of(self, node_subset: Relationship):
1783
1943
  """
1784
1944
  Create an outneighbor relationship constrained to the subset of nodes
1785
- in `nodes_subset`. Note this relationship is not cached; it is
1945
+ in `node_subset`. Note this relationship is not cached; it is
1786
1946
  specific to the callsite.
1787
1947
  """
1788
- return self._create_outneighbor_relationship(nodes_subset=nodes_subset)
1948
+ _outneighbor_rel = self._create_outneighbor_relationship(node_subset=node_subset)
1949
+ _outneighbor_rel.annotate(annotations.track("graphs", "outneighbor_of"))
1950
+ return _outneighbor_rel
1789
1951
 
1790
- def _create_outneighbor_relationship(self, *, nodes_subset: Optional[Relationship]):
1952
+ def _create_outneighbor_relationship(self, *, node_subset: Optional[Relationship]):
1791
1953
  _outneighbor_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has outneighbor {{dst:{self._NodeConceptStr}}}")
1792
1954
  src, dst = self.Node.ref(), self.Node.ref()
1793
1955
 
@@ -1796,7 +1958,7 @@ class Graph():
1796
1958
  # have an edge from the source nodes in our subset.
1797
1959
  where(
1798
1960
  self._edge(src, dst),
1799
- *([nodes_subset(src)] if nodes_subset else [])
1961
+ *([node_subset(src)] if node_subset else [])
1800
1962
  ).define(
1801
1963
  _outneighbor_rel(src, dst)
1802
1964
  )
@@ -1805,7 +1967,7 @@ class Graph():
1805
1967
  # so neighbors and out-neighbors are the same.
1806
1968
  where(
1807
1969
  self._edge(src, dst),
1808
- *([nodes_subset(src)] if nodes_subset else [])
1970
+ *([node_subset(src)] if node_subset else [])
1809
1971
  ).define(
1810
1972
  _outneighbor_rel(src, dst)
1811
1973
  )
@@ -1814,18 +1976,67 @@ class Graph():
1814
1976
 
1815
1977
 
1816
1978
  @include_in_docs
1817
- def common_neighbor(self):
1818
- """Returns a ternary relationship of all common neighbor triplets.
1979
+ def common_neighbor(self,
1980
+ *,
1981
+ full: Optional[bool] = None,
1982
+ from_: Optional[Relationship] = None,
1983
+ to: Optional[Relationship] = None,
1984
+ between: Optional[Relationship] = None,
1985
+ ):
1986
+ """Returns a ternary relationship of common neighbor triplets.
1819
1987
 
1820
1988
  A node `w` is a common neighbor of a pair of nodes `u` and `v` if
1821
1989
  `w` is a neighbor of both `u` and `v`.
1822
1990
 
1991
+ Parameters
1992
+ ----------
1993
+ full : bool, optional
1994
+ If ``True``, computes common neighbors for all pairs of nodes in
1995
+ the graph. This computation can be expensive for large graphs, as the
1996
+ result can scale quadratically in the number of edges or cubically in
1997
+ the number of nodes. Mutually exclusive with other parameters.
1998
+ Default is ``None``.
1999
+ from_ : Relationship, optional
2000
+ A unary relationship containing a subset of the graph's nodes. When
2001
+ provided, constrains the domain of the common neighbor computation: only
2002
+ common neighbors of node pairs where the first node is in this relationship
2003
+ are computed and returned. Mutually exclusive with ``full`` and ``between``.
2004
+ Default is ``None``.
2005
+ to : Relationship, optional
2006
+ A unary relationship containing a subset of the graph's nodes. Can only
2007
+ be used together with the ``from_`` parameter. When provided with ``from_``,
2008
+ constrains the domain of the common neighbor computation: only common
2009
+ neighbors of node pairs where the first node is in ``from_`` and the
2010
+ second node is in ``to`` are computed and returned.
2011
+ Default is ``None``.
2012
+ between : Relationship, optional
2013
+ A binary relationship containing pairs of nodes. When provided,
2014
+ constrains the domain of the common neighbor computation: only common
2015
+ neighbors for the specific node pairs in this relationship are computed
2016
+ and returned. Mutually exclusive with other parameters.
2017
+ Default is ``None``.
2018
+
1823
2019
  Returns
1824
2020
  -------
1825
2021
  Relationship
1826
2022
  A ternary relationship where each tuple represents a pair of nodes
1827
2023
  and one of their common neighbors.
1828
2024
 
2025
+ Raises
2026
+ ------
2027
+ ValueError
2028
+ If ``full`` is provided with any other parameter.
2029
+ If ``between`` is provided with any other parameter.
2030
+ If ``from_`` is provided with any parameter other than ``to``.
2031
+ If none of ``full``, ``from_``, or ``between`` is provided.
2032
+ If ``full`` is not ``True`` or ``None``.
2033
+ AssertionError
2034
+ If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
2035
+ If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
2036
+ If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
2037
+ If ``from_`` or ``to`` is not a unary relationship.
2038
+ If ``between`` is not a binary relationship.
2039
+
1829
2040
  Relationship Schema
1830
2041
  -------------------
1831
2042
  ``common_neighbor(node_u, node_v, common_neighbor_node)``
@@ -1842,6 +2053,37 @@ class Graph():
1842
2053
  | Directed | Yes | |
1843
2054
  | Weighted | Yes | Weights are ignored. |
1844
2055
 
2056
+ Notes
2057
+ -----
2058
+ The ``common_neighbor(full=True)`` method computes and caches the full common
2059
+ neighbor relationship for all pairs of nodes, providing efficient reuse across
2060
+ multiple calls. This can be expensive as the result can contain O(|E|²) or
2061
+ O(|V|³) tuples depending on graph density.
2062
+
2063
+ Calling ``common_neighbor()`` without arguments raises a ``ValueError``,
2064
+ to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
2065
+
2066
+ In contrast, ``common_neighbor(from_=subset)`` constrains the computation to
2067
+ tuples with the first position in the passed-in ``subset``. The result is
2068
+ not cached; it is specific to the call site. When a significant fraction of
2069
+ the common neighbor relation is needed across a program, ``common_neighbor(full=True)``
2070
+ is typically more efficient. Use ``common_neighbor(from_=subset)`` only
2071
+ when small subsets of the common neighbor relationship are needed
2072
+ collectively across the program.
2073
+
2074
+ The ``to`` parameter can be used together with ``from_`` to further
2075
+ constrain the computation: ``common_neighbor(from_=subset_a, to=subset_b)``
2076
+ computes common neighbors only for node pairs where the first node is in
2077
+ ``subset_a`` and the second node is in ``subset_b``. (Since ``common_neighbor``
2078
+ is symmetric in its first two positions, using ``to`` without ``from_`` would
2079
+ be functionally redundant, and is not allowed.)
2080
+
2081
+ The ``between`` parameter provides another way to constrain the computation:
2082
+ Unlike ``from_`` and ``to``, which allow you to independently constrain the first
2083
+ and second positions in ``common_neighbor`` tuples to sets of nodes, ``between``
2084
+ allows you to constrain the first and second positions, jointly, to specific pairs
2085
+ of nodes.
2086
+
1845
2087
  Examples
1846
2088
  --------
1847
2089
  >>> from relationalai.semantics import Model, define, select
@@ -1865,7 +2107,7 @@ class Graph():
1865
2107
  >>>
1866
2108
  >>> # 3. Select the IDs from the common_neighbor relationship and inspect
1867
2109
  >>> u, v, w = Node.ref("u"), Node.ref("v"), Node.ref("w")
1868
- >>> common_neighbor = graph.common_neighbor()
2110
+ >>> common_neighbor = graph.common_neighbor(full=True)
1869
2111
  >>> select(
1870
2112
  ... u.id, v.id, w.id
1871
2113
  ... ).where(
@@ -1897,25 +2139,230 @@ class Graph():
1897
2139
  21 4 4 2
1898
2140
  22 4 4 3
1899
2141
 
2142
+ >>> # 4. Use 'from_' parameter to constrain the set of nodes to compute common neighbors for
2143
+ >>> # Define a subset containing only node 1
2144
+ >>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
2145
+ >>> node = Node.ref()
2146
+ >>> where(node.id == 1).define(subset(node))
2147
+ >>>
2148
+ >>> # Get common neighbors only for pairs where first node is in subset
2149
+ >>> constrained_common_neighbor = graph.common_neighbor(from_=subset)
2150
+ >>> select(u.id, v.id, w.id).where(constrained_common_neighbor(u, v, w)).inspect()
2151
+ ▰▰▰▰ Setup complete
2152
+ id id2 id3
2153
+ 0 1 1 2
2154
+ 1 1 3 2
2155
+ 2 1 4 2
2156
+
2157
+ >>> # 5. Use both 'from_' and 'to' parameters to constrain the first two positions
2158
+ >>> subset_a = model.Relationship(f"{{node:{Node}}} is in subset_a")
2159
+ >>> subset_b = model.Relationship(f"{{node:{Node}}} is in subset_b")
2160
+ >>> where(node.id == 1).define(subset_a(node))
2161
+ >>> where(node.id == 3).define(subset_b(node))
2162
+ >>>
2163
+ >>> # Get common neighbors only where the first node is in subset_a and the second node is in subset_b
2164
+ >>> constrained_common_neighbor = graph.common_neighbor(from_=subset_a, to=subset_b)
2165
+ >>> select(u.id, v.id, w.id).where(constrained_common_neighbor(u, v, w)).inspect()
2166
+ ▰▰▰▰ Setup complete
2167
+ id id2 id3
2168
+ 0 1 3 2
2169
+
2170
+ >>> # 6. Use 'between' parameter to constrain to specific pairs of nodes
2171
+ >>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
2172
+ >>> node_a, node_b = Node.ref(), Node.ref()
2173
+ >>> where(node_a.id == 1, node_b.id == 3).define(pairs(node_a, node_b))
2174
+ >>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
2175
+ >>>
2176
+ >>> # Get common neighbors only for the specific pairs (1, 3) and (2, 4)
2177
+ >>> constrained_common_neighbor = graph.common_neighbor(between=pairs)
2178
+ >>> select(u.id, v.id, w.id).where(constrained_common_neighbor(u, v, w)).inspect()
2179
+ ▰▰▰▰ Setup complete
2180
+ id id2 id3
2181
+ 0 1 3 2
2182
+ 1 2 4 3
2183
+
1900
2184
  """
1901
- warnings.warn(
1902
- (
1903
- "`common_neighbor` presently always computes common neighbors "
1904
- "for all pairs of nodes in the graph. To provide better control "
1905
- "over the computed subset, `common_neighbor`'s interface "
1906
- "will soon need to change."
1907
- ),
1908
- FutureWarning,
1909
- stacklevel=2
2185
+ # Validate domain constraint parameters.
2186
+ self._validate_domain_constraint_parameters(
2187
+ 'common_neighbor', full, from_, to, between
1910
2188
  )
2189
+
2190
+ # At this point, exactly one of `full`, `from_`, or `between`
2191
+ # has been provided, and if `to` is provided, `from_` is also provided.
2192
+
2193
+ # Handle `between`.
2194
+ if between is not None:
2195
+ self._validate_pair_subset_parameter(between)
2196
+ return self._common_neighbor_between(between)
2197
+
2198
+ # Handle `from_` (and potentially `to`).
2199
+ if from_ is not None:
2200
+ self._validate_node_subset_parameter('from_', from_)
2201
+ if to is not None:
2202
+ self._validate_node_subset_parameter('to', to)
2203
+ return self._common_neighbor_from_to(from_, to)
2204
+ return self._common_neighbor_from(from_)
2205
+
2206
+ # Handle `full`.
1911
2207
  return self._common_neighbor
1912
2208
 
1913
2209
  @cached_property
1914
2210
  def _common_neighbor(self):
1915
- """Lazily define and cache the self._common_neighbor relationship."""
1916
- _common_neighbor_rel = self._model.Relationship(f"{{node_a:{self._NodeConceptStr}}} and {{node_b:{self._NodeConceptStr}}} have common neighbor {{node_c:{self._NodeConceptStr}}}")
1917
- node_a, node_b, node_c = self.Node.ref(), self.Node.ref(), self.Node.ref()
1918
- where(self._neighbor(node_a, node_c), self._neighbor(node_b, node_c)).define(_common_neighbor_rel(node_a, node_b, node_c))
2211
+ """Lazily define and cache the full common_neighbor relationship."""
2212
+ _common_neighbor_rel = self._create_common_neighbor_relationship()
2213
+ _common_neighbor_rel.annotate(annotations.track("graphs", "common_neighbor"))
2214
+ return _common_neighbor_rel
2215
+
2216
+ def _common_neighbor_from(self, node_subset_from: Relationship):
2217
+ """
2218
+ Create a common_neighbor relationship, with the first position in each
2219
+ tuple constrained to be in the given subset of nodes. Note this relationship
2220
+ is not cached; it is specific to the callsite.
2221
+ """
2222
+ _common_neighbor_rel = self._create_common_neighbor_relationship(
2223
+ node_subset_from=node_subset_from
2224
+ )
2225
+ _common_neighbor_rel.annotate(annotations.track("graphs", "common_neighbor_from"))
2226
+ return _common_neighbor_rel
2227
+
2228
+ def _common_neighbor_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
2229
+ """
2230
+ Create a common_neighbor relationship, with the first position in each
2231
+ tuple constrained to be in `node_subset_from`, and the second position in
2232
+ each tuple constrained to be in `node_subset_to`. Note this relationship
2233
+ is not cached; it is specific to the callsite.
2234
+ """
2235
+ _common_neighbor_rel = self._create_common_neighbor_relationship(
2236
+ node_subset_from=node_subset_from,
2237
+ node_subset_to=node_subset_to
2238
+ )
2239
+ _common_neighbor_rel.annotate(annotations.track("graphs", "common_neighbor_from_to"))
2240
+ return _common_neighbor_rel
2241
+
2242
+ def _common_neighbor_between(self, pair_subset: Relationship):
2243
+ """
2244
+ Create a common_neighbor relationship, with the first and second position
2245
+ in each tuple jointly constrained to be in the given set of pairs
2246
+ of nodes. Note this relationship is not cached;
2247
+ it is specific to the callsite.
2248
+ """
2249
+ _common_neighbor_rel = self._create_common_neighbor_relationship(
2250
+ pair_subset_between=pair_subset
2251
+ )
2252
+ _common_neighbor_rel.annotate(annotations.track("graphs", "common_neighbor_between"))
2253
+ return _common_neighbor_rel
2254
+
2255
+ def _create_common_neighbor_relationship(
2256
+ self,
2257
+ *,
2258
+ node_subset_from: Optional[Relationship] = None,
2259
+ node_subset_to: Optional[Relationship] = None,
2260
+ pair_subset_between: Optional[Relationship] = None,
2261
+ ):
2262
+ """
2263
+ Create common_neighbor relationship, optionally constrained by the provided
2264
+ node subsets or pair subset.
2265
+ """
2266
+ _common_neighbor_rel = self._model.Relationship(
2267
+ f"{{node_a:{self._NodeConceptStr}}} and {{node_b:{self._NodeConceptStr}}} "
2268
+ f"have common neighbor {{neighbor_node:{self._NodeConceptStr}}}"
2269
+ )
2270
+ node_a, node_b, neighbor_node = self.Node.ref(), self.Node.ref(), self.Node.ref()
2271
+
2272
+ # Handle the `between` case.
2273
+ if pair_subset_between is not None:
2274
+ # Extract all nodes that appear in any position of the pairs relationship
2275
+ # into a unary relation that we can use to constrain the neighbor computation.
2276
+ nodes_in_pairs = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} is in pairs subset")
2277
+ node_x, node_y = self.Node.ref(), self.Node.ref()
2278
+ where(
2279
+ pair_subset_between(node_x, node_y)
2280
+ ).define(
2281
+ nodes_in_pairs(node_x),
2282
+ nodes_in_pairs(node_y)
2283
+ )
2284
+
2285
+ # Create a neighbor relation constrained to the nodes that appear in the pairs.
2286
+ neighbor_rel = self._neighbor_of(nodes_in_pairs)
2287
+ neighbor_a_rel = neighbor_rel
2288
+ neighbor_b_rel = neighbor_rel
2289
+
2290
+ # The constraint fragment ensures we only compute common neighbors for the
2291
+ # specific pairs provided, not for all combinations of nodes in those pairs.
2292
+ node_constraint = [pair_subset_between(node_a, node_b)]
2293
+
2294
+ # Handle the `from_` case.
2295
+ elif node_subset_from is not None and node_subset_to is None:
2296
+ # Note that in this case we must compute all of `_neighbor` anyway,
2297
+ # as the second position in each tuple is unconstrained. Given that,
2298
+ # computing `_neighbor_of` for `node_subset_from` to constrain the
2299
+ # first position that way would be less efficient than using
2300
+ # `_neighbor` and joining the relevant variable with `node_subset_from`.
2301
+ neighbor_a_rel = self._neighbor
2302
+ neighbor_b_rel = self._neighbor
2303
+ node_constraint = [node_subset_from(node_a)]
2304
+ # TODO: Nice observation from @rygao: We can instead implement this
2305
+ # as a depth-2 traversal starting from `node_subset_from`. Candidate code:
2306
+
2307
+ # neighbor_a_rel = self._neighbor_of(node_subset_from)
2308
+ #
2309
+ # domain_w = Relationship(f"{{node:{self._NodeConceptStr}}} is the domain of `w` in `common_neighbor(u, v, w)`")
2310
+ # node_x, node_y = self.Node.ref(), self.Node.ref()
2311
+ # where(neighbor_a_rel(node_x, node_y)).define(domain_w(node_y))
2312
+ # neighbor_b_rel = self._neighbor_of(domain_w)
2313
+ #
2314
+ # node_constraint = []
2315
+ #
2316
+ # # The args of `neighbor_b_rel()` must be reversed, because its domain constraint
2317
+ # # applies to its first position; this relies on the symmetry of `neighbor`.
2318
+ # where(
2319
+ # *node_constraint,
2320
+ # neighbor_a_rel(node_a, neighbor_node),
2321
+ # neighbor_b_rel(neighbor_node, node_b)
2322
+ # ).define(_common_neighbor_rel(node_a, node_b, neighbor_node))
2323
+
2324
+ # Handle the `from_`/`to` case.
2325
+ elif node_subset_from is not None and node_subset_to is not None:
2326
+ # There are two cases:
2327
+ #
2328
+ # NOTE: For both of the following branches, spiritually we are applying
2329
+ # `node_constraint = [node_subset_from(node_a), node_subset_to(node_b)]`,
2330
+ # but these are already enforced by the use of the constrained
2331
+ # `_neighbor_of` relationships, so we don't need to include them
2332
+ # again in `node_constraint`.
2333
+ if node_subset_from is node_subset_to:
2334
+ # If `node_subset_from` and `node_subset_to` are object-identical,
2335
+ # we can compute `_neighbor_of` once, use it for both positions,
2336
+ # and apply no further constraint.
2337
+ neighbor_rel = self._neighbor_of(node_subset_from)
2338
+ neighbor_a_rel = neighbor_rel
2339
+ neighbor_b_rel = neighbor_rel
2340
+ node_constraint = []
2341
+ else:
2342
+ # Otherwise, we have two options: 1) compute `_neighbor_of` twice,
2343
+ # once for each node subset; or 2) compute `_neighbor` once, over
2344
+ # the union of both subsets, and apply constraints to each position.
2345
+ # Which of these is more efficient depends on the detailed nature
2346
+ # of the subsets, which we don't have knowledge of here. Here
2347
+ # we choose the simpler/cleaner of the two options (1) as such:
2348
+ neighbor_a_rel = self._neighbor_of(node_subset_from)
2349
+ neighbor_b_rel = self._neighbor_of(node_subset_to)
2350
+ node_constraint = []
2351
+
2352
+ # Handle the `full` case.
2353
+ else:
2354
+ neighbor_a_rel = self._neighbor
2355
+ neighbor_b_rel = self._neighbor
2356
+ node_constraint = []
2357
+
2358
+ # Define the common neighbor relationship using the neighbor relations and
2359
+ # constraints determined above. This logic is shared across all constraint types.
2360
+ where(
2361
+ *node_constraint,
2362
+ neighbor_a_rel(node_a, neighbor_node),
2363
+ neighbor_b_rel(node_b, neighbor_node)
2364
+ ).define(_common_neighbor_rel(node_a, node_b, neighbor_node))
2365
+
1919
2366
  return _common_neighbor_rel
1920
2367
 
1921
2368
 
@@ -2066,33 +2513,37 @@ class Graph():
2066
2513
  return self._degree
2067
2514
  else:
2068
2515
  # Validate the 'of' parameter
2069
- self._validate_node_subset_parameter(of)
2516
+ self._validate_node_subset_parameter('of', of)
2070
2517
  return self._degree_of(of)
2071
2518
 
2072
2519
  @cached_property
2073
2520
  def _degree(self):
2074
2521
  """Lazily define and cache the self._degree relationship."""
2075
- return self._create_degree_relationship(nodes_subset=None)
2522
+ _degree_rel = self._create_degree_relationship(node_subset=None)
2523
+ _degree_rel.annotate(annotations.track("graphs", "degree"))
2524
+ return _degree_rel
2076
2525
 
2077
- def _degree_of(self, nodes_subset: Relationship):
2526
+ def _degree_of(self, node_subset: Relationship):
2078
2527
  """
2079
2528
  Create a degree relationship constrained to the subset of nodes
2080
- in `nodes_subset`. Note this relationship is not cached; it is
2529
+ in `node_subset`. Note this relationship is not cached; it is
2081
2530
  specific to the callsite.
2082
2531
  """
2083
- return self._create_degree_relationship(nodes_subset=nodes_subset)
2532
+ _degree_rel = self._create_degree_relationship(node_subset=node_subset)
2533
+ _degree_rel.annotate(annotations.track("graphs", "degree_of"))
2534
+ return _degree_rel
2084
2535
 
2085
- def _create_degree_relationship(self, *, nodes_subset: Optional[Relationship]):
2536
+ def _create_degree_relationship(self, *, node_subset: Optional[Relationship]):
2086
2537
  _degree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has degree {{count:Integer}}")
2087
2538
 
2088
2539
  if self.directed:
2089
2540
  # For directed graphs, degree is the sum of indegree and outdegree.
2090
- if nodes_subset is None:
2541
+ if node_subset is None:
2091
2542
  indegree_rel = self._indegree
2092
2543
  outdegree_rel = self._outdegree
2093
2544
  else:
2094
- indegree_rel = self._indegree_of(nodes_subset)
2095
- outdegree_rel = self._outdegree_of(nodes_subset)
2545
+ indegree_rel = self._indegree_of(node_subset)
2546
+ outdegree_rel = self._outdegree_of(node_subset)
2096
2547
 
2097
2548
  incount, outcount = Integer.ref(), Integer.ref()
2098
2549
  where(
@@ -2101,12 +2552,12 @@ class Graph():
2101
2552
  ).define(_degree_rel(self.Node, incount + outcount))
2102
2553
  else:
2103
2554
  # For undirected graphs, degree is the count of neighbors.
2104
- if nodes_subset is None:
2555
+ if node_subset is None:
2105
2556
  node_set = self.Node
2106
2557
  count_neighbor_rel = self._count_neighbor
2107
2558
  else:
2108
- node_set = nodes_subset
2109
- count_neighbor_rel = self._count_neighbor_of(nodes_subset)
2559
+ node_set = node_subset
2560
+ count_neighbor_rel = self._count_neighbor_of(node_subset)
2110
2561
 
2111
2562
  where(
2112
2563
  node_set(self.Node), # Necessary given the match on the following line.
@@ -2257,34 +2708,38 @@ class Graph():
2257
2708
  return self._indegree
2258
2709
  else:
2259
2710
  # Validate the 'of' parameter
2260
- self._validate_node_subset_parameter(of)
2711
+ self._validate_node_subset_parameter('of', of)
2261
2712
  return self._indegree_of(of)
2262
2713
 
2263
2714
  @cached_property
2264
2715
  def _indegree(self):
2265
2716
  """Lazily define and cache the self._indegree relationship."""
2266
- return self._create_indegree_relationship(nodes_subset=None)
2717
+ _indegree_rel = self._create_indegree_relationship(node_subset=None)
2718
+ _indegree_rel.annotate(annotations.track("graphs", "indegree"))
2719
+ return _indegree_rel
2267
2720
 
2268
- def _indegree_of(self, nodes_subset: Relationship):
2721
+ def _indegree_of(self, node_subset: Relationship):
2269
2722
  """
2270
2723
  Create an indegree relationship constrained to the subset of nodes
2271
- in `nodes_subset`. Note this relationship is not cached; it is
2724
+ in `node_subset`. Note this relationship is not cached; it is
2272
2725
  specific to the callsite.
2273
2726
  """
2274
- return self._create_indegree_relationship(nodes_subset=nodes_subset)
2727
+ _indegree_rel = self._create_indegree_relationship(node_subset=node_subset)
2728
+ _indegree_rel.annotate(annotations.track("graphs", "indegree_of"))
2729
+ return _indegree_rel
2275
2730
 
2276
- def _create_indegree_relationship(self, *, nodes_subset: Optional[Relationship]):
2731
+ def _create_indegree_relationship(self, *, node_subset: Optional[Relationship]):
2277
2732
  _indegree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has indegree {{count:Integer}}")
2278
2733
 
2279
2734
  # Choose the appropriate count_inneighbor relationship and node set
2280
- if nodes_subset is None:
2735
+ if node_subset is None:
2281
2736
  # No constraint - use cached count_inneighbor relationship and all nodes
2282
2737
  count_inneighbor_rel = self._count_inneighbor
2283
2738
  node_set = self.Node
2284
2739
  else:
2285
2740
  # Constrained to nodes in the subset - use constrained count_inneighbor relationship
2286
- count_inneighbor_rel = self._count_inneighbor_of(nodes_subset)
2287
- node_set = nodes_subset
2741
+ count_inneighbor_rel = self._count_inneighbor_of(node_subset)
2742
+ node_set = node_subset
2288
2743
 
2289
2744
  # Apply the same indegree logic for both cases
2290
2745
  where(
@@ -2437,34 +2892,38 @@ class Graph():
2437
2892
  return self._outdegree
2438
2893
  else:
2439
2894
  # Validate the 'of' parameter
2440
- self._validate_node_subset_parameter(of)
2895
+ self._validate_node_subset_parameter('of', of)
2441
2896
  return self._outdegree_of(of)
2442
2897
 
2443
2898
  @cached_property
2444
2899
  def _outdegree(self):
2445
2900
  """Lazily define and cache the self._outdegree relationship."""
2446
- return self._create_outdegree_relationship(nodes_subset=None)
2901
+ _outdegree_rel = self._create_outdegree_relationship(node_subset=None)
2902
+ _outdegree_rel.annotate(annotations.track("graphs", "outdegree"))
2903
+ return _outdegree_rel
2447
2904
 
2448
- def _outdegree_of(self, nodes_subset: Relationship):
2905
+ def _outdegree_of(self, node_subset: Relationship):
2449
2906
  """
2450
2907
  Create an outdegree relationship constrained to the subset of nodes
2451
- in `nodes_subset`. Note this relationship is not cached; it is
2908
+ in `node_subset`. Note this relationship is not cached; it is
2452
2909
  specific to the callsite.
2453
2910
  """
2454
- return self._create_outdegree_relationship(nodes_subset=nodes_subset)
2911
+ _outdegree_rel = self._create_outdegree_relationship(node_subset=node_subset)
2912
+ _outdegree_rel.annotate(annotations.track("graphs", "outdegree_of"))
2913
+ return _outdegree_rel
2455
2914
 
2456
- def _create_outdegree_relationship(self, *, nodes_subset: Optional[Relationship]):
2915
+ def _create_outdegree_relationship(self, *, node_subset: Optional[Relationship]):
2457
2916
  _outdegree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has outdegree {{count:Integer}}")
2458
2917
 
2459
2918
  # Choose the appropriate count_outneighbor relationship and node set
2460
- if nodes_subset is None:
2919
+ if node_subset is None:
2461
2920
  # No constraint - use cached count_outneighbor relationship and all nodes
2462
2921
  count_outneighbor_rel = self._count_outneighbor
2463
2922
  node_set = self.Node
2464
2923
  else:
2465
2924
  # Constrained to nodes in the subset - use constrained count_outneighbor relationship
2466
- count_outneighbor_rel = self._count_outneighbor_of(nodes_subset)
2467
- node_set = nodes_subset
2925
+ count_outneighbor_rel = self._count_outneighbor_of(node_subset)
2926
+ node_set = node_subset
2468
2927
 
2469
2928
  # Apply the same outdegree logic for both cases
2470
2929
  where(
@@ -2582,33 +3041,37 @@ class Graph():
2582
3041
  return self._weighted_degree
2583
3042
  else:
2584
3043
  # Validate the 'of' parameter
2585
- self._validate_node_subset_parameter(of)
3044
+ self._validate_node_subset_parameter('of', of)
2586
3045
  return self._weighted_degree_of(of)
2587
3046
 
2588
3047
  @cached_property
2589
3048
  def _weighted_degree(self):
2590
3049
  """Lazily define and cache the self._weighted_degree relationship."""
2591
- return self._create_weighted_degree_relationship(nodes_subset=None)
3050
+ _weighted_degree_rel = self._create_weighted_degree_relationship(node_subset=None)
3051
+ _weighted_degree_rel.annotate(annotations.track("graphs", "weighted_degree"))
3052
+ return _weighted_degree_rel
2592
3053
 
2593
- def _weighted_degree_of(self, nodes_subset: Relationship):
3054
+ def _weighted_degree_of(self, node_subset: Relationship):
2594
3055
  """
2595
3056
  Create a weighted degree relationship constrained to the subset of nodes
2596
- in `nodes_subset`. Note this relationship is not cached; it is
3057
+ in `node_subset`. Note this relationship is not cached; it is
2597
3058
  specific to the callsite.
2598
3059
  """
2599
- return self._create_weighted_degree_relationship(nodes_subset=nodes_subset)
3060
+ _weighted_degree_rel = self._create_weighted_degree_relationship(node_subset=node_subset)
3061
+ _weighted_degree_rel.annotate(annotations.track("graphs", "weighted_degree_of"))
3062
+ return _weighted_degree_rel
2600
3063
 
2601
- def _create_weighted_degree_relationship(self, *, nodes_subset: Optional[Relationship]):
3064
+ def _create_weighted_degree_relationship(self, *, node_subset: Optional[Relationship]):
2602
3065
  _weighted_degree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has weighted degree {{weight:Float}}")
2603
3066
 
2604
3067
  if self.directed:
2605
3068
  # For directed graphs, weighted degree is the sum of weighted indegree and weighted outdegree.
2606
- if nodes_subset is None:
3069
+ if node_subset is None:
2607
3070
  weighted_indegree_rel = self._weighted_indegree
2608
3071
  weighted_outdegree_rel = self._weighted_outdegree
2609
3072
  else:
2610
- weighted_indegree_rel = self._weighted_indegree_of(nodes_subset)
2611
- weighted_outdegree_rel = self._weighted_outdegree_of(nodes_subset)
3073
+ weighted_indegree_rel = self._weighted_indegree_of(node_subset)
3074
+ weighted_outdegree_rel = self._weighted_outdegree_of(node_subset)
2612
3075
 
2613
3076
  inweight, outweight = Float.ref(), Float.ref()
2614
3077
  where(
@@ -2617,12 +3080,12 @@ class Graph():
2617
3080
  ).define(_weighted_degree_rel(self.Node, inweight + outweight))
2618
3081
  elif not self.directed:
2619
3082
  # Choose the appropriate node set
2620
- if nodes_subset is None:
3083
+ if node_subset is None:
2621
3084
  # No constraint - use all nodes
2622
3085
  node_set = self.Node
2623
3086
  else:
2624
3087
  # Constrained to nodes in the subset
2625
- node_set = nodes_subset
3088
+ node_set = node_subset
2626
3089
 
2627
3090
  dst, weight = self.Node.ref(), Float.ref()
2628
3091
  where(
@@ -2738,32 +3201,36 @@ class Graph():
2738
3201
  return self._weighted_indegree
2739
3202
  else:
2740
3203
  # Validate the 'of' parameter
2741
- self._validate_node_subset_parameter(of)
3204
+ self._validate_node_subset_parameter('of', of)
2742
3205
  return self._weighted_indegree_of(of)
2743
3206
 
2744
3207
  @cached_property
2745
3208
  def _weighted_indegree(self):
2746
3209
  """Lazily define and cache the self._weighted_indegree relationship."""
2747
- return self._create_weighted_indegree_relationship(nodes_subset=None)
3210
+ _weighted_indegree_rel = self._create_weighted_indegree_relationship(node_subset=None)
3211
+ _weighted_indegree_rel.annotate(annotations.track("graphs", "weighted_indegree"))
3212
+ return _weighted_indegree_rel
2748
3213
 
2749
- def _weighted_indegree_of(self, nodes_subset: Relationship):
3214
+ def _weighted_indegree_of(self, node_subset: Relationship):
2750
3215
  """
2751
3216
  Create a weighted indegree relationship constrained to the subset of nodes
2752
- in `nodes_subset`. Note this relationship is not cached; it is
3217
+ in `node_subset`. Note this relationship is not cached; it is
2753
3218
  specific to the callsite.
2754
3219
  """
2755
- return self._create_weighted_indegree_relationship(nodes_subset=nodes_subset)
3220
+ _weighted_indegree_rel = self._create_weighted_indegree_relationship(node_subset=node_subset)
3221
+ _weighted_indegree_rel.annotate(annotations.track("graphs", "weighted_indegree_of"))
3222
+ return _weighted_indegree_rel
2756
3223
 
2757
- def _create_weighted_indegree_relationship(self, *, nodes_subset: Optional[Relationship]):
3224
+ def _create_weighted_indegree_relationship(self, *, node_subset: Optional[Relationship]):
2758
3225
  _weighted_indegree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has weighted indegree {{weight:Float}}")
2759
3226
 
2760
3227
  # Choose the appropriate node set
2761
- if nodes_subset is None:
3228
+ if node_subset is None:
2762
3229
  # No constraint - use all nodes
2763
3230
  node_set = self.Node
2764
3231
  else:
2765
3232
  # Constrained to nodes in the subset
2766
- node_set = nodes_subset
3233
+ node_set = node_subset
2767
3234
  # TODO: In a future cleanup pass, replace `node_set` with a `node_constraint`
2768
3235
  # that replaces the `node_set(self.Node)` in the where clause below,
2769
3236
  # and generates only `self.Node` (rather than `self.Node(self.Node)`)
@@ -2886,32 +3353,36 @@ class Graph():
2886
3353
  return self._weighted_outdegree
2887
3354
  else:
2888
3355
  # Validate the 'of' parameter
2889
- self._validate_node_subset_parameter(of)
3356
+ self._validate_node_subset_parameter('of', of)
2890
3357
  return self._weighted_outdegree_of(of)
2891
3358
 
2892
3359
  @cached_property
2893
3360
  def _weighted_outdegree(self):
2894
3361
  """Lazily define and cache the self._weighted_outdegree relationship."""
2895
- return self._create_weighted_outdegree_relationship(nodes_subset=None)
3362
+ _weighted_outdegree_rel = self._create_weighted_outdegree_relationship(node_subset=None)
3363
+ _weighted_outdegree_rel.annotate(annotations.track("graphs", "weighted_outdegree"))
3364
+ return _weighted_outdegree_rel
2896
3365
 
2897
- def _weighted_outdegree_of(self, nodes_subset: Relationship):
3366
+ def _weighted_outdegree_of(self, node_subset: Relationship):
2898
3367
  """
2899
3368
  Create a weighted outdegree relationship constrained to the subset of nodes
2900
- in `nodes_subset`. Note this relationship is not cached; it is
3369
+ in `node_subset`. Note this relationship is not cached; it is
2901
3370
  specific to the callsite.
2902
3371
  """
2903
- return self._create_weighted_outdegree_relationship(nodes_subset=nodes_subset)
3372
+ _weighted_outdegree_rel = self._create_weighted_outdegree_relationship(node_subset=node_subset)
3373
+ _weighted_outdegree_rel.annotate(annotations.track("graphs", "weighted_outdegree_of"))
3374
+ return _weighted_outdegree_rel
2904
3375
 
2905
- def _create_weighted_outdegree_relationship(self, *, nodes_subset: Optional[Relationship]):
3376
+ def _create_weighted_outdegree_relationship(self, *, node_subset: Optional[Relationship]):
2906
3377
  _weighted_outdegree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has weighted outdegree {{weight:Float}}")
2907
3378
 
2908
3379
  # Choose the appropriate node set
2909
- if nodes_subset is None:
3380
+ if node_subset is None:
2910
3381
  # No constraint - use all nodes
2911
3382
  node_set = self.Node
2912
3383
  else:
2913
3384
  # Constrained to nodes in the subset
2914
- node_set = nodes_subset
3385
+ node_set = node_subset
2915
3386
 
2916
3387
  # Apply the weighted outdegree logic for both cases
2917
3388
  dst, outweight = self.Node.ref(), Float.ref()
@@ -3061,32 +3532,36 @@ class Graph():
3061
3532
  return self._degree_centrality
3062
3533
  else:
3063
3534
  # Validate the 'of' parameter
3064
- self._validate_node_subset_parameter(of)
3535
+ self._validate_node_subset_parameter('of', of)
3065
3536
  return self._degree_centrality_of(of)
3066
3537
 
3067
3538
  @cached_property
3068
3539
  def _degree_centrality(self):
3069
3540
  """Lazily define and cache the self._degree_centrality relationship."""
3070
- return self._create_degree_centrality_relationship(nodes_subset=None)
3541
+ _degree_centrality_rel = self._create_degree_centrality_relationship(node_subset=None)
3542
+ _degree_centrality_rel.annotate(annotations.track("graphs", "degree_centrality"))
3543
+ return _degree_centrality_rel
3071
3544
 
3072
- def _degree_centrality_of(self, nodes_subset: Relationship):
3545
+ def _degree_centrality_of(self, node_subset: Relationship):
3073
3546
  """
3074
3547
  Create a degree centrality relationship constrained to the subset of nodes
3075
- in `nodes_subset`. Note this relationship is not cached; it is
3548
+ in `node_subset`. Note this relationship is not cached; it is
3076
3549
  specific to the callsite.
3077
3550
  """
3078
- return self._create_degree_centrality_relationship(nodes_subset=nodes_subset)
3551
+ _degree_centrality_rel = self._create_degree_centrality_relationship(node_subset=node_subset)
3552
+ _degree_centrality_rel.annotate(annotations.track("graphs", "degree_centrality_of"))
3553
+ return _degree_centrality_rel
3079
3554
 
3080
- def _create_degree_centrality_relationship(self, *, nodes_subset: Optional[Relationship]):
3555
+ def _create_degree_centrality_relationship(self, *, node_subset: Optional[Relationship]):
3081
3556
  """Create a degree centrality relationship, optionally constrained to a subset of nodes."""
3082
3557
  _degree_centrality_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has {{degree_centrality:Float}}")
3083
3558
 
3084
- if nodes_subset is None:
3559
+ if node_subset is None:
3085
3560
  degree_rel = self._degree
3086
3561
  node_constraint = [] # No constraint on nodes.
3087
3562
  else:
3088
- degree_rel = self._degree_of(nodes_subset)
3089
- node_constraint = [nodes_subset(self.Node)] # Nodes constrained to given subset.
3563
+ degree_rel = self._degree_of(node_subset)
3564
+ node_constraint = [node_subset(self.Node)] # Nodes constrained to given subset.
3090
3565
 
3091
3566
  degree = Integer.ref()
3092
3567
 
@@ -3108,10 +3583,10 @@ class Graph():
3108
3583
  # General case, i.e. with more than one node.
3109
3584
  if self.weighted:
3110
3585
  maybe_weighted_degree = Float.ref()
3111
- if nodes_subset is None:
3586
+ if node_subset is None:
3112
3587
  maybe_weighted_degree_rel = self._weighted_degree
3113
3588
  else:
3114
- maybe_weighted_degree_rel = self._weighted_degree_of(nodes_subset)
3589
+ maybe_weighted_degree_rel = self._weighted_degree_of(node_subset)
3115
3590
  else: # not self.weighted
3116
3591
  maybe_weighted_degree = Integer.ref()
3117
3592
  maybe_weighted_degree_rel = degree_rel
@@ -3572,6 +4047,7 @@ class Graph():
3572
4047
  def _triangle(self):
3573
4048
  """Lazily define and cache the self._triangle relationship."""
3574
4049
  _triangle_rel = self._model.Relationship(f"{{node_a:{self._NodeConceptStr}}} and {{node_b:{self._NodeConceptStr}}} and {{node_c:{self._NodeConceptStr}}} form a triangle")
4050
+ _triangle_rel.annotate(annotations.track("graphs", "triangle"))
3575
4051
 
3576
4052
  a, b, c = self.Node.ref(), self.Node.ref(), self.Node.ref()
3577
4053
 
@@ -3714,6 +4190,7 @@ class Graph():
3714
4190
  def _unique_triangle(self):
3715
4191
  """Lazily define and cache the self._unique_triangle relationship."""
3716
4192
  _unique_triangle_rel = self._model.Relationship(f"{{node_a:{self._NodeConceptStr}}} and {{node_b:{self._NodeConceptStr}}} and {{node_c:{self._NodeConceptStr}}} form unique triangle")
4193
+ _unique_triangle_rel.annotate(annotations.track("graphs", "unique_triangle"))
3717
4194
 
3718
4195
  node_a, node_b, node_c = self.Node.ref(), self.Node.ref(), self.Node.ref()
3719
4196
 
@@ -3849,6 +4326,7 @@ class Graph():
3849
4326
  def _num_triangles(self):
3850
4327
  """Lazily define and cache the self._num_triangles relationship."""
3851
4328
  _num_triangles_rel = self._model.Relationship("The graph has {num_triangles:Integer} triangles")
4329
+ _num_triangles_rel.annotate(annotations.track("graphs", "num_triangles"))
3852
4330
 
3853
4331
  _num_triangles = Integer.ref()
3854
4332
  node_a, node_b, node_c = self.Node.ref(), self.Node.ref(), self.Node.ref()
@@ -3966,31 +4444,35 @@ class Graph():
3966
4444
 
3967
4445
  """
3968
4446
  if of is not None:
3969
- self._validate_node_subset_parameter(of)
4447
+ self._validate_node_subset_parameter('of', of)
3970
4448
  return self._triangle_count_of(of)
3971
4449
  return self._triangle_count
3972
4450
 
3973
4451
  @cached_property
3974
4452
  def _triangle_count(self):
3975
4453
  """Lazily define and cache the self._triangle_count relationship."""
3976
- return self._create_triangle_count_relationship(nodes_subset=None)
4454
+ _triangle_count_rel = self._create_triangle_count_relationship(node_subset=None)
4455
+ _triangle_count_rel.annotate(annotations.track("graphs", "triangle_count"))
4456
+ return _triangle_count_rel
3977
4457
 
3978
- def _triangle_count_of(self, nodes_subset: Relationship):
4458
+ def _triangle_count_of(self, node_subset: Relationship):
3979
4459
  """
3980
4460
  Create a triangle count relationship constrained to the subset of nodes
3981
- in `nodes_subset`. Note this relationship is not cached; it is
4461
+ in `node_subset`. Note this relationship is not cached; it is
3982
4462
  specific to the callsite.
3983
4463
  """
3984
- return self._create_triangle_count_relationship(nodes_subset=nodes_subset)
4464
+ _triangle_count_rel = self._create_triangle_count_relationship(node_subset=node_subset)
4465
+ _triangle_count_rel.annotate(annotations.track("graphs", "triangle_count_of"))
4466
+ return _triangle_count_rel
3985
4467
 
3986
- def _create_triangle_count_relationship(self, *, nodes_subset: Optional[Relationship]):
4468
+ def _create_triangle_count_relationship(self, *, node_subset: Optional[Relationship]):
3987
4469
  """Create a triangle count relationship, optionally constrained to a subset of nodes."""
3988
4470
  _triangle_count_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} belongs to {{count:Integer}} triangles")
3989
4471
 
3990
- if nodes_subset is None:
4472
+ if node_subset is None:
3991
4473
  node_constraint = self.Node # No constraint on nodes.
3992
4474
  else:
3993
- node_constraint = nodes_subset(self.Node) # Nodes constrained to given subset.
4475
+ node_constraint = node_subset(self.Node) # Nodes constrained to given subset.
3994
4476
 
3995
4477
  where(
3996
4478
  node_constraint,
@@ -4113,7 +4595,7 @@ class Graph():
4113
4595
 
4114
4596
 
4115
4597
  @include_in_docs
4116
- def local_clustering_coefficient(self):
4598
+ def local_clustering_coefficient(self, *, of: Optional[Relationship] = None):
4117
4599
  """Returns a binary relationship containing the local clustering coefficient of each node.
4118
4600
 
4119
4601
  The local clustering coefficient quantifies how close a node's neighbors
@@ -4122,6 +4604,14 @@ class Graph():
4122
4604
  directly connecting them, and 1.0 indicates all neighbors have edges
4123
4605
  directly connecting them.
4124
4606
 
4607
+ Parameters
4608
+ ----------
4609
+ of : Relationship, optional
4610
+ A unary relationship containing a subset of the graph's nodes. When
4611
+ provided, constrains the domain of the local clustering coefficient
4612
+ computation: only coefficients of nodes in this relationship are
4613
+ computed and returned.
4614
+
4125
4615
  Returns
4126
4616
  -------
4127
4617
  Relationship
@@ -4148,17 +4638,6 @@ class Graph():
4148
4638
  | Directed | No | Undirected only. |
4149
4639
  | Weighted | Yes | Weights are ignored. |
4150
4640
 
4151
- Notes
4152
- -----
4153
- The formal definition of the local clustering coefficient (`C`) for a
4154
- node (`v`) can be given as::
4155
-
4156
- C(v) = (2 * num_edges) / (degree(v) * (degree(v) - 1))
4157
-
4158
- Here, `num_edges` represents the number of edges between the
4159
- neighbors of node `v`, and `degree(v)` represents the degree of the
4160
- node, i.e., the number of edges connected to the node.
4161
-
4162
4641
  Examples
4163
4642
  --------
4164
4643
  >>> from relationalai.semantics import Model, define, select, Float
@@ -4194,6 +4673,41 @@ class Graph():
4194
4673
  3 4 0.333333
4195
4674
  4 5 0.000000
4196
4675
 
4676
+ >>> # 4. Use 'of' parameter to constrain the set of nodes to compute local clustering coefficients of
4677
+ >>> # Define a subset containing only nodes 1 and 3
4678
+ >>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
4679
+ >>> node = Node.ref()
4680
+ >>> where(union(node.id == 1, node.id == 3)).define(subset(node))
4681
+ >>>
4682
+ >>> # Get local clustering coefficients only of nodes in the subset
4683
+ >>> constrained_lcc = graph.local_clustering_coefficient(of=subset)
4684
+ >>> select(node.id, coeff).where(constrained_lcc(node, coeff)).inspect()
4685
+ ▰▰▰▰ Setup complete
4686
+ id coeff
4687
+ 0 1 1.000000
4688
+ 1 3 0.666667
4689
+
4690
+ Notes
4691
+ -----
4692
+ The local clustering coefficient for node `v` is::
4693
+
4694
+ (2 * num_neighbor_edges(v)) / (ext_degree(v) * (ext_degree(v) - 1))
4695
+
4696
+ where `num_neighbor_edges(v)` is the number of edges between
4697
+ the neighbors of node `v`, and `ext_degree(v)` is the degree of the
4698
+ node excluding self-loops. If `ext_degree(v)` is less than 2,
4699
+ the local clustering coefficient is 0.0.
4700
+
4701
+ The ``local_clustering_coefficient()`` method, called with no parameters, computes
4702
+ and caches the full local clustering coefficient relationship, providing efficient
4703
+ reuse across multiple calls to ``local_clustering_coefficient()``. In contrast,
4704
+ ``local_clustering_coefficient(of=subset)`` computes a constrained relationship
4705
+ specific to the passed-in ``subset`` and that call site. When a significant fraction
4706
+ of the local clustering coefficient relation is needed across a program,
4707
+ ``local_clustering_coefficient()`` is typically more efficient; this is the typical
4708
+ case. Use ``local_clustering_coefficient(of=subset)`` only when small subsets of the
4709
+ local clustering coefficient relationship are needed collectively across the program.
4710
+
4197
4711
 
4198
4712
  See Also
4199
4713
  --------
@@ -4206,29 +4720,51 @@ class Graph():
4206
4720
  raise NotImplementedError(
4207
4721
  "`local_clustering_coefficient` is not applicable to directed graphs"
4208
4722
  )
4723
+
4724
+ if of is not None:
4725
+ self._validate_node_subset_parameter('of', of)
4726
+ return self._local_clustering_coefficient_of(of)
4209
4727
  return self._local_clustering_coefficient
4210
4728
 
4211
4729
  @cached_property
4212
4730
  def _local_clustering_coefficient(self):
4731
+ """Lazily define and cache the self._local_clustering_coefficient relationship."""
4732
+ _local_clustering_coefficient_rel = self._create_local_clustering_coefficient_relationship(node_subset=None)
4733
+ _local_clustering_coefficient_rel.annotate(annotations.track("graphs", "local_clustering_coefficient"))
4734
+ return _local_clustering_coefficient_rel
4735
+
4736
+ def _local_clustering_coefficient_of(self, node_subset: Relationship):
4213
4737
  """
4214
- Lazily define and cache the self._local_clustering_coefficient relationship,
4215
- which only applies to undirected graphs.
4738
+ Create a local clustering coefficient relationship constrained to the subset of nodes
4739
+ in `node_subset`. Note this relationship is not cached; it is
4740
+ specific to the callsite.
4216
4741
  """
4217
- _local_clustering_coefficient_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has local clustering coefficient {{coefficient:Float}}")
4742
+ _local_clustering_coefficient_rel = self._create_local_clustering_coefficient_relationship(node_subset=node_subset)
4743
+ _local_clustering_coefficient_rel.annotate(annotations.track("graphs", "local_clustering_coefficient_of"))
4744
+ return _local_clustering_coefficient_rel
4218
4745
 
4219
- if self.directed:
4220
- raise NotImplementedError(
4221
- "`local_clustering_coefficient is not defined for directed graphs."
4222
- )
4746
+ def _create_local_clustering_coefficient_relationship(self, *, node_subset: Optional[Relationship]):
4747
+ """Create a local clustering coefficient relationship, optionally constrained to a subset of nodes."""
4748
+ _local_clustering_coefficient_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has local clustering coefficient {{coefficient:Float}}")
4223
4749
 
4224
4750
  node = self.Node.ref()
4751
+
4752
+ if node_subset is None:
4753
+ degree_no_self_rel = self._degree_no_self
4754
+ triangle_count_rel = self._triangle_count
4755
+ node_constraint = node # No constraint on nodes.
4756
+ else:
4757
+ degree_no_self_rel = self._degree_no_self_of(node_subset)
4758
+ triangle_count_rel = self._triangle_count_of(node_subset)
4759
+ node_constraint = node_subset(node) # Nodes constrained to given subset.
4760
+
4225
4761
  degree_no_self = Integer.ref()
4226
4762
  triangle_count = Integer.ref()
4227
4763
  where(
4228
- node,
4764
+ node_constraint,
4229
4765
  _lcc := where(
4230
- self._degree_no_self(node, degree_no_self),
4231
- self._triangle_count(node, triangle_count),
4766
+ degree_no_self_rel(node, degree_no_self),
4767
+ triangle_count_rel(node, triangle_count),
4232
4768
  degree_no_self > 1
4233
4769
  ).select(
4234
4770
  2.0 * triangle_count / (degree_no_self * (degree_no_self - 1.0))
@@ -4243,11 +4779,32 @@ class Graph():
4243
4779
  Lazily define and cache the self._degree_no_self relationship,
4244
4780
  a non-public helper for local_clustering_coefficient.
4245
4781
  """
4782
+ return self._create_degree_no_self_relationship(node_subset=None)
4783
+
4784
+ def _degree_no_self_of(self, node_subset: Relationship):
4785
+ """
4786
+ Create a self-loop-exclusive degree relationship constrained to
4787
+ the subset of nodes in `node_subset`. Note this relationship
4788
+ is not cached; it is specific to the callsite.
4789
+ """
4790
+ return self._create_degree_no_self_relationship(node_subset=node_subset)
4791
+
4792
+ def _create_degree_no_self_relationship(self, *, node_subset: Optional[Relationship]):
4793
+ """
4794
+ Create a self-loop-exclusive degree relationship,
4795
+ optionally constrained to a subset of nodes.
4796
+ """
4246
4797
  _degree_no_self_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has degree excluding self loops {{num:Integer}}")
4247
4798
 
4248
4799
  node, neighbor = self.Node.ref(), self.Node.ref()
4800
+
4801
+ if node_subset is None:
4802
+ node_constraint = node # No constraint on nodes.
4803
+ else:
4804
+ node_constraint = node_subset(node) # Nodes constrained to given subset.
4805
+
4249
4806
  where(
4250
- self.Node(node),
4807
+ node_constraint,
4251
4808
  _dns := count(neighbor).per(node).where(self._no_loop_edge(node, neighbor)) | 0,
4252
4809
  ).define(_degree_no_self_rel(node, _dns))
4253
4810
 
@@ -4331,6 +4888,7 @@ class Graph():
4331
4888
  which only applies to undirected graphs.
4332
4889
  """
4333
4890
  _average_clustering_coefficient_rel = self._model.Relationship("The graph has average clustering coefficient {{coefficient:Float}}")
4891
+ _average_clustering_coefficient_rel.annotate(annotations.track("graphs", "average_clustering_coefficient"))
4334
4892
 
4335
4893
  if self.directed:
4336
4894
  raise NotImplementedError(
@@ -4471,6 +5029,7 @@ class Graph():
4471
5029
  def _reachable_from(self):
4472
5030
  """Lazily define and cache the self._reachable_from relationship."""
4473
5031
  _reachable_from_rel = self._model.Relationship(f"{{node_a:{self._NodeConceptStr}}} reaches {{node_b:{self._NodeConceptStr}}}")
5032
+ _reachable_from_rel.annotate(annotations.track("graphs", "reachable_from"))
4474
5033
 
4475
5034
  node_a, node_b, node_c = self.Node.ref(), self.Node.ref(), self.Node.ref()
4476
5035
  define(_reachable_from_rel(node_a, node_a))
@@ -4613,9 +5172,12 @@ class Graph():
4613
5172
  def _distance(self):
4614
5173
  """Lazily define and cache the self._distance relationship."""
4615
5174
  if not self.weighted:
4616
- return self._distance_non_weighted
5175
+ _distance_rel = self._distance_non_weighted
4617
5176
  else:
4618
- return self._distance_weighted
5177
+ _distance_rel = self._distance_weighted
5178
+
5179
+ _distance_rel.annotate(annotations.track("graphs", "distance"))
5180
+ return _distance_rel
4619
5181
 
4620
5182
  @cached_property
4621
5183
  def _distance_weighted(self):
@@ -4741,6 +5303,7 @@ class Graph():
4741
5303
  def _weakly_connected_component(self):
4742
5304
  """Lazily define and cache the self._weakly_connected_component relationship."""
4743
5305
  _weakly_connected_component_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} is in the connected component {{id:{self._NodeConceptStr}}}")
5306
+ _weakly_connected_component_rel.annotate(annotations.track("graphs", "weakly_connected_component"))
4744
5307
 
4745
5308
  node, node_v, component = self.Node.ref(), self.Node.ref(), self.Node.ref()
4746
5309
  node, component = union(
@@ -4864,6 +5427,8 @@ class Graph():
4864
5427
  """
4865
5428
  _diameter_range_min_rel = self._model.Relationship("The graph has a min diameter range of {value:Integer}")
4866
5429
  _diameter_range_max_rel = self._model.Relationship("The graph has a max diameter range of {value:Integer}")
5430
+ _diameter_range_min_rel.annotate(annotations.track("graphs", "diameter_range_min"))
5431
+ _diameter_range_max_rel.annotate(annotations.track("graphs", "diameter_range_max"))
4867
5432
 
4868
5433
  component_node_pairs = self._model.Relationship(f"component id {{cid:{self._NodeConceptStr}}} has node id {{nid:{self._NodeConceptStr}}}")
4869
5434
  nodeid, cid, degreevalue = self.Node.ref(), self.Node.ref(), Integer.ref()
@@ -4924,16 +5489,22 @@ class Graph():
4924
5489
 
4925
5490
  @include_in_docs
4926
5491
  def is_connected(self):
4927
- """Returns a query fragment that is satisfied if the graph is connected.
5492
+ """Returns a unary relationship containing whether the graph is connected.
4928
5493
 
4929
5494
  A graph is considered connected if every node is reachable from every
4930
5495
  other node in the underlying undirected graph.
4931
5496
 
4932
5497
  Returns
4933
5498
  -------
4934
- Fragment
4935
- A query fragment that can be used as a condition in other
4936
- queries to assert that the graph is connected.
5499
+ Relationship
5500
+ A unary relationship containing a boolean indicator of whether the graph
5501
+ is connected.
5502
+
5503
+ Relationship Schema
5504
+ -------------------
5505
+ ``is_connected(connected)``
5506
+
5507
+ * **connected** (*Boolean*): Whether the graph is connected.
4937
5508
 
4938
5509
  Supported Graph Types
4939
5510
  ---------------------
@@ -4951,8 +5522,6 @@ class Graph():
4951
5522
  --------
4952
5523
  **Connected Graph Example**
4953
5524
 
4954
- The following query will produce a result because the graph is connected.
4955
-
4956
5525
  >>> from relationalai.semantics import Model, define, select
4957
5526
  >>> from relationalai.semantics.reasoners.graph import Graph
4958
5527
  >>>
@@ -4970,17 +5539,14 @@ class Graph():
4970
5539
  ... Edge.new(src=n4, dst=n3),
4971
5540
  ... )
4972
5541
  >>>
4973
- >>> # 3. Use the fragment as a condition in a query
4974
- >>> select("Graph is connected").where(graph.is_connected()).inspect()
5542
+ >>> # 3. Select and inspect the relation
5543
+ >>> select(graph.is_connected()).inspect()
4975
5544
  ▰▰▰▰ Setup complete
4976
- v
4977
- 0 Graph is connected
5545
+ is_connected
5546
+ 0 True
4978
5547
 
4979
5548
  **Disconnected Graph Example**
4980
5549
 
4981
- The following query will produce no results because the graph is not
4982
- connected.
4983
-
4984
5550
  >>> from relationalai.semantics import Model, define, select
4985
5551
  >>> from relationalai.semantics.reasoners.graph import Graph
4986
5552
  >>>
@@ -4998,22 +5564,31 @@ class Graph():
4998
5564
  ... Edge.new(src=n4, dst=n5), # This edge creates a separate component
4999
5565
  ... )
5000
5566
  >>>
5001
- >>> # 3. The conditional query produces no output
5002
- >>> select("Graph is connected").where(graph.is_connected()).inspect()
5567
+ >>> # 3. Select and inspect the relation
5568
+ >>> select(graph.is_connected()).inspect()
5003
5569
  ▰▰▰▰ Setup complete
5004
- Empty DataFrame
5005
- Columns: []
5006
- Index: []
5570
+ is_connected
5571
+ 0 False
5007
5572
 
5008
5573
  """
5009
- # TODO (dba) This method is inconsistent with the other,
5010
- # public methods. It does not return a `Relationship`. Revisit
5011
- # this. See GH thread:
5012
- # https://github.com/RelationalAI/relationalai-python/pull/2077#discussion_r2190538074
5013
- return where(
5574
+ return self._is_connected
5575
+
5576
+ @cached_property
5577
+ def _is_connected(self):
5578
+ """Lazily define and cache the self._is_connected relationship."""
5579
+ _is_connected_rel = self._model.Relationship("'The graph is connected' is {is_connected:Boolean}")
5580
+ _is_connected_rel.annotate(annotations.track("graphs", "is_connected"))
5581
+
5582
+ where(
5014
5583
  self._num_nodes(0) |
5015
5584
  count(self._reachable_from_min_node(self.Node.ref())) == self._num_nodes(Integer.ref())
5016
- )
5585
+ ).define(_is_connected_rel(True))
5586
+
5587
+ where(
5588
+ not_(_is_connected_rel(True))
5589
+ ).define(_is_connected_rel(False))
5590
+
5591
+ return _is_connected_rel
5017
5592
 
5018
5593
 
5019
5594
  @include_in_docs
@@ -5179,6 +5754,7 @@ class Graph():
5179
5754
  def _jaccard_similarity(self):
5180
5755
  """Lazily define and cache the self._jaccard_similarity relationship."""
5181
5756
  _jaccard_similarity_rel = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} has a similarity to {{node_v:{self._NodeConceptStr}}} of {{similarity:Float}}")
5757
+ _jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity"))
5182
5758
 
5183
5759
  if not self.weighted:
5184
5760
  node_u, node_v = self.Node.ref(), self.Node.ref()
@@ -5270,19 +5846,72 @@ class Graph():
5270
5846
 
5271
5847
 
5272
5848
  @include_in_docs
5273
- def cosine_similarity(self):
5274
- """Returns a ternary relationship containing the cosine similarity for all pairs of nodes.
5849
+ def cosine_similarity(
5850
+ self,
5851
+ *,
5852
+ full: Optional[bool] = None,
5853
+ from_: Optional[Relationship] = None,
5854
+ to: Optional[Relationship] = None,
5855
+ between: Optional[Relationship] = None,
5856
+ ):
5857
+ """Returns a ternary relationship containing
5858
+ the cosine similarity for pairs of nodes.
5275
5859
 
5276
5860
  The cosine similarity measures the similarity between two nodes based
5277
5861
  on the angle between their neighborhood vectors. The score ranges from
5278
5862
  0.0 to 1.0, inclusive, where 1.0 indicates identical sets of neighbors.
5279
5863
 
5864
+ Parameters
5865
+ ----------
5866
+ full : bool, optional
5867
+ If ``True``, computes the cosine similarity for all pairs
5868
+ of nodes in the graph. This computation can be expensive for large graphs,
5869
+ as the result can scale quadratically in the number of nodes. Mutually exclusive
5870
+ with other parameters.
5871
+ Default is ``None``.
5872
+ from_ : Relationship, optional
5873
+ A unary relationship containing a subset of the graph's nodes. When
5874
+ provided, constrains the domain of the cosine similarity computation: only
5875
+ cosine similarity scores for node pairs where the first node is
5876
+ in this relationship are computed and returned. Mutually exclusive with
5877
+ ``full`` and ``between``.
5878
+ Default is ``None``.
5879
+ to : Relationship, optional
5880
+ A unary relationship containing a subset of the graph's nodes. Can only
5881
+ be used together with the ``from_`` parameter. When provided with ``from_``,
5882
+ constrains the domain of the cosine similarity computation: only
5883
+ cosine similarity scores for node pairs where the first node is
5884
+ in ``from_`` and the second node is in ``to`` are computed and returned.
5885
+ Default is ``None``.
5886
+ between : Relationship, optional
5887
+ A binary relationship containing pairs of nodes. When provided,
5888
+ constrains the domain of the cosine similarity computation: only
5889
+ cosine similarity scores for the specific node pairs in
5890
+ this relationship are computed and returned. Mutually exclusive
5891
+ with other parameters.
5892
+ Default is ``None``.
5893
+
5280
5894
  Returns
5281
5895
  -------
5282
5896
  Relationship
5283
5897
  A ternary relationship where each tuple represents a pair of nodes
5284
5898
  and their cosine similarity.
5285
5899
 
5900
+ Raises
5901
+ ------
5902
+ ValueError
5903
+ If ``full`` is provided with any other parameter.
5904
+ If ``between`` is provided with any other parameter.
5905
+ If ``from_`` is provided with any parameter other than ``to``.
5906
+ If none of ``full``, ``from_``, or ``between`` is provided.
5907
+ If ``full`` is not ``True`` or ``None``.
5908
+ AssertionError
5909
+ If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
5910
+ If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
5911
+ If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
5912
+ If ``from_`` or ``to`` is not a unary relationship.
5913
+ If ``between`` is not a binary relationship.
5914
+
5286
5915
  Relationship Schema
5287
5916
  -------------------
5288
5917
  ``cosine_similarity(node_u, node_v, score)``
@@ -5315,6 +5944,36 @@ class Graph():
5315
5944
  vectors contain only non-negative elements. Therefore, the cosine
5316
5945
  similarity score is always between 0.0 and 1.0, inclusive.
5317
5946
 
5947
+ The ``cosine_similarity(full=True)`` method computes and caches
5948
+ the full cosine similarity relationship for all pairs of nodes,
5949
+ providing efficient reuse across multiple calls. This can be expensive
5950
+ as the result can contain O(|V|²) tuples.
5951
+
5952
+ Calling ``cosine_similarity()`` without arguments raises a ``ValueError``,
5953
+ to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
5954
+
5955
+ In contrast, ``cosine_similarity(from_=subset)`` constrains the computation to
5956
+ tuples with the first position in the passed-in ``subset``. The result is
5957
+ not cached; it is specific to the call site. When a significant fraction of
5958
+ the cosine similarity relation is needed across a program,
5959
+ ``cosine_similarity(full=True)`` is typically more efficient. Use
5960
+ ``cosine_similarity(from_=subset)`` only when small subsets of
5961
+ the cosine similarity relationship are needed
5962
+ collectively across the program.
5963
+
5964
+ The ``to`` parameter can be used together with ``from_`` to further
5965
+ constrain the computation: ``cosine_similarity(from_=subset_a, to=subset_b)``
5966
+ computes cosine similarity scores only for node pairs where the first node is in
5967
+ ``subset_a`` and the second node is in ``subset_b``. (Since ``cosine_similarity``
5968
+ is symmetric in its first two positions, using ``to`` without ``from_`` would
5969
+ be functionally redundant, and is not allowed.)
5970
+
5971
+ The ``between`` parameter provides another way to constrain the computation.
5972
+ Unlike ``from_`` and ``to``, which allow you to independently constrain the first
5973
+ and second positions in ``cosine_similarity`` tuples to sets of nodes, ``between``
5974
+ allows you to constrain the first and second positions, jointly, to specific pairs
5975
+ of nodes.
5976
+
5318
5977
  Examples
5319
5978
  --------
5320
5979
  **Unweighted Graph Examples**
@@ -5336,7 +5995,7 @@ class Graph():
5336
5995
  ... Edge.new(src=n4, dst=n3),
5337
5996
  ... )
5338
5997
  >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
5339
- >>> cosine_similarity = graph.cosine_similarity()
5998
+ >>> cosine_similarity = graph.cosine_similarity(full=True)
5340
5999
  >>> select(score).where(cosine_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
5341
6000
  ▰▰▰▰ Setup complete
5342
6001
  score
@@ -5359,7 +6018,7 @@ class Graph():
5359
6018
  ... Edge.new(src=n4, dst=n3),
5360
6019
  ... )
5361
6020
  >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
5362
- >>> cosine_similarity = graph.cosine_similarity()
6021
+ >>> cosine_similarity = graph.cosine_similarity(full=True)
5363
6022
  >>> select(score).where(cosine_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
5364
6023
  ▰▰▰▰ Setup complete
5365
6024
  score
@@ -5384,7 +6043,7 @@ class Graph():
5384
6043
  ... Edge.new(src=n14, dst=n13, weight=1.0),
5385
6044
  ... )
5386
6045
  >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
5387
- >>> cosine_similarity = graph.cosine_similarity()
6046
+ >>> cosine_similarity = graph.cosine_similarity(full=True)
5388
6047
  >>> select(score).where(cosine_similarity(u, v, score), u.id == 1, v.id == 2).inspect()
5389
6048
  ▰▰▰▰ Setup complete
5390
6049
  score
@@ -5406,48 +6065,246 @@ class Graph():
5406
6065
  ... Edge.new(src=n2, dst=n4, weight=5.0),
5407
6066
  ... )
5408
6067
  >>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
5409
- >>> cosine_similarity = graph.cosine_similarity()
6068
+ >>> cosine_similarity = graph.cosine_similarity(full=True)
5410
6069
  >>> select(score).where(cosine_similarity(u, v, score), u.id == 1, v.id == 2).inspect()
5411
6070
  ▰▰▰▰ Setup complete
5412
6071
  score
5413
6072
  0 0.996241
5414
6073
 
6074
+ **Domain Constraint Examples**
6075
+
6076
+ >>> # Use 'from_' parameter to constrain the set of nodes for the first position
6077
+ >>> # Using the same undirected unweighted graph from above
6078
+ >>> from relationalai.semantics import where
6079
+ >>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
6080
+ >>> node = Node.ref()
6081
+ >>> where(node.id == 2).define(subset(node))
6082
+ >>>
6083
+ >>> # Get cosine similarity scores only for pairs where first node is in subset
6084
+ >>> constrained_cosine_similarity = graph.cosine_similarity(from_=subset)
6085
+ >>> select(u.id, v.id, score).where(constrained_cosine_similarity(u, v, score)).inspect()
6086
+ ▰▰▰▰ Setup complete
6087
+ id id2 score
6088
+ 0 2 2 1.000000
6089
+ 1 2 3 0.707107
6090
+ 2 2 4 0.408248
6091
+
6092
+ >>> # Use both 'from_' and 'to' parameters to constrain both positions
6093
+ >>> from_subset = model.Relationship(f"{{node:{Node}}} is in from_subset")
6094
+ >>> to_subset = model.Relationship(f"{{node:{Node}}} is in to_subset")
6095
+ >>> where(node.id == 2).define(from_subset(node))
6096
+ >>> where(node.id == 4).define(to_subset(node))
6097
+ >>>
6098
+ >>> # Get cosine similarity scores only where first node is in from_subset and second node is in to_subset
6099
+ >>> constrained_cosine_similarity = graph.cosine_similarity(from_=from_subset, to=to_subset)
6100
+ >>> select(u.id, v.id, score).where(constrained_cosine_similarity(u, v, score)).inspect()
6101
+ ▰▰▰▰ Setup complete
6102
+ id id2 score
6103
+ 0 2 4 0.408248
6104
+
6105
+ >>> # Use 'between' parameter to constrain to specific pairs of nodes
6106
+ >>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
6107
+ >>> node_a, node_b = Node.ref(), Node.ref()
6108
+ >>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
6109
+ >>> where(node_a.id == 3, node_b.id == 4).define(pairs(node_a, node_b))
6110
+ >>>
6111
+ >>> # Get cosine similarity scores only for the specific pairs (2, 4) and (3, 4)
6112
+ >>> constrained_cosine_similarity = graph.cosine_similarity(between=pairs)
6113
+ >>> select(u.id, v.id, score).where(constrained_cosine_similarity(u, v, score)).inspect()
6114
+ ▰▰▰▰ Setup complete
6115
+ id id2 score
6116
+ 0 2 4 0.408248
6117
+ 1 3 4 0.707107
6118
+
5415
6119
  """
5416
- warnings.warn(
5417
- (
5418
- "`cosine_similarity` presently always computes the similarity "
5419
- "of all pairs of nodes of the graph. To provide better control over "
5420
- "the computed subset, `cosine_similarity`'s interface will soon "
5421
- "need to change."
5422
- ),
5423
- FutureWarning,
5424
- stacklevel=2
6120
+ # Validate domain constraint parameters.
6121
+ self._validate_domain_constraint_parameters(
6122
+ 'cosine_similarity', full, from_, to, between
5425
6123
  )
5426
6124
 
6125
+ # At this point, exactly one of `full`, `from_`, or `between`
6126
+ # has been provided, and if `to` is provided, `from_` is also provided.
6127
+
6128
+ # Handle `between`.
6129
+ if between is not None:
6130
+ self._validate_pair_subset_parameter(between)
6131
+ return self._cosine_similarity_between(between)
6132
+
6133
+ # Handle `from_` (and potentially `to`).
6134
+ if from_ is not None:
6135
+ self._validate_node_subset_parameter('from_', from_)
6136
+ if to is not None:
6137
+ self._validate_node_subset_parameter('to', to)
6138
+ return self._cosine_similarity_from_to(from_, to)
6139
+ return self._cosine_similarity_from(from_)
6140
+
6141
+ # Handle `full`.
5427
6142
  return self._cosine_similarity
5428
6143
 
5429
6144
  @cached_property
5430
6145
  def _cosine_similarity(self):
5431
- """Lazily define and cache the self._cosine_similarity relationship."""
5432
- _cosine_similarity_rel = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} has a cosine similarity to {{node_v:{self._NodeConceptStr}}} of {{score:Float}}")
6146
+ """Lazily define and cache the full cosine_similarity relationship."""
6147
+ _cosine_similarity_rel = self._create_cosine_similarity_relationship()
6148
+ _cosine_similarity_rel.annotate(annotations.track("graphs", "cosine_similarity"))
6149
+ return _cosine_similarity_rel
5433
6150
 
6151
+ def _cosine_similarity_from(self, node_subset_from: Relationship):
6152
+ """
6153
+ Create a cosine_similarity relationship, with the first position in each
6154
+ tuple constrained to be in the given subset of nodes. Note this relationship
6155
+ is not cached; it is specific to the callsite.
6156
+ """
6157
+ _cosine_similarity_rel = self._create_cosine_similarity_relationship(
6158
+ node_subset_from=node_subset_from
6159
+ )
6160
+ _cosine_similarity_rel.annotate(annotations.track("graphs", "cosine_similarity_from"))
6161
+ return _cosine_similarity_rel
6162
+
6163
+ def _cosine_similarity_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
6164
+ """
6165
+ Create a cosine_similarity relationship, with the first position in each
6166
+ tuple constrained to be in `node_subset_from`, and the second position in
6167
+ each tuple constrained to be in `node_subset_to`. Note this relationship
6168
+ is not cached; it is specific to the callsite.
6169
+ """
6170
+ _cosine_similarity_rel = self._create_cosine_similarity_relationship(
6171
+ node_subset_from=node_subset_from,
6172
+ node_subset_to=node_subset_to
6173
+ )
6174
+ _cosine_similarity_rel.annotate(annotations.track("graphs", "cosine_similarity_from_to"))
6175
+ return _cosine_similarity_rel
6176
+
6177
+ def _cosine_similarity_between(self, pair_subset_between: Relationship):
6178
+ """
6179
+ Create a cosine_similarity relationship, with the first and second position
6180
+ in each tuple jointly constrained to be in the given set of pairs
6181
+ of nodes. Note this relationship is not cached;
6182
+ it is specific to the callsite.
6183
+ """
6184
+ _cosine_similarity_rel = self._create_cosine_similarity_relationship(
6185
+ pair_subset_between=pair_subset_between
6186
+ )
6187
+ _cosine_similarity_rel.annotate(annotations.track("graphs", "cosine_similarity_between"))
6188
+ return _cosine_similarity_rel
6189
+
6190
+ def _create_cosine_similarity_relationship(
6191
+ self,
6192
+ *,
6193
+ node_subset_from: Optional[Relationship] = None,
6194
+ node_subset_to: Optional[Relationship] = None,
6195
+ pair_subset_between: Optional[Relationship] = None,
6196
+ ):
6197
+ """
6198
+ Create cosine_similarity relationship, optionally constrained by
6199
+ the provided node subsets or pair subset.
6200
+ """
6201
+ _cosine_similarity_rel = self._model.Relationship(
6202
+ f"{{node_u:{self._NodeConceptStr}}} has a cosine similarity to "
6203
+ f"{{node_v:{self._NodeConceptStr}}} of {{score:Float}}"
6204
+ )
6205
+
6206
+ # TODO: Optimization opportunity. In a number of branches below,
6207
+ # we compute _count_outneighbor_of, which transitively computes
6208
+ # _outneighbor_of, and then compute _outneighbor_of directly;
6209
+ # the present code structure makes this a developer-time-efficient
6210
+ # way to get this off the ground, but of course involves redundant
6211
+ # work. In future this redundant work could be eliminated.
6212
+
6213
+ # TODO: Optimization opportunity. In some of the cases below
6214
+ # (unweighted in particular), the node_constraint is redundant with
6215
+ # the constraints baked into the _count_outneighbor_of and
6216
+ # _outneighbor_of relationships. The join with node_constraint
6217
+ # could be eliminated in those cases. Possibly also relevant to
6218
+ # other domain-constrained relations.
6219
+
6220
+ # Branch by case to select appropriate count_outneighbor and
6221
+ # outneighbor relationships, and build appropriate constraints
6222
+ # on the domain of the nodes.
6223
+ node_u, node_v = self.Node.ref(), self.Node.ref()
6224
+
6225
+ # Handle the `between` case.
6226
+ if pair_subset_between is not None:
6227
+ # Extract first-position and second-position nodes.
6228
+ first_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
6229
+ second_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
6230
+ node_x, node_y = self.Node.ref(), self.Node.ref()
6231
+ where(
6232
+ pair_subset_between(node_x, node_y)
6233
+ ).define(
6234
+ first_position_subset(node_x),
6235
+ second_position_subset(node_y)
6236
+ )
6237
+
6238
+ count_outneighbor_u_rel = self._count_outneighbor_of(first_position_subset)
6239
+ count_outneighbor_v_rel = self._count_outneighbor_of(second_position_subset)
6240
+ outneighbor_u_rel = self._outneighbor_of(first_position_subset)
6241
+ outneighbor_v_rel = self._outneighbor_of(second_position_subset)
6242
+
6243
+ node_constraints = [pair_subset_between(node_u, node_v)]
6244
+
6245
+ # Handle the `from_` case.
6246
+ elif node_subset_from is not None and node_subset_to is None:
6247
+ count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
6248
+ count_outneighbor_v_rel = self._count_outneighbor
6249
+ outneighbor_u_rel = self._outneighbor_of(node_subset_from)
6250
+ outneighbor_v_rel = self._outneighbor
6251
+ # TODO: This case could be optimized via an analog of
6252
+ # the depth-2 traversal strategy suggested for the equivalent
6253
+ # case of common_neighbor, but for another day.
6254
+
6255
+ node_constraints = [node_subset_from(node_u)]
6256
+
6257
+ # Handle the `from_`/`to` case.
6258
+ elif node_subset_from is not None and node_subset_to is not None:
6259
+ # Check for object identity optimization.
6260
+ if node_subset_from is node_subset_to:
6261
+ count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
6262
+ count_outneighbor_v_rel = count_outneighbor_u_rel
6263
+ outneighbor_u_rel = self._outneighbor_of(node_subset_from)
6264
+ outneighbor_v_rel = outneighbor_u_rel
6265
+ else:
6266
+ count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
6267
+ count_outneighbor_v_rel = self._count_outneighbor_of(node_subset_to)
6268
+ outneighbor_u_rel = self._outneighbor_of(node_subset_from)
6269
+ outneighbor_v_rel = self._outneighbor_of(node_subset_to)
6270
+
6271
+ node_constraints = [node_subset_from(node_u), node_subset_to(node_v)]
6272
+
6273
+ # Handle the `full` case.
6274
+ else:
6275
+ count_outneighbor_u_rel = self._count_outneighbor
6276
+ count_outneighbor_v_rel = self._count_outneighbor
6277
+ outneighbor_u_rel = self._outneighbor
6278
+ outneighbor_v_rel = self._outneighbor
6279
+
6280
+ node_constraints = []
6281
+
6282
+ # Define cosine similarity logic for both weighted and unweighted cases.
5434
6283
  if not self.weighted:
5435
- node_u, node_v = self.Node.ref(), self.Node.ref()
5436
- count_outneighor_u, count_outneighor_v, score = Integer.ref(), Integer.ref(), Float.ref()
6284
+ # Unweighted case: use count of common outneighbors.
6285
+ count_outneighor_u, count_outneighor_v = Integer.ref(), Integer.ref()
6286
+ common_outneighbor_node = self.Node.ref()
6287
+ score = Float.ref()
5437
6288
 
5438
6289
  where(
5439
- self._count_outneighbor(node_u, count_outneighor_u),
5440
- self._count_outneighbor(node_v, count_outneighor_v),
5441
- c_common := self._count_common_outneighbor_fragment(node_u, node_v),
6290
+ *node_constraints,
6291
+ count_outneighbor_u_rel(node_u, count_outneighor_u),
6292
+ count_outneighbor_v_rel(node_v, count_outneighor_v),
6293
+ c_common := count(common_outneighbor_node).per(node_u, node_v).where(
6294
+ outneighbor_u_rel(node_u, common_outneighbor_node),
6295
+ outneighbor_v_rel(node_v, common_outneighbor_node),
6296
+ ),
5442
6297
  score := c_common / sqrt(count_outneighor_u * count_outneighor_v),
5443
6298
  ).define(
5444
6299
  _cosine_similarity_rel(node_u, node_v, score)
5445
6300
  )
5446
6301
  else:
5447
- node_u, node_v = self.Node.ref(), self.Node.ref()
6302
+ # Weighted case: use dot product and norms.
5448
6303
  node_uk, node_vk = self.Node.ref(), self.Node.ref()
5449
6304
  wu, wv = Float.ref(), Float.ref()
6305
+
5450
6306
  where(
6307
+ *node_constraints,
5451
6308
  squared_norm_wu := sum(node_uk, wu * wu).per(node_u).where(self._weight(node_u, node_uk, wu)),
5452
6309
  squared_norm_wv := sum(node_vk, wv * wv).per(node_v).where(self._weight(node_v, node_vk, wv)),
5453
6310
  wu_dot_wv := self._wu_dot_wv_fragment(node_u, node_v),
@@ -5460,19 +6317,69 @@ class Graph():
5460
6317
 
5461
6318
 
5462
6319
  @include_in_docs
5463
- def adamic_adar(self):
5464
- """Returns a ternary relationship containing the Adamic-Adar index for all pairs of nodes.
6320
+ def adamic_adar(
6321
+ self,
6322
+ *,
6323
+ full: Optional[bool] = None,
6324
+ from_: Optional[Relationship] = None,
6325
+ to: Optional[Relationship] = None,
6326
+ between: Optional[Relationship] = None,
6327
+ ):
6328
+ """Returns a ternary relationship containing the Adamic-Adar index for pairs of nodes.
5465
6329
 
5466
6330
  The Adamic-Adar index is a similarity measure between two nodes based
5467
6331
  on the amount of shared neighbors between them, giving more weight to
5468
6332
  common neighbors that are less connected.
5469
6333
 
6334
+ Parameters
6335
+ ----------
6336
+ full : bool, optional
6337
+ If ``True``, computes the Adamic-Adar index for all pairs of nodes in
6338
+ the graph. This computation can be expensive for large graphs, as
6339
+ dependencies can scale quadratically in the number of edges or cubically
6340
+ in the number of nodes. Mutually exclusive with other parameters.
6341
+ Default is ``None``.
6342
+ from_ : Relationship, optional
6343
+ A unary relationship containing a subset of the graph's nodes. When
6344
+ provided, constrains the domain of the Adamic-Adar computation: only
6345
+ Adamic-Adar indices for node pairs where the first node is in this relationship
6346
+ are computed and returned. Mutually exclusive with ``full`` and ``between``.
6347
+ Default is ``None``.
6348
+ to : Relationship, optional
6349
+ A unary relationship containing a subset of the graph's nodes. Can only
6350
+ be used together with the ``from_`` parameter. When provided with ``from_``,
6351
+ constrains the domain of the Adamic-Adar computation: only Adamic-Adar
6352
+ indices for node pairs where the first node is in ``from_`` and the
6353
+ second node is in ``to`` are computed and returned.
6354
+ Default is ``None``.
6355
+ between : Relationship, optional
6356
+ A binary relationship containing pairs of nodes. When provided,
6357
+ constrains the domain of the Adamic-Adar computation: only Adamic-Adar
6358
+ indices for the specific node pairs in this relationship are computed
6359
+ and returned. Mutually exclusive with other parameters.
6360
+ Default is ``None``.
6361
+
5470
6362
  Returns
5471
6363
  -------
5472
6364
  Relationship
5473
6365
  A ternary relationship where each tuple represents a pair of nodes
5474
6366
  and their Adamic-Adar index.
5475
6367
 
6368
+ Raises
6369
+ ------
6370
+ ValueError
6371
+ If ``full`` is provided with any other parameter.
6372
+ If ``between`` is provided with any other parameter.
6373
+ If ``from_`` is provided with any parameter other than ``to``.
6374
+ If none of ``full``, ``from_``, or ``between`` is provided.
6375
+ If ``full`` is not ``True`` or ``None``.
6376
+ AssertionError
6377
+ If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
6378
+ If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
6379
+ If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
6380
+ If ``from_`` or ``to`` is not a unary relationship.
6381
+ If ``between`` is not a binary relationship.
6382
+
5476
6383
  Relationship Schema
5477
6384
  -------------------
5478
6385
  ``adamic_adar(node_u, node_v, score)``
@@ -5496,9 +6403,38 @@ class Graph():
5496
6403
 
5497
6404
  AA(u,v) = Σ (1 / log(degree(w)))
5498
6405
 
6406
+ The ``adamic_adar(full=True)`` method computes and caches the full Adamic-Adar
6407
+ relationship for all pairs of nodes, providing efficient reuse across
6408
+ multiple calls. This can be expensive as dependencies can contain O(|E|²) or
6409
+ O(|V|³) tuples depending on graph density.
6410
+
6411
+ Calling ``adamic_adar()`` without arguments raises a ``ValueError``,
6412
+ to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
6413
+
6414
+ In contrast, ``adamic_adar(from_=subset)`` constrains the computation to
6415
+ tuples with the first position in the passed-in ``subset``. The result is
6416
+ not cached; it is specific to the call site. When a significant fraction of
6417
+ the Adamic-Adar relation is needed across a program, ``adamic_adar(full=True)``
6418
+ is typically more efficient. Use ``adamic_adar(from_=subset)`` only
6419
+ when small subsets of the Adamic-Adar relationship are needed
6420
+ collectively across the program.
6421
+
6422
+ The ``to`` parameter can be used together with ``from_`` to further
6423
+ constrain the computation: ``adamic_adar(from_=subset_a, to=subset_b)``
6424
+ computes Adamic-Adar indices only for node pairs where the first node is in
6425
+ ``subset_a`` and the second node is in ``subset_b``. (Since ``adamic_adar``
6426
+ is symmetric in its first two positions, using ``to`` without ``from_`` would
6427
+ be functionally redundant, and is not allowed.)
6428
+
6429
+ The ``between`` parameter provides another way to constrain the computation.
6430
+ Unlike ``from_`` and ``to``, which allow you to independently constrain the first
6431
+ and second positions in ``adamic_adar`` tuples to sets of nodes, ``between``
6432
+ allows you to constrain the first and second positions, jointly, to specific pairs
6433
+ of nodes.
6434
+
5499
6435
  Examples
5500
6436
  --------
5501
- >>> from relationalai.semantics import Model, define, select, Float
6437
+ >>> from relationalai.semantics import Model, define, select, where, Float
5502
6438
  >>> from relationalai.semantics.reasoners.graph import Graph
5503
6439
  >>>
5504
6440
  >>> # 1. Set up an undirected graph
@@ -5517,10 +6453,10 @@ class Graph():
5517
6453
  ... Edge.new(src=n4, dst=n3),
5518
6454
  ... )
5519
6455
  >>>
5520
- >>> # 3. Select the Adamic-Adar index for the pair (2, 4)
6456
+ >>> # 3. Select the Adamic-Adar indices from the full relationship
5521
6457
  >>> u, v = Node.ref("u"), Node.ref("v")
5522
6458
  >>> score = Float.ref("score")
5523
- >>> adamic_adar = graph.adamic_adar()
6459
+ >>> adamic_adar = graph.adamic_adar(full=True)
5524
6460
  >>> select(
5525
6461
  ... u.id, v.id, score,
5526
6462
  ... ).where(
@@ -5532,32 +6468,193 @@ class Graph():
5532
6468
  id id2 score
5533
6469
  0 2 4 0.910239
5534
6470
 
6471
+ >>> # 4. Use 'from_' parameter to constrain the set of nodes for the first position
6472
+ >>> # Define a subset containing only node 1
6473
+ >>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
6474
+ >>> node = Node.ref()
6475
+ >>> where(node.id == 1).define(subset(node))
6476
+ >>>
6477
+ >>> # Get Adamic-Adar indices only for pairs where first node is in subset
6478
+ >>> constrained_adamic_adar = graph.adamic_adar(from_=subset)
6479
+ >>> select(u.id, v.id, score).where(constrained_adamic_adar(u, v, score)).inspect()
6480
+ ▰▰▰▰ Setup complete
6481
+ id id2 score
6482
+ 0 1 1 2.885390
6483
+ 1 1 4 2.885390
6484
+
6485
+ >>> # 5. Use both 'from_' and 'to' parameters to constrain both positions
6486
+ >>> subset_a = model.Relationship(f"{{node:{Node}}} is in subset_a")
6487
+ >>> subset_b = model.Relationship(f"{{node:{Node}}} is in subset_b")
6488
+ >>> where(node.id == 1).define(subset_a(node))
6489
+ >>> where(node.id == 4).define(subset_b(node))
6490
+ >>>
6491
+ >>> # Get Adamic-Adar indices only where first node is in subset_a and second node is in subset_b
6492
+ >>> constrained_adamic_adar = graph.adamic_adar(from_=subset_a, to=subset_b)
6493
+ >>> select(u.id, v.id, score).where(constrained_adamic_adar(u, v, score)).inspect()
6494
+ ▰▰▰▰ Setup complete
6495
+ id id2 score
6496
+ 0 1 4 2.885390
6497
+
6498
+ >>> # 6. Use 'between' parameter to constrain to specific pairs of nodes
6499
+ >>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
6500
+ >>> node_a, node_b = Node.ref(), Node.ref()
6501
+ >>> where(node_a.id == 1, node_b.id == 4).define(pairs(node_a, node_b))
6502
+ >>> where(node_a.id == 2, node_b.id == 3).define(pairs(node_a, node_b))
6503
+ >>>
6504
+ >>> # Get Adamic-Adar indices only for the specific pairs (1, 4) and (2, 3)
6505
+ >>> constrained_adamic_adar = graph.adamic_adar(between=pairs)
6506
+ >>> select(u.id, v.id, score).where(constrained_adamic_adar(u, v, score)).inspect()
6507
+ ▰▰▰▰ Setup complete
6508
+ id id2 score
6509
+ 0 1 4 2.885390
6510
+ 1 2 3 1.442695
6511
+
5535
6512
  """
5536
- warnings.warn(
5537
- (
5538
- "`adamic_adar` presently always computes the similarity "
5539
- "of all pairs of nodes of the graph. To provide better control over "
5540
- "the computed subset, `adamic_adar`'s interface will soon "
5541
- "need to change."
5542
- ),
5543
- FutureWarning,
5544
- stacklevel=2
6513
+ # Validate domain constraint parameters.
6514
+ self._validate_domain_constraint_parameters(
6515
+ 'adamic_adar', full, from_, to, between
5545
6516
  )
5546
6517
 
6518
+ # At this point, exactly one of `full`, `from_`, or `between`
6519
+ # has been provided, and if `to` is provided, `from_` is also provided.
6520
+
6521
+ # Handle `between`.
6522
+ if between is not None:
6523
+ self._validate_pair_subset_parameter(between)
6524
+ return self._adamic_adar_between(between)
6525
+
6526
+ # Handle `from_` (and potentially `to`).
6527
+ if from_ is not None:
6528
+ self._validate_node_subset_parameter('from_', from_)
6529
+ if to is not None:
6530
+ self._validate_node_subset_parameter('to', to)
6531
+ return self._adamic_adar_from_to(from_, to)
6532
+ return self._adamic_adar_from(from_)
6533
+
6534
+ # Handle `full`.
5547
6535
  return self._adamic_adar
5548
6536
 
5549
6537
  @cached_property
5550
6538
  def _adamic_adar(self):
5551
- """Lazily define and cache the self._adamic_adar relationship."""
5552
- _adamic_adar_rel = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} have adamic adar score {{score:Float}}")
6539
+ """Lazily define and cache the full adamic_adar relationship."""
6540
+ _adamic_adar_rel = self._create_adamic_adar_relationship()
6541
+ _adamic_adar_rel.annotate(annotations.track("graphs", "adamic_adar"))
6542
+ return _adamic_adar_rel
6543
+
6544
+ def _adamic_adar_from(self, node_subset_from: Relationship):
6545
+ """
6546
+ Create an adamic_adar relationship, with the first position in each
6547
+ tuple constrained to be in the given subset of nodes. Note this relationship
6548
+ is not cached; it is specific to the callsite.
6549
+ """
6550
+ _adamic_adar_rel = self._create_adamic_adar_relationship(
6551
+ node_subset_from=node_subset_from
6552
+ )
6553
+ _adamic_adar_rel.annotate(annotations.track("graphs", "adamic_adar_from"))
6554
+ return _adamic_adar_rel
6555
+
6556
+ def _adamic_adar_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
6557
+ """
6558
+ Create an adamic_adar relationship, with the first position in each
6559
+ tuple constrained to be in `node_subset_from`, and the second position in
6560
+ each tuple constrained to be in `node_subset_to`. Note this relationship
6561
+ is not cached; it is specific to the callsite.
6562
+ """
6563
+ _adamic_adar_rel = self._create_adamic_adar_relationship(
6564
+ node_subset_from=node_subset_from,
6565
+ node_subset_to=node_subset_to
6566
+ )
6567
+ _adamic_adar_rel.annotate(annotations.track("graphs", "adamic_adar_from_to"))
6568
+ return _adamic_adar_rel
6569
+
6570
+ def _adamic_adar_between(self, pair_subset_between: Relationship):
6571
+ """
6572
+ Create an adamic_adar relationship, with the first and second positions
6573
+ in each tuple jointly constrained to be in the given set of pairs
6574
+ of nodes. Note this relationship is not cached;
6575
+ it is specific to the callsite.
6576
+ """
6577
+ _adamic_adar_rel = self._create_adamic_adar_relationship(
6578
+ pair_subset_between=pair_subset_between
6579
+ )
6580
+ _adamic_adar_rel.annotate(annotations.track("graphs", "adamic_adar_between"))
6581
+ return _adamic_adar_rel
5553
6582
 
6583
+ def _create_adamic_adar_relationship(
6584
+ self,
6585
+ *,
6586
+ node_subset_from: Optional[Relationship] = None,
6587
+ node_subset_to: Optional[Relationship] = None,
6588
+ pair_subset_between: Optional[Relationship] = None,
6589
+ ):
6590
+ """
6591
+ Create an adamic_adar relationship, optionally constrained by the provided
6592
+ node subsets or pair subset.
6593
+ """
6594
+ _adamic_adar_rel = self._model.Relationship(
6595
+ f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
6596
+ f"have adamic adar score {{score:Float}}"
6597
+ )
6598
+
6599
+ # NOTE: Handling of the common_neighbor relation (`common_neighbor_rel`)
6600
+ # differs in each case, whereas handling of the count_neighbor relation
6601
+ # (`count_neighbor_rel`) is: a) the same among the constrained cases;
6602
+ # and b) different in the unconstrained case. As such we handle
6603
+ # `common_neighbor_rel` in the branches by case below, and handle
6604
+ # `count_neighbor_rel` in a separate constrained/unconstrained branch later.
6605
+
6606
+ # Handle the `between` case.
6607
+ if pair_subset_between is not None:
6608
+ # Get the appropriate common_neighbor relationship.
6609
+ common_neighbor_rel = self._common_neighbor_between(pair_subset_between)
6610
+
6611
+ # Handle the `from_` case.
6612
+ elif node_subset_from is not None and node_subset_to is None:
6613
+ # Get the appropriate common_neighbor relationship.
6614
+ common_neighbor_rel = self._common_neighbor_from(node_subset_from)
6615
+
6616
+ # Handle the `from_`/`to` case.
6617
+ elif node_subset_from is not None and node_subset_to is not None:
6618
+ common_neighbor_rel = self._common_neighbor_from_to(node_subset_from, node_subset_to)
6619
+ # Note that _common_neighbor_from_to handles optimization
6620
+ # when the from_ and to sets are object-identical.
6621
+
6622
+ # Handle the `full` case.
6623
+ else:
6624
+ # Use cached full relationship.
6625
+ common_neighbor_rel = self._common_neighbor
6626
+
6627
+ # Handle `count_neighbor_rel` for unconstrained versus constrained cases.
6628
+ if pair_subset_between is None and node_subset_from is None:
6629
+ # Unconstrained case.
6630
+ count_neighbor_rel = self._count_neighbor
6631
+
6632
+ else:
6633
+ # Constrained cases.
6634
+
6635
+ # Extract common neighbors that appear in
6636
+ # the constrained common_neighbor relationship.
6637
+ common_neighbors_subset = self._model.Relationship(
6638
+ f"{{node:{self._NodeConceptStr}}} is a relevant common neighbor"
6639
+ )
6640
+ node_x, node_y, neighbor_z = self.Node.ref(), self.Node.ref(), self.Node.ref()
6641
+ where(
6642
+ common_neighbor_rel(node_x, node_y, neighbor_z)
6643
+ ).define(
6644
+ common_neighbors_subset(neighbor_z)
6645
+ )
6646
+
6647
+ # From those common neighbors,
6648
+ # build a constrained count_neighbor relationship.
6649
+ count_neighbor_rel = self._count_neighbor_of(common_neighbors_subset)
6650
+
6651
+ # Define the Adamic-Adar aggregation using the selected relationships.
5554
6652
  node_u, node_v, common_neighbor = self.Node.ref(), self.Node.ref(), self.Node.ref()
5555
6653
  neighbor_count = Integer.ref()
5556
-
5557
6654
  where(
5558
6655
  _score := sum(common_neighbor, 1.0 / natural_log(neighbor_count)).per(node_u, node_v).where(
5559
- self._common_neighbor(node_u, node_v, common_neighbor),
5560
- self._count_neighbor(common_neighbor, neighbor_count),
6656
+ common_neighbor_rel(node_u, node_v, common_neighbor),
6657
+ count_neighbor_rel(common_neighbor, neighbor_count),
5561
6658
  )
5562
6659
  ).define(_adamic_adar_rel(node_u, node_v, _score))
5563
6660
 
@@ -5648,6 +6745,7 @@ class Graph():
5648
6745
  def _preferential_attachment(self):
5649
6746
  """Lazily define and cache the self._preferential_attachment relationship."""
5650
6747
  _preferential_attachment_rel = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} have preferential attachment score {{score:Integer}}")
6748
+ _preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment"))
5651
6749
 
5652
6750
  node_u, node_v = self.Node.ref(), self.Node.ref()
5653
6751
  count_u, count_v = Integer.ref(), Integer.ref()