relationalai 0.12.0__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- relationalai/clients/snowflake.py +228 -83
- relationalai/clients/types.py +4 -1
- relationalai/clients/use_index_poller.py +72 -48
- relationalai/clients/util.py +9 -0
- relationalai/dsl.py +1 -2
- relationalai/environments/snowbook.py +10 -1
- relationalai/semantics/internal/internal.py +22 -3
- relationalai/semantics/lqp/executor.py +12 -4
- relationalai/semantics/lqp/model2lqp.py +1 -0
- relationalai/semantics/metamodel/executor.py +2 -1
- relationalai/semantics/metamodel/rewrite/flatten.py +8 -7
- relationalai/semantics/reasoners/graph/core.py +1174 -226
- relationalai/semantics/rel/executor.py +20 -11
- relationalai/semantics/sql/executor/snowflake.py +1 -1
- relationalai/tools/cli.py +6 -2
- relationalai/tools/cli_controls.py +334 -352
- relationalai/tools/constants.py +1 -0
- relationalai/tools/query_utils.py +27 -0
- relationalai/util/otel_configuration.py +1 -1
- {relationalai-0.12.0.dist-info → relationalai-0.12.1.dist-info}/METADATA +1 -1
- {relationalai-0.12.0.dist-info → relationalai-0.12.1.dist-info}/RECORD +24 -23
- {relationalai-0.12.0.dist-info → relationalai-0.12.1.dist-info}/WHEEL +0 -0
- {relationalai-0.12.0.dist-info → relationalai-0.12.1.dist-info}/entry_points.txt +0 -0
- {relationalai-0.12.0.dist-info → relationalai-0.12.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -1056,6 +1056,181 @@ class Graph():
|
|
|
1056
1056
|
|
|
1057
1057
|
# End Visualization --------------------------------------------------------
|
|
1058
1058
|
|
|
1059
|
+
# The following three helper methods validate
|
|
1060
|
+
# `from_`, `to`, and `between`
|
|
1061
|
+
# parameters to public methods that accept them.
|
|
1062
|
+
|
|
1063
|
+
def _validate_domain_constraint_parameters(
|
|
1064
|
+
self,
|
|
1065
|
+
method_name: str,
|
|
1066
|
+
full: Optional[bool],
|
|
1067
|
+
from_: Optional[Relationship],
|
|
1068
|
+
to: Optional[Relationship],
|
|
1069
|
+
between: Optional[Relationship],
|
|
1070
|
+
):
|
|
1071
|
+
"""
|
|
1072
|
+
Validate the domain constraint parameters for methods that accept
|
|
1073
|
+
`full`, `from_`, `to`, and `between` parameters.
|
|
1074
|
+
|
|
1075
|
+
This helper method performs common validation logic that applies
|
|
1076
|
+
across multiple graph methods (e.g., common_neighbor, adamic_adar).
|
|
1077
|
+
|
|
1078
|
+
Parameters
|
|
1079
|
+
----------
|
|
1080
|
+
method_name : str
|
|
1081
|
+
The name of the method being validated (for error messages).
|
|
1082
|
+
full : bool, optional
|
|
1083
|
+
The full parameter value.
|
|
1084
|
+
from_ : Relationship, optional
|
|
1085
|
+
The from_ parameter value.
|
|
1086
|
+
to : Relationship, optional
|
|
1087
|
+
The to parameter value.
|
|
1088
|
+
between : Relationship, optional
|
|
1089
|
+
The between parameter value.
|
|
1090
|
+
|
|
1091
|
+
Raises
|
|
1092
|
+
------
|
|
1093
|
+
ValueError
|
|
1094
|
+
If parameter combinations are invalid.
|
|
1095
|
+
"""
|
|
1096
|
+
# Confirm that `full` was not provided with any other parameter.
|
|
1097
|
+
if (
|
|
1098
|
+
full is not None
|
|
1099
|
+
and (
|
|
1100
|
+
from_ is not None or
|
|
1101
|
+
to is not None or
|
|
1102
|
+
between is not None
|
|
1103
|
+
)
|
|
1104
|
+
):
|
|
1105
|
+
raise ValueError(
|
|
1106
|
+
"The 'full' parameter is mutually exclusive with 'from_', 'to', and 'between'. "
|
|
1107
|
+
f"Use 'full=True' to compute {method_name} for all node pairs, "
|
|
1108
|
+
"or use 'from_'/'to'/'between' to constrain computation to "
|
|
1109
|
+
"specific nodes or pairs."
|
|
1110
|
+
)
|
|
1111
|
+
|
|
1112
|
+
# Confirm that `between` was not provided with any other parameter.
|
|
1113
|
+
if (between is not None
|
|
1114
|
+
and (
|
|
1115
|
+
from_ is not None or
|
|
1116
|
+
to is not None
|
|
1117
|
+
# No need to check `full` here: the preceding check already rejects it alongside `between`.
|
|
1118
|
+
)
|
|
1119
|
+
):
|
|
1120
|
+
raise ValueError(
|
|
1121
|
+
"The 'between' parameter is mutually exclusive with 'from_' and 'to'. "
|
|
1122
|
+
"Use 'between' to constrain computation to specific node pairs, "
|
|
1123
|
+
"or use 'from_'/'to' to constrain by position."
|
|
1124
|
+
)
|
|
1125
|
+
|
|
1126
|
+
# Confirm that 'to' is only used with 'from_'.
|
|
1127
|
+
if to is not None and from_ is None:
|
|
1128
|
+
raise ValueError(
|
|
1129
|
+
"The 'to' parameter can only be used together with the 'from_' parameter. "
|
|
1130
|
+
f"The 'from_' parameter constrains the first position in {method_name} tuples, "
|
|
1131
|
+
f"while 'to' constrains the second position. Since {method_name} is symmetric "
|
|
1132
|
+
"in its first two positions, 'to' without 'from_' would be functionally redundant. "
|
|
1133
|
+
"Please either provide both 'from_' and 'to' parameters, or only 'from_'."
|
|
1134
|
+
)
|
|
1135
|
+
|
|
1136
|
+
# If no parameters are provided, raise an exception
|
|
1137
|
+
# to avoid unintentional, potentially expensive full computation.
|
|
1138
|
+
if (
|
|
1139
|
+
full is None and
|
|
1140
|
+
from_ is None and
|
|
1141
|
+
between is None
|
|
1142
|
+
):
|
|
1143
|
+
raise ValueError(
|
|
1144
|
+
f"Computing {method_name} for all pairs of nodes can be expensive. "
|
|
1145
|
+
f"To compute the full {method_name} relationship, "
|
|
1146
|
+
f"please call `{method_name}(full=True)`. To constrain computation to specific nodes, "
|
|
1147
|
+
f"please use `{method_name}(from_=node_subset)`, "
|
|
1148
|
+
f"`{method_name}(from_=node_subset_a, to=node_subset_b)`, "
|
|
1149
|
+
f"or `{method_name}(between=node_pairs)`."
|
|
1150
|
+
)
|
|
1151
|
+
|
|
1152
|
+
# Validate that full is True (not just not None).
|
|
1153
|
+
# This check is only reached if full is not None
|
|
1154
|
+
# and no other parameters are provided.
|
|
1155
|
+
if full is not None and full is not True:
|
|
1156
|
+
raise ValueError(
|
|
1157
|
+
f"Invalid value (`{full}`) for 'full' parameter. Use `full=True` "
|
|
1158
|
+
f"to compute the full {method_name} relationship, or use 'from_', "
|
|
1159
|
+
"'from_' and 'to', or 'between' to constrain computation."
|
|
1160
|
+
)
|
|
1161
|
+
|
|
1162
|
+
def _validate_node_subset_parameter(
|
|
1163
|
+
self,
|
|
1164
|
+
parameter_name: str,
|
|
1165
|
+
node_subset_relation: Relationship,
|
|
1166
|
+
):
|
|
1167
|
+
"""
|
|
1168
|
+
Validate that a parameter identifying a subset of nodes of interest is
|
|
1169
|
+
a unary relationship, of nodes, attached to the same model
|
|
1170
|
+
that the graph is attached to.
|
|
1171
|
+
"""
|
|
1172
|
+
# Validate that the parameter is a relationship.
|
|
1173
|
+
assert isinstance(node_subset_relation, Relationship), (
|
|
1174
|
+
f"The '{parameter_name}' parameter must be a `Relationship`, "
|
|
1175
|
+
f"but is a `{type(node_subset_relation).__name__}`."
|
|
1176
|
+
)
|
|
1177
|
+
|
|
1178
|
+
# Validate that the relationship is attached to the same model as the graph.
|
|
1179
|
+
assert node_subset_relation._model is self._model, (
|
|
1180
|
+
f"The given '{parameter_name}' relationship must "
|
|
1181
|
+
"be attached to the same model as the graph."
|
|
1182
|
+
)
|
|
1183
|
+
|
|
1184
|
+
# Validate that it's a unary relationship (has exactly one field).
|
|
1185
|
+
assert len(node_subset_relation._fields) == 1, (
|
|
1186
|
+
f"The '{parameter_name}' parameter must be a unary relationship, "
|
|
1187
|
+
f"but it has {len(node_subset_relation._fields)} fields."
|
|
1188
|
+
)
|
|
1189
|
+
|
|
1190
|
+
# Validate that the concept type matches the graph's Node concept.
|
|
1191
|
+
assert node_subset_relation._fields[0].type_str == self.Node._name, (
|
|
1192
|
+
f"The '{parameter_name}' relationship must be over "
|
|
1193
|
+
f"the graph's Node concept ('{self.Node._name}'), "
|
|
1194
|
+
f"but is over '{node_subset_relation._fields[0].type_str}'."
|
|
1195
|
+
)
|
|
1196
|
+
|
|
1197
|
+
# Takes no parameter-name argument for now, since this applies only to `between`.
|
|
1198
|
+
def _validate_pair_subset_parameter(self, pairs_relation):
|
|
1199
|
+
"""
|
|
1200
|
+
Validate that a parameter identifying pairs of nodes of interest is
|
|
1201
|
+
a binary relationship, of pairs of nodes, attached to the same model
|
|
1202
|
+
that the graph is attached to.
|
|
1203
|
+
"""
|
|
1204
|
+
# Validate that the parameter is a relationship.
|
|
1205
|
+
assert isinstance(pairs_relation, Relationship), (
|
|
1206
|
+
"The 'between' parameter must be a `Relationship`, "
|
|
1207
|
+
f"but is a `{type(pairs_relation).__name__}`."
|
|
1208
|
+
)
|
|
1209
|
+
|
|
1210
|
+
# Validate that the relationship is attached to the same model as the graph.
|
|
1211
|
+
assert pairs_relation._model is self._model, (
|
|
1212
|
+
"The given 'between' relationship must be "
|
|
1213
|
+
"attached to the same model as the graph."
|
|
1214
|
+
)
|
|
1215
|
+
|
|
1216
|
+
# Validate that it's a binary relationship (has exactly two fields).
|
|
1217
|
+
assert len(pairs_relation._fields) == 2, (
|
|
1218
|
+
"The 'between' parameter must be a binary relationship, "
|
|
1219
|
+
f"but it has {len(pairs_relation._fields)} fields."
|
|
1220
|
+
)
|
|
1221
|
+
|
|
1222
|
+
# Validate that both fields are typed as the graph's Node concept.
|
|
1223
|
+
assert pairs_relation._fields[0].type_str == self.Node._name, (
|
|
1224
|
+
"The 'between' relationship's first field must be "
|
|
1225
|
+
f"the graph's Node concept ('{self.Node._name}'), "
|
|
1226
|
+
f"but is '{pairs_relation._fields[0].type_str}'."
|
|
1227
|
+
)
|
|
1228
|
+
assert pairs_relation._fields[1].type_str == self.Node._name, (
|
|
1229
|
+
f"The 'between' relationship's second field must be "
|
|
1230
|
+
f"the graph's Node concept ('{self.Node._name}'), "
|
|
1231
|
+
f"but is '{pairs_relation._fields[1].type_str}'."
|
|
1232
|
+
)
|
|
1233
|
+
|
|
1059
1234
|
|
|
1060
1235
|
# The following three `_count_[in,out]neighbor` relationships are
|
|
1061
1236
|
# primarily for internal consumption. They differ from corresponding
|
|
@@ -1065,26 +1240,26 @@ class Graph():
|
|
|
1065
1240
|
@cached_property
|
|
1066
1241
|
def _count_neighbor(self):
|
|
1067
1242
|
"""Lazily define and cache the self._count_neighbor relationship."""
|
|
1068
|
-
return self._create_count_neighbor_relationship(
|
|
1243
|
+
return self._create_count_neighbor_relationship(node_subset=None)
|
|
1069
1244
|
|
|
1070
|
-
def _count_neighbor_of(self,
|
|
1245
|
+
def _count_neighbor_of(self, node_subset: Relationship):
|
|
1071
1246
|
"""
|
|
1072
1247
|
Create a _count_neighbor relationship constrained to the subset of nodes
|
|
1073
|
-
in `
|
|
1248
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
1074
1249
|
specific to the callsite.
|
|
1075
1250
|
"""
|
|
1076
|
-
return self._create_count_neighbor_relationship(
|
|
1251
|
+
return self._create_count_neighbor_relationship(node_subset=node_subset)
|
|
1077
1252
|
|
|
1078
|
-
def _create_count_neighbor_relationship(self, *,
|
|
1253
|
+
def _create_count_neighbor_relationship(self, *, node_subset: Optional[Relationship]):
|
|
1079
1254
|
_count_neighbor_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has neighbor count {{count:Integer}}")
|
|
1080
1255
|
|
|
1081
1256
|
# Choose the appropriate neighbor relationship based on whether we have constraints
|
|
1082
|
-
if
|
|
1257
|
+
if node_subset is None:
|
|
1083
1258
|
# No constraint - use cached neighbor relationship
|
|
1084
1259
|
neighbor_rel = self._neighbor
|
|
1085
1260
|
else:
|
|
1086
1261
|
# Constrained to nodes in the subset - use constrained neighbor relationship
|
|
1087
|
-
neighbor_rel = self._neighbor_of(
|
|
1262
|
+
neighbor_rel = self._neighbor_of(node_subset)
|
|
1088
1263
|
|
|
1089
1264
|
# Apply the same counting logic for both cases
|
|
1090
1265
|
src, dst = self.Node.ref(), self.Node.ref()
|
|
@@ -1095,26 +1270,26 @@ class Graph():
|
|
|
1095
1270
|
@cached_property
|
|
1096
1271
|
def _count_inneighbor(self):
|
|
1097
1272
|
"""Lazily define and cache the self._count_inneighbor relationship."""
|
|
1098
|
-
return self._create_count_inneighbor_relationship(
|
|
1273
|
+
return self._create_count_inneighbor_relationship(node_subset=None)
|
|
1099
1274
|
|
|
1100
|
-
def _count_inneighbor_of(self,
|
|
1275
|
+
def _count_inneighbor_of(self, node_subset: Relationship):
|
|
1101
1276
|
"""
|
|
1102
1277
|
Create a _count_inneighbor relationship constrained to the subset of nodes
|
|
1103
|
-
in `
|
|
1278
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
1104
1279
|
specific to the callsite.
|
|
1105
1280
|
"""
|
|
1106
|
-
return self._create_count_inneighbor_relationship(
|
|
1281
|
+
return self._create_count_inneighbor_relationship(node_subset=node_subset)
|
|
1107
1282
|
|
|
1108
|
-
def _create_count_inneighbor_relationship(self, *,
|
|
1283
|
+
def _create_count_inneighbor_relationship(self, *, node_subset: Optional[Relationship]):
|
|
1109
1284
|
_count_inneighbor_rel = self._model.Relationship(f"{{dst:{self._NodeConceptStr}}} has inneighbor count {{count:Integer}}")
|
|
1110
1285
|
|
|
1111
1286
|
# Choose the appropriate inneighbor relationship based on whether we have constraints
|
|
1112
|
-
if
|
|
1287
|
+
if node_subset is None:
|
|
1113
1288
|
# No constraint - use cached inneighbor relationship
|
|
1114
1289
|
inneighbor_rel = self._inneighbor
|
|
1115
1290
|
else:
|
|
1116
1291
|
# Constrained to nodes in the subset - use constrained inneighbor relationship
|
|
1117
|
-
inneighbor_rel = self._inneighbor_of(
|
|
1292
|
+
inneighbor_rel = self._inneighbor_of(node_subset)
|
|
1118
1293
|
|
|
1119
1294
|
# Apply the same counting logic for both cases
|
|
1120
1295
|
dst, src = self.Node.ref(), self.Node.ref()
|
|
@@ -1125,26 +1300,26 @@ class Graph():
|
|
|
1125
1300
|
@cached_property
|
|
1126
1301
|
def _count_outneighbor(self):
|
|
1127
1302
|
"""Lazily define and cache the self._count_outneighbor relationship."""
|
|
1128
|
-
return self._create_count_outneighbor_relationship(
|
|
1303
|
+
return self._create_count_outneighbor_relationship(node_subset=None)
|
|
1129
1304
|
|
|
1130
|
-
def _count_outneighbor_of(self,
|
|
1305
|
+
def _count_outneighbor_of(self, node_subset: Relationship):
|
|
1131
1306
|
"""
|
|
1132
1307
|
Create a _count_outneighbor relationship constrained to the subset of nodes
|
|
1133
|
-
in `
|
|
1308
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
1134
1309
|
specific to the callsite.
|
|
1135
1310
|
"""
|
|
1136
|
-
return self._create_count_outneighbor_relationship(
|
|
1311
|
+
return self._create_count_outneighbor_relationship(node_subset=node_subset)
|
|
1137
1312
|
|
|
1138
|
-
def _create_count_outneighbor_relationship(self, *,
|
|
1313
|
+
def _create_count_outneighbor_relationship(self, *, node_subset: Optional[Relationship]):
|
|
1139
1314
|
_count_outneighbor_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has outneighbor count {{count:Integer}}")
|
|
1140
1315
|
|
|
1141
1316
|
# Choose the appropriate outneighbor relationship based on whether we have constraints
|
|
1142
|
-
if
|
|
1317
|
+
if node_subset is None:
|
|
1143
1318
|
# No constraint - use cached outneighbor relationship
|
|
1144
1319
|
outneighbor_rel = self._outneighbor
|
|
1145
1320
|
else:
|
|
1146
1321
|
# Constrained to nodes in the subset - use constrained outneighbor relationship
|
|
1147
|
-
outneighbor_rel = self._outneighbor_of(
|
|
1322
|
+
outneighbor_rel = self._outneighbor_of(node_subset)
|
|
1148
1323
|
|
|
1149
1324
|
# Apply the same counting logic for both cases
|
|
1150
1325
|
src, dst = self.Node.ref(), self.Node.ref()
|
|
@@ -1437,56 +1612,27 @@ class Graph():
|
|
|
1437
1612
|
return self._neighbor
|
|
1438
1613
|
else:
|
|
1439
1614
|
# Validate the 'of' parameter
|
|
1440
|
-
self._validate_node_subset_parameter(of)
|
|
1615
|
+
self._validate_node_subset_parameter('of', of)
|
|
1441
1616
|
return self._neighbor_of(of)
|
|
1442
1617
|
|
|
1443
|
-
def _validate_node_subset_parameter(self, of_relation):
|
|
1444
|
-
"""
|
|
1445
|
-
Validate that a parameter identifying a subset of nodes of interest is
|
|
1446
|
-
a unary relationship containing nodes that is attached to
|
|
1447
|
-
the same model that the graph is attached to.
|
|
1448
|
-
"""
|
|
1449
|
-
# Validate that the parameter is a relationship.
|
|
1450
|
-
assert isinstance(of_relation, Relationship), (
|
|
1451
|
-
"The 'of' parameter must be a `Relationship`, "
|
|
1452
|
-
f"but is a `{type(of_relation).__name__}`."
|
|
1453
|
-
)
|
|
1454
|
-
|
|
1455
|
-
# Validate that the relationship is attached to the same model as the graph.
|
|
1456
|
-
assert of_relation._model is self._model, (
|
|
1457
|
-
"The given 'of' relationship must be attached to the same model as the graph."
|
|
1458
|
-
)
|
|
1459
|
-
|
|
1460
|
-
# Validate that it's a unary relationship (has exactly one field).
|
|
1461
|
-
assert len(of_relation._fields) == 1, (
|
|
1462
|
-
"The 'of' parameter must be a unary relationship, "
|
|
1463
|
-
f"but it has {len(of_relation._fields)} fields."
|
|
1464
|
-
)
|
|
1465
|
-
|
|
1466
|
-
# Validate that the concept type matches the graph's Node concept.
|
|
1467
|
-
assert of_relation._fields[0].type_str == self.Node._name, (
|
|
1468
|
-
f"The 'of' relationship must be over the graph's Node concept ('{self.Node._name}'), "
|
|
1469
|
-
f"but is over '{of_relation._fields[0].type_str}'."
|
|
1470
|
-
)
|
|
1471
|
-
|
|
1472
1618
|
@cached_property
|
|
1473
1619
|
def _neighbor(self):
|
|
1474
1620
|
"""Lazily define and cache the self._neighbor relationship."""
|
|
1475
|
-
_neighbor_rel = self._create_neighbor_relationship(
|
|
1621
|
+
_neighbor_rel = self._create_neighbor_relationship(node_subset=None)
|
|
1476
1622
|
_neighbor_rel.annotate(annotations.track("graphs", "neighbor"))
|
|
1477
1623
|
return _neighbor_rel
|
|
1478
1624
|
|
|
1479
|
-
def _neighbor_of(self,
|
|
1625
|
+
def _neighbor_of(self, node_subset: Relationship):
|
|
1480
1626
|
"""
|
|
1481
1627
|
Create a neighbor relationship constrained to the subset of nodes
|
|
1482
|
-
in `
|
|
1628
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
1483
1629
|
specific to the callsite.
|
|
1484
1630
|
"""
|
|
1485
|
-
_neighbor_rel = self._create_neighbor_relationship(
|
|
1631
|
+
_neighbor_rel = self._create_neighbor_relationship(node_subset=node_subset)
|
|
1486
1632
|
_neighbor_rel.annotate(annotations.track("graphs", "neighbor_of"))
|
|
1487
1633
|
return _neighbor_rel
|
|
1488
1634
|
|
|
1489
|
-
def _create_neighbor_relationship(self, *,
|
|
1635
|
+
def _create_neighbor_relationship(self, *, node_subset: Optional[Relationship]):
|
|
1490
1636
|
_neighbor_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has neighbor {{dst:{self._NodeConceptStr}}}")
|
|
1491
1637
|
src, dst = self.Node.ref(), self.Node.ref()
|
|
1492
1638
|
|
|
@@ -1497,14 +1643,14 @@ class Graph():
|
|
|
1497
1643
|
# Capture out-neighbors.
|
|
1498
1644
|
where(
|
|
1499
1645
|
self._edge(src, dst),
|
|
1500
|
-
*([
|
|
1646
|
+
*([node_subset(src)] if node_subset else [])
|
|
1501
1647
|
).define(
|
|
1502
1648
|
_neighbor_rel(src, dst)
|
|
1503
1649
|
)
|
|
1504
1650
|
# Capture in-neighbors.
|
|
1505
1651
|
where(
|
|
1506
1652
|
self._edge(src, dst),
|
|
1507
|
-
*([
|
|
1653
|
+
*([node_subset(dst)] if node_subset else [])
|
|
1508
1654
|
).define(
|
|
1509
1655
|
_neighbor_rel(dst, src)
|
|
1510
1656
|
)
|
|
@@ -1513,7 +1659,7 @@ class Graph():
|
|
|
1513
1659
|
# so a single rule suffices to capture all neighbors.
|
|
1514
1660
|
where(
|
|
1515
1661
|
self._edge(src, dst),
|
|
1516
|
-
*([
|
|
1662
|
+
*([node_subset(src)] if node_subset else [])
|
|
1517
1663
|
).define(
|
|
1518
1664
|
_neighbor_rel(src, dst)
|
|
1519
1665
|
)
|
|
@@ -1628,27 +1774,27 @@ class Graph():
|
|
|
1628
1774
|
return self._inneighbor
|
|
1629
1775
|
else:
|
|
1630
1776
|
# Validate the 'of' parameter
|
|
1631
|
-
self._validate_node_subset_parameter(of)
|
|
1777
|
+
self._validate_node_subset_parameter('of', of)
|
|
1632
1778
|
return self._inneighbor_of(of)
|
|
1633
1779
|
|
|
1634
1780
|
@cached_property
|
|
1635
1781
|
def _inneighbor(self):
|
|
1636
1782
|
"""Lazily define and cache the self._inneighbor relationship."""
|
|
1637
|
-
_inneighbor_rel = self._create_inneighbor_relationship(
|
|
1783
|
+
_inneighbor_rel = self._create_inneighbor_relationship(node_subset=None)
|
|
1638
1784
|
_inneighbor_rel.annotate(annotations.track("graphs", "inneighbor"))
|
|
1639
1785
|
return _inneighbor_rel
|
|
1640
1786
|
|
|
1641
|
-
def _inneighbor_of(self,
|
|
1787
|
+
def _inneighbor_of(self, node_subset: Relationship):
|
|
1642
1788
|
"""
|
|
1643
1789
|
Create an inneighbor relationship constrained to the subset of nodes
|
|
1644
|
-
in `
|
|
1790
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
1645
1791
|
specific to the callsite.
|
|
1646
1792
|
"""
|
|
1647
|
-
_inneighbor_rel = self._create_inneighbor_relationship(
|
|
1793
|
+
_inneighbor_rel = self._create_inneighbor_relationship(node_subset=node_subset)
|
|
1648
1794
|
_inneighbor_rel.annotate(annotations.track("graphs", "inneighbor_of"))
|
|
1649
1795
|
return _inneighbor_rel
|
|
1650
1796
|
|
|
1651
|
-
def _create_inneighbor_relationship(self, *,
|
|
1797
|
+
def _create_inneighbor_relationship(self, *, node_subset: Optional[Relationship]):
|
|
1652
1798
|
_inneighbor_rel = self._model.Relationship(f"{{dst:{self._NodeConceptStr}}} has inneighbor {{src:{self._NodeConceptStr}}}")
|
|
1653
1799
|
src, dst = self.Node.ref(), self.Node.ref()
|
|
1654
1800
|
|
|
@@ -1657,7 +1803,7 @@ class Graph():
|
|
|
1657
1803
|
# have an edge to the destination nodes in our subset.
|
|
1658
1804
|
where(
|
|
1659
1805
|
self._edge(src, dst),
|
|
1660
|
-
*([
|
|
1806
|
+
*([node_subset(dst)] if node_subset else [])
|
|
1661
1807
|
).define(
|
|
1662
1808
|
_inneighbor_rel(dst, src)
|
|
1663
1809
|
)
|
|
@@ -1666,7 +1812,7 @@ class Graph():
|
|
|
1666
1812
|
# so neighbors and in-neighbors are the same.
|
|
1667
1813
|
where(
|
|
1668
1814
|
self._edge(src, dst),
|
|
1669
|
-
*([
|
|
1815
|
+
*([node_subset(dst)] if node_subset else [])
|
|
1670
1816
|
).define(
|
|
1671
1817
|
_inneighbor_rel(dst, src)
|
|
1672
1818
|
)
|
|
@@ -1783,27 +1929,27 @@ class Graph():
|
|
|
1783
1929
|
return self._outneighbor
|
|
1784
1930
|
else:
|
|
1785
1931
|
# Validate the 'of' parameter
|
|
1786
|
-
self._validate_node_subset_parameter(of)
|
|
1932
|
+
self._validate_node_subset_parameter('of', of)
|
|
1787
1933
|
return self._outneighbor_of(of)
|
|
1788
1934
|
|
|
1789
1935
|
@cached_property
|
|
1790
1936
|
def _outneighbor(self):
|
|
1791
1937
|
"""Lazily define and cache the self._outneighbor relationship."""
|
|
1792
|
-
_outneighbor_rel = self._create_outneighbor_relationship(
|
|
1938
|
+
_outneighbor_rel = self._create_outneighbor_relationship(node_subset=None)
|
|
1793
1939
|
_outneighbor_rel.annotate(annotations.track("graphs", "outneighbor"))
|
|
1794
1940
|
return _outneighbor_rel
|
|
1795
1941
|
|
|
1796
|
-
def _outneighbor_of(self,
|
|
1942
|
+
def _outneighbor_of(self, node_subset: Relationship):
|
|
1797
1943
|
"""
|
|
1798
1944
|
Create an outneighbor relationship constrained to the subset of nodes
|
|
1799
|
-
in `
|
|
1945
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
1800
1946
|
specific to the callsite.
|
|
1801
1947
|
"""
|
|
1802
|
-
_outneighbor_rel = self._create_outneighbor_relationship(
|
|
1948
|
+
_outneighbor_rel = self._create_outneighbor_relationship(node_subset=node_subset)
|
|
1803
1949
|
_outneighbor_rel.annotate(annotations.track("graphs", "outneighbor_of"))
|
|
1804
1950
|
return _outneighbor_rel
|
|
1805
1951
|
|
|
1806
|
-
def _create_outneighbor_relationship(self, *,
|
|
1952
|
+
def _create_outneighbor_relationship(self, *, node_subset: Optional[Relationship]):
|
|
1807
1953
|
_outneighbor_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has outneighbor {{dst:{self._NodeConceptStr}}}")
|
|
1808
1954
|
src, dst = self.Node.ref(), self.Node.ref()
|
|
1809
1955
|
|
|
@@ -1812,7 +1958,7 @@ class Graph():
|
|
|
1812
1958
|
# have an edge from the source nodes in our subset.
|
|
1813
1959
|
where(
|
|
1814
1960
|
self._edge(src, dst),
|
|
1815
|
-
*([
|
|
1961
|
+
*([node_subset(src)] if node_subset else [])
|
|
1816
1962
|
).define(
|
|
1817
1963
|
_outneighbor_rel(src, dst)
|
|
1818
1964
|
)
|
|
@@ -1821,7 +1967,7 @@ class Graph():
|
|
|
1821
1967
|
# so neighbors and out-neighbors are the same.
|
|
1822
1968
|
where(
|
|
1823
1969
|
self._edge(src, dst),
|
|
1824
|
-
*([
|
|
1970
|
+
*([node_subset(src)] if node_subset else [])
|
|
1825
1971
|
).define(
|
|
1826
1972
|
_outneighbor_rel(src, dst)
|
|
1827
1973
|
)
|
|
@@ -1830,18 +1976,67 @@ class Graph():
|
|
|
1830
1976
|
|
|
1831
1977
|
|
|
1832
1978
|
@include_in_docs
|
|
1833
|
-
def common_neighbor(self
|
|
1834
|
-
|
|
1979
|
+
def common_neighbor(self,
|
|
1980
|
+
*,
|
|
1981
|
+
full: Optional[bool] = None,
|
|
1982
|
+
from_: Optional[Relationship] = None,
|
|
1983
|
+
to: Optional[Relationship] = None,
|
|
1984
|
+
between: Optional[Relationship] = None,
|
|
1985
|
+
):
|
|
1986
|
+
"""Returns a ternary relationship of common neighbor triplets.
|
|
1835
1987
|
|
|
1836
1988
|
A node `w` is a common neighbor of a pair of nodes `u` and `v` if
|
|
1837
1989
|
`w` is a neighbor of both `u` and `v`.
|
|
1838
1990
|
|
|
1991
|
+
Parameters
|
|
1992
|
+
----------
|
|
1993
|
+
full : bool, optional
|
|
1994
|
+
If ``True``, computes common neighbors for all pairs of nodes in
|
|
1995
|
+
the graph. This computation can be expensive for large graphs, as the
|
|
1996
|
+
result can scale quadratically in the number of edges or cubically in
|
|
1997
|
+
the number of nodes. Mutually exclusive with other parameters.
|
|
1998
|
+
Default is ``None``.
|
|
1999
|
+
from_ : Relationship, optional
|
|
2000
|
+
A unary relationship containing a subset of the graph's nodes. When
|
|
2001
|
+
provided, constrains the domain of the common neighbor computation: only
|
|
2002
|
+
common neighbors of node pairs where the first node is in this relationship
|
|
2003
|
+
are computed and returned. Mutually exclusive with ``full`` and ``between``.
|
|
2004
|
+
Default is ``None``.
|
|
2005
|
+
to : Relationship, optional
|
|
2006
|
+
A unary relationship containing a subset of the graph's nodes. Can only
|
|
2007
|
+
be used together with the ``from_`` parameter. When provided with ``from_``,
|
|
2008
|
+
constrains the domain of the common neighbor computation: only common
|
|
2009
|
+
neighbors of node pairs where the first node is in ``from_`` and the
|
|
2010
|
+
second node is in ``to`` are computed and returned.
|
|
2011
|
+
Default is ``None``.
|
|
2012
|
+
between : Relationship, optional
|
|
2013
|
+
A binary relationship containing pairs of nodes. When provided,
|
|
2014
|
+
constrains the domain of the common neighbor computation: only common
|
|
2015
|
+
neighbors for the specific node pairs in this relationship are computed
|
|
2016
|
+
and returned. Mutually exclusive with other parameters.
|
|
2017
|
+
Default is ``None``.
|
|
2018
|
+
|
|
1839
2019
|
Returns
|
|
1840
2020
|
-------
|
|
1841
2021
|
Relationship
|
|
1842
2022
|
A ternary relationship where each tuple represents a pair of nodes
|
|
1843
2023
|
and one of their common neighbors.
|
|
1844
2024
|
|
|
2025
|
+
Raises
|
|
2026
|
+
------
|
|
2027
|
+
ValueError
|
|
2028
|
+
If ``full`` is provided with any other parameter.
|
|
2029
|
+
If ``between`` is provided with any other parameter.
|
|
2030
|
+
If ``from_`` is provided with any parameter other than ``to``.
|
|
2031
|
+
If none of ``full``, ``from_``, or ``between`` is provided.
|
|
2032
|
+
If ``full`` is not ``True`` or ``None``.
|
|
2033
|
+
AssertionError
|
|
2034
|
+
If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
|
|
2035
|
+
If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
|
|
2036
|
+
If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
|
|
2037
|
+
If ``from_`` or ``to`` is not a unary relationship.
|
|
2038
|
+
If ``between`` is not a binary relationship.
|
|
2039
|
+
|
|
1845
2040
|
Relationship Schema
|
|
1846
2041
|
-------------------
|
|
1847
2042
|
``common_neighbor(node_u, node_v, common_neighbor_node)``
|
|
@@ -1858,6 +2053,37 @@ class Graph():
|
|
|
1858
2053
|
| Directed | Yes | |
|
|
1859
2054
|
| Weighted | Yes | Weights are ignored. |
|
|
1860
2055
|
|
|
2056
|
+
Notes
|
|
2057
|
+
-----
|
|
2058
|
+
The ``common_neighbor(full=True)`` method computes and caches the full common
|
|
2059
|
+
neighbor relationship for all pairs of nodes, providing efficient reuse across
|
|
2060
|
+
multiple calls. This can be expensive as the result can contain O(|E|²) or
|
|
2061
|
+
O(|V|³) tuples depending on graph density.
|
|
2062
|
+
|
|
2063
|
+
Calling ``common_neighbor()`` without arguments raises a ``ValueError``,
|
|
2064
|
+
to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
|
|
2065
|
+
|
|
2066
|
+
In contrast, ``common_neighbor(from_=subset)`` constrains the computation to
|
|
2067
|
+
tuples with the first position in the passed-in ``subset``. The result is
|
|
2068
|
+
not cached; it is specific to the call site. When a significant fraction of
|
|
2069
|
+
the common neighbor relation is needed across a program, ``common_neighbor(full=True)``
|
|
2070
|
+
is typically more efficient. Use ``common_neighbor(from_=subset)`` only
|
|
2071
|
+
when small subsets of the common neighbor relationship are needed
|
|
2072
|
+
collectively across the program.
|
|
2073
|
+
|
|
2074
|
+
The ``to`` parameter can be used together with ``from_`` to further
|
|
2075
|
+
constrain the computation: ``common_neighbor(from_=subset_a, to=subset_b)``
|
|
2076
|
+
computes common neighbors only for node pairs where the first node is in
|
|
2077
|
+
``subset_a`` and the second node is in ``subset_b``. (Since ``common_neighbor``
|
|
2078
|
+
is symmetric in its first two positions, using ``to`` without ``from_`` would
|
|
2079
|
+
be functionally redundant, and is not allowed.)
|
|
2080
|
+
|
|
2081
|
+
The ``between`` parameter provides another way to constrain the computation:
|
|
2082
|
+
Unlike ``from_`` and ``to``, which allow you to independently constrain the first
|
|
2083
|
+
and second positions in ``common_neighbor`` tuples to sets of nodes, ``between``
|
|
2084
|
+
allows you to constrain the first and second positions, jointly, to specific pairs
|
|
2085
|
+
of nodes.
|
|
2086
|
+
|
|
1861
2087
|
Examples
|
|
1862
2088
|
--------
|
|
1863
2089
|
>>> from relationalai.semantics import Model, define, select
|
|
@@ -1881,7 +2107,7 @@ class Graph():
|
|
|
1881
2107
|
>>>
|
|
1882
2108
|
>>> # 3. Select the IDs from the common_neighbor relationship and inspect
|
|
1883
2109
|
>>> u, v, w = Node.ref("u"), Node.ref("v"), Node.ref("w")
|
|
1884
|
-
>>> common_neighbor = graph.common_neighbor()
|
|
2110
|
+
>>> common_neighbor = graph.common_neighbor(full=True)
|
|
1885
2111
|
>>> select(
|
|
1886
2112
|
... u.id, v.id, w.id
|
|
1887
2113
|
... ).where(
|
|
@@ -1913,27 +2139,230 @@ class Graph():
|
|
|
1913
2139
|
21 4 4 2
|
|
1914
2140
|
22 4 4 3
|
|
1915
2141
|
|
|
2142
|
+
>>> # 4. Use 'from_' parameter to constrain the set of nodes to compute common neighbors for
|
|
2143
|
+
>>> # Define a subset containing only node 1
|
|
2144
|
+
>>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
|
|
2145
|
+
>>> node = Node.ref()
|
|
2146
|
+
>>> where(node.id == 1).define(subset(node))
|
|
2147
|
+
>>>
|
|
2148
|
+
>>> # Get common neighbors only for pairs where the first node is in subset
|
|
2149
|
+
>>> constrained_common_neighbor = graph.common_neighbor(from_=subset)
|
|
2150
|
+
>>> select(u.id, v.id, w.id).where(constrained_common_neighbor(u, v, w)).inspect()
|
|
2151
|
+
▰▰▰▰ Setup complete
|
|
2152
|
+
id id2 id3
|
|
2153
|
+
0 1 1 2
|
|
2154
|
+
1 1 3 2
|
|
2155
|
+
2 1 4 2
|
|
2156
|
+
|
|
2157
|
+
>>> # 5. Use both 'from_' and 'to' parameters to constrain the first two positions
|
|
2158
|
+
>>> subset_a = model.Relationship(f"{{node:{Node}}} is in subset_a")
|
|
2159
|
+
>>> subset_b = model.Relationship(f"{{node:{Node}}} is in subset_b")
|
|
2160
|
+
>>> where(node.id == 1).define(subset_a(node))
|
|
2161
|
+
>>> where(node.id == 3).define(subset_b(node))
|
|
2162
|
+
>>>
|
|
2163
|
+
>>> # Get common neighbors only where the first node is in subset_a and the second node is in subset_b
|
|
2164
|
+
>>> constrained_common_neighbor = graph.common_neighbor(from_=subset_a, to=subset_b)
|
|
2165
|
+
>>> select(u.id, v.id, w.id).where(constrained_common_neighbor(u, v, w)).inspect()
|
|
2166
|
+
▰▰▰▰ Setup complete
|
|
2167
|
+
id id2 id3
|
|
2168
|
+
0 1 3 2
|
|
2169
|
+
|
|
2170
|
+
>>> # 6. Use 'between' parameter to constrain to specific pairs of nodes
|
|
2171
|
+
>>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
|
|
2172
|
+
>>> node_a, node_b = Node.ref(), Node.ref()
|
|
2173
|
+
>>> where(node_a.id == 1, node_b.id == 3).define(pairs(node_a, node_b))
|
|
2174
|
+
>>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
|
|
2175
|
+
>>>
|
|
2176
|
+
>>> # Get common neighbors only for the specific pairs (1, 3) and (2, 4)
|
|
2177
|
+
>>> constrained_common_neighbor = graph.common_neighbor(between=pairs)
|
|
2178
|
+
>>> select(u.id, v.id, w.id).where(constrained_common_neighbor(u, v, w)).inspect()
|
|
2179
|
+
▰▰▰▰ Setup complete
|
|
2180
|
+
id id2 id3
|
|
2181
|
+
0 1 3 2
|
|
2182
|
+
1 2 4 3
|
|
2183
|
+
|
|
1916
2184
|
"""
|
|
1917
|
-
|
|
1918
|
-
|
|
1919
|
-
|
|
1920
|
-
"for all pairs of nodes in the graph. To provide better control "
|
|
1921
|
-
"over the computed subset, `common_neighbor`'s interface "
|
|
1922
|
-
"will soon need to change."
|
|
1923
|
-
),
|
|
1924
|
-
FutureWarning,
|
|
1925
|
-
stacklevel=2
|
|
2185
|
+
# Validate domain constraint parameters.
|
|
2186
|
+
self._validate_domain_constraint_parameters(
|
|
2187
|
+
'common_neighbor', full, from_, to, between
|
|
1926
2188
|
)
|
|
2189
|
+
|
|
2190
|
+
# At this point, exactly one of `full`, `from_`, or `between`
|
|
2191
|
+
# has been provided, and if `to` is provided, `from_` is also provided.
|
|
2192
|
+
|
|
2193
|
+
# Handle `between`.
|
|
2194
|
+
if between is not None:
|
|
2195
|
+
self._validate_pair_subset_parameter(between)
|
|
2196
|
+
return self._common_neighbor_between(between)
|
|
2197
|
+
|
|
2198
|
+
# Handle `from_` (and potentially `to`).
|
|
2199
|
+
if from_ is not None:
|
|
2200
|
+
self._validate_node_subset_parameter('from_', from_)
|
|
2201
|
+
if to is not None:
|
|
2202
|
+
self._validate_node_subset_parameter('to', to)
|
|
2203
|
+
return self._common_neighbor_from_to(from_, to)
|
|
2204
|
+
return self._common_neighbor_from(from_)
|
|
2205
|
+
|
|
2206
|
+
# Handle `full`.
|
|
1927
2207
|
return self._common_neighbor
|
|
1928
2208
|
|
|
1929
2209
|
@cached_property
|
|
1930
2210
|
def _common_neighbor(self):
|
|
1931
|
-
"""Lazily define and cache the
|
|
1932
|
-
_common_neighbor_rel = self.
|
|
2211
|
+
"""Lazily define and cache the full common_neighbor relationship."""
|
|
2212
|
+
_common_neighbor_rel = self._create_common_neighbor_relationship()
|
|
1933
2213
|
_common_neighbor_rel.annotate(annotations.track("graphs", "common_neighbor"))
|
|
2214
|
+
return _common_neighbor_rel
|
|
2215
|
+
|
|
2216
|
+
def _common_neighbor_from(self, node_subset_from: Relationship):
|
|
2217
|
+
"""
|
|
2218
|
+
Create a common_neighbor relationship, with the first position in each
|
|
2219
|
+
tuple constrained to be in the given subset of nodes. Note this relationship
|
|
2220
|
+
is not cached; it is specific to the callsite.
|
|
2221
|
+
"""
|
|
2222
|
+
_common_neighbor_rel = self._create_common_neighbor_relationship(
|
|
2223
|
+
node_subset_from=node_subset_from
|
|
2224
|
+
)
|
|
2225
|
+
_common_neighbor_rel.annotate(annotations.track("graphs", "common_neighbor_from"))
|
|
2226
|
+
return _common_neighbor_rel
|
|
2227
|
+
|
|
2228
|
+
def _common_neighbor_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
|
|
2229
|
+
"""
|
|
2230
|
+
Create a common_neighbor relationship, with the first position in each
|
|
2231
|
+
tuple constrained to be in `node_subset_from`, and the second position in
|
|
2232
|
+
each tuple constrained to be in `node_subset_to`. Note this relationship
|
|
2233
|
+
is not cached; it is specific to the callsite.
|
|
2234
|
+
"""
|
|
2235
|
+
_common_neighbor_rel = self._create_common_neighbor_relationship(
|
|
2236
|
+
node_subset_from=node_subset_from,
|
|
2237
|
+
node_subset_to=node_subset_to
|
|
2238
|
+
)
|
|
2239
|
+
_common_neighbor_rel.annotate(annotations.track("graphs", "common_neighbor_from_to"))
|
|
2240
|
+
return _common_neighbor_rel
|
|
2241
|
+
|
|
2242
|
+
def _common_neighbor_between(self, pair_subset: Relationship):
|
|
2243
|
+
"""
|
|
2244
|
+
Create a common_neighbor relationship, with the first and second position
|
|
2245
|
+
in each tuple jointly constrained to be in the given set of pairs
|
|
2246
|
+
of nodes. Note this relationship is not cached;
|
|
2247
|
+
it is specific to the callsite.
|
|
2248
|
+
"""
|
|
2249
|
+
_common_neighbor_rel = self._create_common_neighbor_relationship(
|
|
2250
|
+
pair_subset_between=pair_subset
|
|
2251
|
+
)
|
|
2252
|
+
_common_neighbor_rel.annotate(annotations.track("graphs", "common_neighbor_between"))
|
|
2253
|
+
return _common_neighbor_rel
|
|
2254
|
+
|
|
2255
|
+
def _create_common_neighbor_relationship(
|
|
2256
|
+
self,
|
|
2257
|
+
*,
|
|
2258
|
+
node_subset_from: Optional[Relationship] = None,
|
|
2259
|
+
node_subset_to: Optional[Relationship] = None,
|
|
2260
|
+
pair_subset_between: Optional[Relationship] = None,
|
|
2261
|
+
):
|
|
2262
|
+
"""
|
|
2263
|
+
Create common_neighbor relationship, optionally constrained by the provided
|
|
2264
|
+
node subsets or pair subset.
|
|
2265
|
+
"""
|
|
2266
|
+
_common_neighbor_rel = self._model.Relationship(
|
|
2267
|
+
f"{{node_a:{self._NodeConceptStr}}} and {{node_b:{self._NodeConceptStr}}} "
|
|
2268
|
+
f"have common neighbor {{neighbor_node:{self._NodeConceptStr}}}"
|
|
2269
|
+
)
|
|
2270
|
+
node_a, node_b, neighbor_node = self.Node.ref(), self.Node.ref(), self.Node.ref()
|
|
2271
|
+
|
|
2272
|
+
# Handle the `between` case.
|
|
2273
|
+
if pair_subset_between is not None:
|
|
2274
|
+
# Extract all nodes that appear in any position of the pairs relationship
|
|
2275
|
+
# into a unary relation that we can use to constrain the neighbor computation.
|
|
2276
|
+
nodes_in_pairs = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} is in pairs subset")
|
|
2277
|
+
node_x, node_y = self.Node.ref(), self.Node.ref()
|
|
2278
|
+
where(
|
|
2279
|
+
pair_subset_between(node_x, node_y)
|
|
2280
|
+
).define(
|
|
2281
|
+
nodes_in_pairs(node_x),
|
|
2282
|
+
nodes_in_pairs(node_y)
|
|
2283
|
+
)
|
|
2284
|
+
|
|
2285
|
+
# Create a neighbor relation constrained to the nodes that appear in the pairs.
|
|
2286
|
+
neighbor_rel = self._neighbor_of(nodes_in_pairs)
|
|
2287
|
+
neighbor_a_rel = neighbor_rel
|
|
2288
|
+
neighbor_b_rel = neighbor_rel
|
|
2289
|
+
|
|
2290
|
+
# The constraint fragment ensures we only compute common neighbors for the
|
|
2291
|
+
# specific pairs provided, not for all combinations of nodes in those pairs.
|
|
2292
|
+
node_constraint = [pair_subset_between(node_a, node_b)]
|
|
2293
|
+
|
|
2294
|
+
# Handle the `from_` case.
|
|
2295
|
+
elif node_subset_from is not None and node_subset_to is None:
|
|
2296
|
+
# Note that in this case we must compute all of `_neighbor` anyway,
|
|
2297
|
+
# as the second position in each tuple is unconstrained. Given that,
|
|
2298
|
+
# computing `_neighbor_of` for `node_subset_from` to constrain the
|
|
2299
|
+
# first position that way would be less efficient than using
|
|
2300
|
+
# `_neighbor` and joining the relevant variable with `node_subset_from`.
|
|
2301
|
+
neighbor_a_rel = self._neighbor
|
|
2302
|
+
neighbor_b_rel = self._neighbor
|
|
2303
|
+
node_constraint = [node_subset_from(node_a)]
|
|
2304
|
+
# TODO: Nice observation from @rygao: We can instead implement this
|
|
2305
|
+
# as a depth-2 traversal starting from `node_subset_from`. Candidate code:
|
|
2306
|
+
|
|
2307
|
+
# neighbor_a_rel = self._neighbor_of(node_subset_from)
|
|
2308
|
+
#
|
|
2309
|
+
# domain_w = Relationship(f"{{node:{self._NodeConceptStr}}} is the domain of `w` in `common_neighbor(u, v, w)`")
|
|
2310
|
+
# node_x, node_y = self.Node.ref(), self.Node.ref()
|
|
2311
|
+
# where(neighbor_a_rel(node_x, node_y)).define(domain_w(node_y))
|
|
2312
|
+
# neighbor_b_rel = self._neighbor_of(domain_w)
|
|
2313
|
+
#
|
|
2314
|
+
# node_constraint = []
|
|
2315
|
+
#
|
|
2316
|
+
# # need to reverse the args of `neighbor_b_rel()`, due to its domain constraint
|
|
2317
|
+
# # relies on the symmetry of `neighbor`
|
|
2318
|
+
# where(
|
|
2319
|
+
# *node_constraint,
|
|
2320
|
+
# neighbor_a_rel(node_a, neighbor_node),
|
|
2321
|
+
# neighbor_b_rel(neighbor_node, node_b)
|
|
2322
|
+
# ).define(_common_neighbor_rel(node_a, node_b, neighbor_node))
|
|
2323
|
+
|
|
2324
|
+
# Handle the `from_`/`to` case.
|
|
2325
|
+
elif node_subset_from is not None and node_subset_to is not None:
|
|
2326
|
+
# There are two cases:
|
|
2327
|
+
#
|
|
2328
|
+
# NOTE: For both of the following branches, spiritually we are applying
|
|
2329
|
+
# `node_constraint = [node_subset_from(node_a), node_subset_to(node_b)]`,
|
|
2330
|
+
# but these are already enforced by the use of the constrained
|
|
2331
|
+
# `_neighbor_of` relationships, so we don't need to include them
|
|
2332
|
+
# again in `node_constraint`.
|
|
2333
|
+
if node_subset_from is node_subset_to:
|
|
2334
|
+
# If `node_subset_from` and `node_subset_to` are object-identical,
|
|
2335
|
+
# we can compute `_neighbor_of` once, use it for both positions,
|
|
2336
|
+
# and apply no further constraint.
|
|
2337
|
+
neighbor_rel = self._neighbor_of(node_subset_from)
|
|
2338
|
+
neighbor_a_rel = neighbor_rel
|
|
2339
|
+
neighbor_b_rel = neighbor_rel
|
|
2340
|
+
node_constraint = []
|
|
2341
|
+
else:
|
|
2342
|
+
# Otherwise, we have two options: 1) compute `_neighbor_of` twice,
|
|
2343
|
+
# once for each node subset; or 2) compute `_neighbor_of` once, over
|
|
2344
|
+
# the union of both subsets, and apply constraints to each position.
|
|
2345
|
+
# Which of these is more efficient depends on the detailed nature
|
|
2346
|
+
# of the subsets, which we don't have knowledge of here. Here
|
|
2347
|
+
# we choose the simpler/cleaner of the two options (1) as such:
|
|
2348
|
+
neighbor_a_rel = self._neighbor_of(node_subset_from)
|
|
2349
|
+
neighbor_b_rel = self._neighbor_of(node_subset_to)
|
|
2350
|
+
node_constraint = []
|
|
2351
|
+
|
|
2352
|
+
# Handle the `full` case.
|
|
2353
|
+
else:
|
|
2354
|
+
neighbor_a_rel = self._neighbor
|
|
2355
|
+
neighbor_b_rel = self._neighbor
|
|
2356
|
+
node_constraint = []
|
|
2357
|
+
|
|
2358
|
+
# Define the common neighbor relationship using the neighbor relations and
|
|
2359
|
+
# constraints determined above. This logic is shared across all constraint types.
|
|
2360
|
+
where(
|
|
2361
|
+
*node_constraint,
|
|
2362
|
+
neighbor_a_rel(node_a, neighbor_node),
|
|
2363
|
+
neighbor_b_rel(node_b, neighbor_node)
|
|
2364
|
+
).define(_common_neighbor_rel(node_a, node_b, neighbor_node))
|
|
1934
2365
|
|
|
1935
|
-
node_a, node_b, node_c = self.Node.ref(), self.Node.ref(), self.Node.ref()
|
|
1936
|
-
where(self._neighbor(node_a, node_c), self._neighbor(node_b, node_c)).define(_common_neighbor_rel(node_a, node_b, node_c))
|
|
1937
2366
|
return _common_neighbor_rel
|
|
1938
2367
|
|
|
1939
2368
|
|
|
@@ -2084,37 +2513,37 @@ class Graph():
|
|
|
2084
2513
|
return self._degree
|
|
2085
2514
|
else:
|
|
2086
2515
|
# Validate the 'of' parameter
|
|
2087
|
-
self._validate_node_subset_parameter(of)
|
|
2516
|
+
self._validate_node_subset_parameter('of', of)
|
|
2088
2517
|
return self._degree_of(of)
|
|
2089
2518
|
|
|
2090
2519
|
@cached_property
|
|
2091
2520
|
def _degree(self):
|
|
2092
2521
|
"""Lazily define and cache the self._degree relationship."""
|
|
2093
|
-
_degree_rel = self._create_degree_relationship(
|
|
2522
|
+
_degree_rel = self._create_degree_relationship(node_subset=None)
|
|
2094
2523
|
_degree_rel.annotate(annotations.track("graphs", "degree"))
|
|
2095
2524
|
return _degree_rel
|
|
2096
2525
|
|
|
2097
|
-
def _degree_of(self,
|
|
2526
|
+
def _degree_of(self, node_subset: Relationship):
|
|
2098
2527
|
"""
|
|
2099
2528
|
Create a degree relationship constrained to the subset of nodes
|
|
2100
|
-
in `
|
|
2529
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
2101
2530
|
specific to the callsite.
|
|
2102
2531
|
"""
|
|
2103
|
-
_degree_rel = self._create_degree_relationship(
|
|
2532
|
+
_degree_rel = self._create_degree_relationship(node_subset=node_subset)
|
|
2104
2533
|
_degree_rel.annotate(annotations.track("graphs", "degree_of"))
|
|
2105
2534
|
return _degree_rel
|
|
2106
2535
|
|
|
2107
|
-
def _create_degree_relationship(self, *,
|
|
2536
|
+
def _create_degree_relationship(self, *, node_subset: Optional[Relationship]):
|
|
2108
2537
|
_degree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has degree {{count:Integer}}")
|
|
2109
2538
|
|
|
2110
2539
|
if self.directed:
|
|
2111
2540
|
# For directed graphs, degree is the sum of indegree and outdegree.
|
|
2112
|
-
if
|
|
2541
|
+
if node_subset is None:
|
|
2113
2542
|
indegree_rel = self._indegree
|
|
2114
2543
|
outdegree_rel = self._outdegree
|
|
2115
2544
|
else:
|
|
2116
|
-
indegree_rel = self._indegree_of(
|
|
2117
|
-
outdegree_rel = self._outdegree_of(
|
|
2545
|
+
indegree_rel = self._indegree_of(node_subset)
|
|
2546
|
+
outdegree_rel = self._outdegree_of(node_subset)
|
|
2118
2547
|
|
|
2119
2548
|
incount, outcount = Integer.ref(), Integer.ref()
|
|
2120
2549
|
where(
|
|
@@ -2123,12 +2552,12 @@ class Graph():
|
|
|
2123
2552
|
).define(_degree_rel(self.Node, incount + outcount))
|
|
2124
2553
|
else:
|
|
2125
2554
|
# For undirected graphs, degree is the count of neighbors.
|
|
2126
|
-
if
|
|
2555
|
+
if node_subset is None:
|
|
2127
2556
|
node_set = self.Node
|
|
2128
2557
|
count_neighbor_rel = self._count_neighbor
|
|
2129
2558
|
else:
|
|
2130
|
-
node_set =
|
|
2131
|
-
count_neighbor_rel = self._count_neighbor_of(
|
|
2559
|
+
node_set = node_subset
|
|
2560
|
+
count_neighbor_rel = self._count_neighbor_of(node_subset)
|
|
2132
2561
|
|
|
2133
2562
|
where(
|
|
2134
2563
|
node_set(self.Node), # Necessary given the match on the following line.
|
|
@@ -2279,38 +2708,38 @@ class Graph():
|
|
|
2279
2708
|
return self._indegree
|
|
2280
2709
|
else:
|
|
2281
2710
|
# Validate the 'of' parameter
|
|
2282
|
-
self._validate_node_subset_parameter(of)
|
|
2711
|
+
self._validate_node_subset_parameter('of', of)
|
|
2283
2712
|
return self._indegree_of(of)
|
|
2284
2713
|
|
|
2285
2714
|
@cached_property
|
|
2286
2715
|
def _indegree(self):
|
|
2287
2716
|
"""Lazily define and cache the self._indegree relationship."""
|
|
2288
|
-
_indegree_rel = self._create_indegree_relationship(
|
|
2717
|
+
_indegree_rel = self._create_indegree_relationship(node_subset=None)
|
|
2289
2718
|
_indegree_rel.annotate(annotations.track("graphs", "indegree"))
|
|
2290
2719
|
return _indegree_rel
|
|
2291
2720
|
|
|
2292
|
-
def _indegree_of(self,
|
|
2721
|
+
def _indegree_of(self, node_subset: Relationship):
|
|
2293
2722
|
"""
|
|
2294
2723
|
Create an indegree relationship constrained to the subset of nodes
|
|
2295
|
-
in `
|
|
2724
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
2296
2725
|
specific to the callsite.
|
|
2297
2726
|
"""
|
|
2298
|
-
_indegree_rel = self._create_indegree_relationship(
|
|
2727
|
+
_indegree_rel = self._create_indegree_relationship(node_subset=node_subset)
|
|
2299
2728
|
_indegree_rel.annotate(annotations.track("graphs", "indegree_of"))
|
|
2300
2729
|
return _indegree_rel
|
|
2301
2730
|
|
|
2302
|
-
def _create_indegree_relationship(self, *,
|
|
2731
|
+
def _create_indegree_relationship(self, *, node_subset: Optional[Relationship]):
|
|
2303
2732
|
_indegree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has indegree {{count:Integer}}")
|
|
2304
2733
|
|
|
2305
2734
|
# Choose the appropriate count_inneighbor relationship and node set
|
|
2306
|
-
if
|
|
2735
|
+
if node_subset is None:
|
|
2307
2736
|
# No constraint - use cached count_inneighbor relationship and all nodes
|
|
2308
2737
|
count_inneighbor_rel = self._count_inneighbor
|
|
2309
2738
|
node_set = self.Node
|
|
2310
2739
|
else:
|
|
2311
2740
|
# Constrained to nodes in the subset - use constrained count_inneighbor relationship
|
|
2312
|
-
count_inneighbor_rel = self._count_inneighbor_of(
|
|
2313
|
-
node_set =
|
|
2741
|
+
count_inneighbor_rel = self._count_inneighbor_of(node_subset)
|
|
2742
|
+
node_set = node_subset
|
|
2314
2743
|
|
|
2315
2744
|
# Apply the same indegree logic for both cases
|
|
2316
2745
|
where(
|
|
@@ -2463,38 +2892,38 @@ class Graph():
|
|
|
2463
2892
|
return self._outdegree
|
|
2464
2893
|
else:
|
|
2465
2894
|
# Validate the 'of' parameter
|
|
2466
|
-
self._validate_node_subset_parameter(of)
|
|
2895
|
+
self._validate_node_subset_parameter('of', of)
|
|
2467
2896
|
return self._outdegree_of(of)
|
|
2468
2897
|
|
|
2469
2898
|
@cached_property
|
|
2470
2899
|
def _outdegree(self):
|
|
2471
2900
|
"""Lazily define and cache the self._outdegree relationship."""
|
|
2472
|
-
_outdegree_rel = self._create_outdegree_relationship(
|
|
2901
|
+
_outdegree_rel = self._create_outdegree_relationship(node_subset=None)
|
|
2473
2902
|
_outdegree_rel.annotate(annotations.track("graphs", "outdegree"))
|
|
2474
2903
|
return _outdegree_rel
|
|
2475
2904
|
|
|
2476
|
-
def _outdegree_of(self,
|
|
2905
|
+
def _outdegree_of(self, node_subset: Relationship):
|
|
2477
2906
|
"""
|
|
2478
2907
|
Create an outdegree relationship constrained to the subset of nodes
|
|
2479
|
-
in `
|
|
2908
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
2480
2909
|
specific to the callsite.
|
|
2481
2910
|
"""
|
|
2482
|
-
_outdegree_rel = self._create_outdegree_relationship(
|
|
2911
|
+
_outdegree_rel = self._create_outdegree_relationship(node_subset=node_subset)
|
|
2483
2912
|
_outdegree_rel.annotate(annotations.track("graphs", "outdegree_of"))
|
|
2484
2913
|
return _outdegree_rel
|
|
2485
2914
|
|
|
2486
|
-
def _create_outdegree_relationship(self, *,
|
|
2915
|
+
def _create_outdegree_relationship(self, *, node_subset: Optional[Relationship]):
|
|
2487
2916
|
_outdegree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has outdegree {{count:Integer}}")
|
|
2488
2917
|
|
|
2489
2918
|
# Choose the appropriate count_outneighbor relationship and node set
|
|
2490
|
-
if
|
|
2919
|
+
if node_subset is None:
|
|
2491
2920
|
# No constraint - use cached count_outneighbor relationship and all nodes
|
|
2492
2921
|
count_outneighbor_rel = self._count_outneighbor
|
|
2493
2922
|
node_set = self.Node
|
|
2494
2923
|
else:
|
|
2495
2924
|
# Constrained to nodes in the subset - use constrained count_outneighbor relationship
|
|
2496
|
-
count_outneighbor_rel = self._count_outneighbor_of(
|
|
2497
|
-
node_set =
|
|
2925
|
+
count_outneighbor_rel = self._count_outneighbor_of(node_subset)
|
|
2926
|
+
node_set = node_subset
|
|
2498
2927
|
|
|
2499
2928
|
# Apply the same outdegree logic for both cases
|
|
2500
2929
|
where(
|
|
@@ -2612,37 +3041,37 @@ class Graph():
|
|
|
2612
3041
|
return self._weighted_degree
|
|
2613
3042
|
else:
|
|
2614
3043
|
# Validate the 'of' parameter
|
|
2615
|
-
self._validate_node_subset_parameter(of)
|
|
3044
|
+
self._validate_node_subset_parameter('of', of)
|
|
2616
3045
|
return self._weighted_degree_of(of)
|
|
2617
3046
|
|
|
2618
3047
|
@cached_property
|
|
2619
3048
|
def _weighted_degree(self):
|
|
2620
3049
|
"""Lazily define and cache the self._weighted_degree relationship."""
|
|
2621
|
-
_weighted_degree_rel = self._create_weighted_degree_relationship(
|
|
3050
|
+
_weighted_degree_rel = self._create_weighted_degree_relationship(node_subset=None)
|
|
2622
3051
|
_weighted_degree_rel.annotate(annotations.track("graphs", "weighted_degree"))
|
|
2623
3052
|
return _weighted_degree_rel
|
|
2624
3053
|
|
|
2625
|
-
def _weighted_degree_of(self,
|
|
3054
|
+
def _weighted_degree_of(self, node_subset: Relationship):
|
|
2626
3055
|
"""
|
|
2627
3056
|
Create a weighted degree relationship constrained to the subset of nodes
|
|
2628
|
-
in `
|
|
3057
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
2629
3058
|
specific to the callsite.
|
|
2630
3059
|
"""
|
|
2631
|
-
_weighted_degree_rel = self._create_weighted_degree_relationship(
|
|
3060
|
+
_weighted_degree_rel = self._create_weighted_degree_relationship(node_subset=node_subset)
|
|
2632
3061
|
_weighted_degree_rel.annotate(annotations.track("graphs", "weighted_degree_of"))
|
|
2633
3062
|
return _weighted_degree_rel
|
|
2634
3063
|
|
|
2635
|
-
def _create_weighted_degree_relationship(self, *,
|
|
3064
|
+
def _create_weighted_degree_relationship(self, *, node_subset: Optional[Relationship]):
|
|
2636
3065
|
_weighted_degree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has weighted degree {{weight:Float}}")
|
|
2637
3066
|
|
|
2638
3067
|
if self.directed:
|
|
2639
3068
|
# For directed graphs, weighted degree is the sum of weighted indegree and weighted outdegree.
|
|
2640
|
-
if
|
|
3069
|
+
if node_subset is None:
|
|
2641
3070
|
weighted_indegree_rel = self._weighted_indegree
|
|
2642
3071
|
weighted_outdegree_rel = self._weighted_outdegree
|
|
2643
3072
|
else:
|
|
2644
|
-
weighted_indegree_rel = self._weighted_indegree_of(
|
|
2645
|
-
weighted_outdegree_rel = self._weighted_outdegree_of(
|
|
3073
|
+
weighted_indegree_rel = self._weighted_indegree_of(node_subset)
|
|
3074
|
+
weighted_outdegree_rel = self._weighted_outdegree_of(node_subset)
|
|
2646
3075
|
|
|
2647
3076
|
inweight, outweight = Float.ref(), Float.ref()
|
|
2648
3077
|
where(
|
|
@@ -2651,12 +3080,12 @@ class Graph():
|
|
|
2651
3080
|
).define(_weighted_degree_rel(self.Node, inweight + outweight))
|
|
2652
3081
|
elif not self.directed:
|
|
2653
3082
|
# Choose the appropriate node set
|
|
2654
|
-
if
|
|
3083
|
+
if node_subset is None:
|
|
2655
3084
|
# No constraint - use all nodes
|
|
2656
3085
|
node_set = self.Node
|
|
2657
3086
|
else:
|
|
2658
3087
|
# Constrained to nodes in the subset
|
|
2659
|
-
node_set =
|
|
3088
|
+
node_set = node_subset
|
|
2660
3089
|
|
|
2661
3090
|
dst, weight = self.Node.ref(), Float.ref()
|
|
2662
3091
|
where(
|
|
@@ -2772,36 +3201,36 @@ class Graph():
|
|
|
2772
3201
|
return self._weighted_indegree
|
|
2773
3202
|
else:
|
|
2774
3203
|
# Validate the 'of' parameter
|
|
2775
|
-
self._validate_node_subset_parameter(of)
|
|
3204
|
+
self._validate_node_subset_parameter('of', of)
|
|
2776
3205
|
return self._weighted_indegree_of(of)
|
|
2777
3206
|
|
|
2778
3207
|
@cached_property
|
|
2779
3208
|
def _weighted_indegree(self):
|
|
2780
3209
|
"""Lazily define and cache the self._weighted_indegree relationship."""
|
|
2781
|
-
_weighted_indegree_rel = self._create_weighted_indegree_relationship(
|
|
3210
|
+
_weighted_indegree_rel = self._create_weighted_indegree_relationship(node_subset=None)
|
|
2782
3211
|
_weighted_indegree_rel.annotate(annotations.track("graphs", "weighted_indegree"))
|
|
2783
3212
|
return _weighted_indegree_rel
|
|
2784
3213
|
|
|
2785
|
-
def _weighted_indegree_of(self,
|
|
3214
|
+
def _weighted_indegree_of(self, node_subset: Relationship):
|
|
2786
3215
|
"""
|
|
2787
3216
|
Create a weighted indegree relationship constrained to the subset of nodes
|
|
2788
|
-
in `
|
|
3217
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
2789
3218
|
specific to the callsite.
|
|
2790
3219
|
"""
|
|
2791
|
-
_weighted_indegree_rel = self._create_weighted_indegree_relationship(
|
|
3220
|
+
_weighted_indegree_rel = self._create_weighted_indegree_relationship(node_subset=node_subset)
|
|
2792
3221
|
_weighted_indegree_rel.annotate(annotations.track("graphs", "weighted_indegree_of"))
|
|
2793
3222
|
return _weighted_indegree_rel
|
|
2794
3223
|
|
|
2795
|
-
def _create_weighted_indegree_relationship(self, *,
|
|
3224
|
+
def _create_weighted_indegree_relationship(self, *, node_subset: Optional[Relationship]):
|
|
2796
3225
|
_weighted_indegree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has weighted indegree {{weight:Float}}")
|
|
2797
3226
|
|
|
2798
3227
|
# Choose the appropriate node set
|
|
2799
|
-
if
|
|
3228
|
+
if node_subset is None:
|
|
2800
3229
|
# No constraint - use all nodes
|
|
2801
3230
|
node_set = self.Node
|
|
2802
3231
|
else:
|
|
2803
3232
|
# Constrained to nodes in the subset
|
|
2804
|
-
node_set =
|
|
3233
|
+
node_set = node_subset
|
|
2805
3234
|
# TODO: In a future cleanup pass, replace `node_set` with a `node_constraint`
|
|
2806
3235
|
# that replaces the `node_set(self.Node)` in the where clause below,
|
|
2807
3236
|
# and generates only `self.Node` (rather than `self.Node(self.Node)`)
|
|
@@ -2924,36 +3353,36 @@ class Graph():
|
|
|
2924
3353
|
return self._weighted_outdegree
|
|
2925
3354
|
else:
|
|
2926
3355
|
# Validate the 'of' parameter
|
|
2927
|
-
self._validate_node_subset_parameter(of)
|
|
3356
|
+
self._validate_node_subset_parameter('of', of)
|
|
2928
3357
|
return self._weighted_outdegree_of(of)
|
|
2929
3358
|
|
|
2930
3359
|
@cached_property
|
|
2931
3360
|
def _weighted_outdegree(self):
|
|
2932
3361
|
"""Lazily define and cache the self._weighted_outdegree relationship."""
|
|
2933
|
-
_weighted_outdegree_rel = self._create_weighted_outdegree_relationship(
|
|
3362
|
+
_weighted_outdegree_rel = self._create_weighted_outdegree_relationship(node_subset=None)
|
|
2934
3363
|
_weighted_outdegree_rel.annotate(annotations.track("graphs", "weighted_outdegree"))
|
|
2935
3364
|
return _weighted_outdegree_rel
|
|
2936
3365
|
|
|
2937
|
-
def _weighted_outdegree_of(self,
|
|
3366
|
+
def _weighted_outdegree_of(self, node_subset: Relationship):
|
|
2938
3367
|
"""
|
|
2939
3368
|
Create a weighted outdegree relationship constrained to the subset of nodes
|
|
2940
|
-
in `
|
|
3369
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
2941
3370
|
specific to the callsite.
|
|
2942
3371
|
"""
|
|
2943
|
-
_weighted_outdegree_rel = self._create_weighted_outdegree_relationship(
|
|
3372
|
+
_weighted_outdegree_rel = self._create_weighted_outdegree_relationship(node_subset=node_subset)
|
|
2944
3373
|
_weighted_outdegree_rel.annotate(annotations.track("graphs", "weighted_outdegree_of"))
|
|
2945
3374
|
return _weighted_outdegree_rel
|
|
2946
3375
|
|
|
2947
|
-
def _create_weighted_outdegree_relationship(self, *,
|
|
3376
|
+
def _create_weighted_outdegree_relationship(self, *, node_subset: Optional[Relationship]):
|
|
2948
3377
|
_weighted_outdegree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has weighted outdegree {{weight:Float}}")
|
|
2949
3378
|
|
|
2950
3379
|
# Choose the appropriate node set
|
|
2951
|
-
if
|
|
3380
|
+
if node_subset is None:
|
|
2952
3381
|
# No constraint - use all nodes
|
|
2953
3382
|
node_set = self.Node
|
|
2954
3383
|
else:
|
|
2955
3384
|
# Constrained to nodes in the subset
|
|
2956
|
-
node_set =
|
|
3385
|
+
node_set = node_subset
|
|
2957
3386
|
|
|
2958
3387
|
# Apply the weighted outdegree logic for both cases
|
|
2959
3388
|
dst, outweight = self.Node.ref(), Float.ref()
|
|
@@ -3103,36 +3532,36 @@ class Graph():
|
|
|
3103
3532
|
return self._degree_centrality
|
|
3104
3533
|
else:
|
|
3105
3534
|
# Validate the 'of' parameter
|
|
3106
|
-
self._validate_node_subset_parameter(of)
|
|
3535
|
+
self._validate_node_subset_parameter('of', of)
|
|
3107
3536
|
return self._degree_centrality_of(of)
|
|
3108
3537
|
|
|
3109
3538
|
@cached_property
|
|
3110
3539
|
def _degree_centrality(self):
|
|
3111
3540
|
"""Lazily define and cache the self._degree_centrality relationship."""
|
|
3112
|
-
_degree_centrality_rel = self._create_degree_centrality_relationship(
|
|
3541
|
+
_degree_centrality_rel = self._create_degree_centrality_relationship(node_subset=None)
|
|
3113
3542
|
_degree_centrality_rel.annotate(annotations.track("graphs", "degree_centrality"))
|
|
3114
3543
|
return _degree_centrality_rel
|
|
3115
3544
|
|
|
3116
|
-
def _degree_centrality_of(self,
|
|
3545
|
+
def _degree_centrality_of(self, node_subset: Relationship):
|
|
3117
3546
|
"""
|
|
3118
3547
|
Create a degree centrality relationship constrained to the subset of nodes
|
|
3119
|
-
in `
|
|
3548
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
3120
3549
|
specific to the callsite.
|
|
3121
3550
|
"""
|
|
3122
|
-
_degree_centrality_rel = self._create_degree_centrality_relationship(
|
|
3551
|
+
_degree_centrality_rel = self._create_degree_centrality_relationship(node_subset=node_subset)
|
|
3123
3552
|
_degree_centrality_rel.annotate(annotations.track("graphs", "degree_centrality_of"))
|
|
3124
3553
|
return _degree_centrality_rel
|
|
3125
3554
|
|
|
3126
|
-
def _create_degree_centrality_relationship(self, *,
|
|
3555
|
+
def _create_degree_centrality_relationship(self, *, node_subset: Optional[Relationship]):
|
|
3127
3556
|
"""Create a degree centrality relationship, optionally constrained to a subset of nodes."""
|
|
3128
3557
|
_degree_centrality_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has {{degree_centrality:Float}}")
|
|
3129
3558
|
|
|
3130
|
-
if
|
|
3559
|
+
if node_subset is None:
|
|
3131
3560
|
degree_rel = self._degree
|
|
3132
3561
|
node_constraint = [] # No constraint on nodes.
|
|
3133
3562
|
else:
|
|
3134
|
-
degree_rel = self._degree_of(
|
|
3135
|
-
node_constraint = [
|
|
3563
|
+
degree_rel = self._degree_of(node_subset)
|
|
3564
|
+
node_constraint = [node_subset(self.Node)] # Nodes constrained to given subset.
|
|
3136
3565
|
|
|
3137
3566
|
degree = Integer.ref()
|
|
3138
3567
|
|
|
@@ -3154,10 +3583,10 @@ class Graph():
|
|
|
3154
3583
|
# General case, i.e. with more than one node.
|
|
3155
3584
|
if self.weighted:
|
|
3156
3585
|
maybe_weighted_degree = Float.ref()
|
|
3157
|
-
if
|
|
3586
|
+
if node_subset is None:
|
|
3158
3587
|
maybe_weighted_degree_rel = self._weighted_degree
|
|
3159
3588
|
else:
|
|
3160
|
-
maybe_weighted_degree_rel = self._weighted_degree_of(
|
|
3589
|
+
maybe_weighted_degree_rel = self._weighted_degree_of(node_subset)
|
|
3161
3590
|
else: # not self.weighted
|
|
3162
3591
|
maybe_weighted_degree = Integer.ref()
|
|
3163
3592
|
maybe_weighted_degree_rel = degree_rel
|
|
@@ -4015,35 +4444,35 @@ class Graph():
|
|
|
4015
4444
|
|
|
4016
4445
|
"""
|
|
4017
4446
|
if of is not None:
|
|
4018
|
-
self._validate_node_subset_parameter(of)
|
|
4447
|
+
self._validate_node_subset_parameter('of', of)
|
|
4019
4448
|
return self._triangle_count_of(of)
|
|
4020
4449
|
return self._triangle_count
|
|
4021
4450
|
|
|
4022
4451
|
@cached_property
|
|
4023
4452
|
def _triangle_count(self):
|
|
4024
4453
|
"""Lazily define and cache the self._triangle_count relationship."""
|
|
4025
|
-
_triangle_count_rel = self._create_triangle_count_relationship(
|
|
4454
|
+
_triangle_count_rel = self._create_triangle_count_relationship(node_subset=None)
|
|
4026
4455
|
_triangle_count_rel.annotate(annotations.track("graphs", "triangle_count"))
|
|
4027
4456
|
return _triangle_count_rel
|
|
4028
4457
|
|
|
4029
|
-
def _triangle_count_of(self,
|
|
4458
|
+
def _triangle_count_of(self, node_subset: Relationship):
|
|
4030
4459
|
"""
|
|
4031
4460
|
Create a triangle count relationship constrained to the subset of nodes
|
|
4032
|
-
in `
|
|
4461
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
4033
4462
|
specific to the callsite.
|
|
4034
4463
|
"""
|
|
4035
|
-
_triangle_count_rel = self._create_triangle_count_relationship(
|
|
4464
|
+
_triangle_count_rel = self._create_triangle_count_relationship(node_subset=node_subset)
|
|
4036
4465
|
_triangle_count_rel.annotate(annotations.track("graphs", "triangle_count_of"))
|
|
4037
4466
|
return _triangle_count_rel
|
|
4038
4467
|
|
|
4039
|
-
def _create_triangle_count_relationship(self, *,
|
|
4468
|
+
def _create_triangle_count_relationship(self, *, node_subset: Optional[Relationship]):
|
|
4040
4469
|
"""Create a triangle count relationship, optionally constrained to a subset of nodes."""
|
|
4041
4470
|
_triangle_count_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} belongs to {{count:Integer}} triangles")
|
|
4042
4471
|
|
|
4043
|
-
if
|
|
4472
|
+
if node_subset is None:
|
|
4044
4473
|
node_constraint = self.Node # No constraint on nodes.
|
|
4045
4474
|
else:
|
|
4046
|
-
node_constraint =
|
|
4475
|
+
node_constraint = node_subset(self.Node) # Nodes constrained to given subset.
|
|
4047
4476
|
|
|
4048
4477
|
where(
|
|
4049
4478
|
node_constraint,
|
|
@@ -4293,41 +4722,41 @@ class Graph():
|
|
|
4293
4722
|
)
|
|
4294
4723
|
|
|
4295
4724
|
if of is not None:
|
|
4296
|
-
self._validate_node_subset_parameter(of)
|
|
4725
|
+
self._validate_node_subset_parameter('of', of)
|
|
4297
4726
|
return self._local_clustering_coefficient_of(of)
|
|
4298
4727
|
return self._local_clustering_coefficient
|
|
4299
4728
|
|
|
4300
4729
|
@cached_property
|
|
4301
4730
|
def _local_clustering_coefficient(self):
|
|
4302
4731
|
"""Lazily define and cache the self._local_clustering_coefficient relationship."""
|
|
4303
|
-
_local_clustering_coefficient_rel = self._create_local_clustering_coefficient_relationship(
|
|
4732
|
+
_local_clustering_coefficient_rel = self._create_local_clustering_coefficient_relationship(node_subset=None)
|
|
4304
4733
|
_local_clustering_coefficient_rel.annotate(annotations.track("graphs", "local_clustering_coefficient"))
|
|
4305
4734
|
return _local_clustering_coefficient_rel
|
|
4306
4735
|
|
|
4307
|
-
def _local_clustering_coefficient_of(self,
|
|
4736
|
+
def _local_clustering_coefficient_of(self, node_subset: Relationship):
|
|
4308
4737
|
"""
|
|
4309
4738
|
Create a local clustering coefficient relationship constrained to the subset of nodes
|
|
4310
|
-
in `
|
|
4739
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
4311
4740
|
specific to the callsite.
|
|
4312
4741
|
"""
|
|
4313
|
-
_local_clustering_coefficient_rel = self._create_local_clustering_coefficient_relationship(
|
|
4742
|
+
_local_clustering_coefficient_rel = self._create_local_clustering_coefficient_relationship(node_subset=node_subset)
|
|
4314
4743
|
_local_clustering_coefficient_rel.annotate(annotations.track("graphs", "local_clustering_coefficient_of"))
|
|
4315
4744
|
return _local_clustering_coefficient_rel
|
|
4316
4745
|
|
|
4317
|
-
def _create_local_clustering_coefficient_relationship(self, *,
|
|
4746
|
+
def _create_local_clustering_coefficient_relationship(self, *, node_subset: Optional[Relationship]):
|
|
4318
4747
|
"""Create a local clustering coefficient relationship, optionally constrained to a subset of nodes."""
|
|
4319
4748
|
_local_clustering_coefficient_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has local clustering coefficient {{coefficient:Float}}")
|
|
4320
4749
|
|
|
4321
4750
|
node = self.Node.ref()
|
|
4322
4751
|
|
|
4323
|
-
if
|
|
4752
|
+
if node_subset is None:
|
|
4324
4753
|
degree_no_self_rel = self._degree_no_self
|
|
4325
4754
|
triangle_count_rel = self._triangle_count
|
|
4326
4755
|
node_constraint = node # No constraint on nodes.
|
|
4327
4756
|
else:
|
|
4328
|
-
degree_no_self_rel = self._degree_no_self_of(
|
|
4329
|
-
triangle_count_rel = self._triangle_count_of(
|
|
4330
|
-
node_constraint =
|
|
4757
|
+
degree_no_self_rel = self._degree_no_self_of(node_subset)
|
|
4758
|
+
triangle_count_rel = self._triangle_count_of(node_subset)
|
|
4759
|
+
node_constraint = node_subset(node) # Nodes constrained to given subset.
|
|
4331
4760
|
|
|
4332
4761
|
degree_no_self = Integer.ref()
|
|
4333
4762
|
triangle_count = Integer.ref()
|
|
@@ -4350,17 +4779,17 @@ class Graph():
|
|
|
4350
4779
|
Lazily define and cache the self._degree_no_self relationship,
|
|
4351
4780
|
a non-public helper for local_clustering_coefficient.
|
|
4352
4781
|
"""
|
|
4353
|
-
return self._create_degree_no_self_relationship(
|
|
4782
|
+
return self._create_degree_no_self_relationship(node_subset=None)
|
|
4354
4783
|
|
|
4355
|
-
def _degree_no_self_of(self,
|
|
4784
|
+
def _degree_no_self_of(self, node_subset: Relationship):
|
|
4356
4785
|
"""
|
|
4357
4786
|
Create a self-loop-exclusive degree relationship constrained to
|
|
4358
|
-
the subset of nodes in `
|
|
4787
|
+
the subset of nodes in `node_subset`. Note this relationship
|
|
4359
4788
|
is not cached; it is specific to the callsite.
|
|
4360
4789
|
"""
|
|
4361
|
-
return self._create_degree_no_self_relationship(
|
|
4790
|
+
return self._create_degree_no_self_relationship(node_subset=node_subset)
|
|
4362
4791
|
|
|
4363
|
-
def _create_degree_no_self_relationship(self, *,
|
|
4792
|
+
def _create_degree_no_self_relationship(self, *, node_subset: Optional[Relationship]):
|
|
4364
4793
|
"""
|
|
4365
4794
|
Create a self-loop-exclusive degree relationship,
|
|
4366
4795
|
optionally constrained to a subset of nodes.
|
|
@@ -4369,10 +4798,10 @@ class Graph():
|
|
|
4369
4798
|
|
|
4370
4799
|
node, neighbor = self.Node.ref(), self.Node.ref()
|
|
4371
4800
|
|
|
4372
|
-
if
|
|
4801
|
+
if node_subset is None:
|
|
4373
4802
|
node_constraint = node # No constraint on nodes.
|
|
4374
4803
|
else:
|
|
4375
|
-
node_constraint =
|
|
4804
|
+
node_constraint = node_subset(node) # Nodes constrained to given subset.
|
|
4376
4805
|
|
|
4377
4806
|
where(
|
|
4378
4807
|
node_constraint,
|
|
@@ -5417,19 +5846,72 @@ class Graph():
|
|
|
5417
5846
|
|
|
5418
5847
|
|
|
5419
5848
|
@include_in_docs
|
|
5420
|
-
def cosine_similarity(
|
|
5421
|
-
|
|
5849
|
+
def cosine_similarity(
|
|
5850
|
+
self,
|
|
5851
|
+
*,
|
|
5852
|
+
full: Optional[bool] = None,
|
|
5853
|
+
from_: Optional[Relationship] = None,
|
|
5854
|
+
to: Optional[Relationship] = None,
|
|
5855
|
+
between: Optional[Relationship] = None,
|
|
5856
|
+
):
|
|
5857
|
+
"""Returns a ternary relationship containing
|
|
5858
|
+
the cosine similarity for pairs of nodes.
|
|
5422
5859
|
|
|
5423
5860
|
The cosine similarity measures the similarity between two nodes based
|
|
5424
5861
|
on the angle between their neighborhood vectors. The score ranges from
|
|
5425
5862
|
0.0 to 1.0, inclusive, where 1.0 indicates identical sets of neighbors.
|
|
5426
5863
|
|
|
5864
|
+
Parameters
|
|
5865
|
+
----------
|
|
5866
|
+
full : bool, optional
|
|
5867
|
+
If ``True``, computes the cosine similarity for all pairs
|
|
5868
|
+
of nodes in the graph. This computation can be expensive for large graphs,
|
|
5869
|
+
as the result can scale quadratically in the number of nodes. Mutually exclusive
|
|
5870
|
+
with other parameters.
|
|
5871
|
+
Default is ``None``.
|
|
5872
|
+
from_ : Relationship, optional
|
|
5873
|
+
A unary relationship containing a subset of the graph's nodes. When
|
|
5874
|
+
provided, constrains the domain of the cosine similarity computation: only
|
|
5875
|
+
cosine similarity scores for node pairs where the first node is
|
|
5876
|
+
in this relationship are computed and returned. Mutually exclusive with
|
|
5877
|
+
``full`` and ``between``.
|
|
5878
|
+
Default is ``None``.
|
|
5879
|
+
to : Relationship, optional
|
|
5880
|
+
A unary relationship containing a subset of the graph's nodes. Can only
|
|
5881
|
+
be used together with the ``from_`` parameter. When provided with ``from_``,
|
|
5882
|
+
constrains the domain of the cosine similarity computation: only
|
|
5883
|
+
cosine similarity scores for node pairs where the first node is
|
|
5884
|
+
in ``from_`` and the second node is in ``to`` are computed and returned.
|
|
5885
|
+
Default is ``None``.
|
|
5886
|
+
between : Relationship, optional
|
|
5887
|
+
A binary relationship containing pairs of nodes. When provided,
|
|
5888
|
+
constrains the domain of the cosine similarity computation: only
|
|
5889
|
+
cosine similarity scores for the specific node pairs in
|
|
5890
|
+
this relationship are computed and returned. Mutually exclusive
|
|
5891
|
+
with other parameters.
|
|
5892
|
+
Default is ``None``.
|
|
5893
|
+
|
|
5427
5894
|
Returns
|
|
5428
5895
|
-------
|
|
5429
5896
|
Relationship
|
|
5430
5897
|
A ternary relationship where each tuple represents a pair of nodes
|
|
5431
5898
|
and their cosine similarity.
|
|
5432
5899
|
|
|
5900
|
+
Raises
|
|
5901
|
+
------
|
|
5902
|
+
ValueError
|
|
5903
|
+
If ``full`` is provided with any other parameter.
|
|
5904
|
+
If ``between`` is provided with any other parameter.
|
|
5905
|
+
If ``from_`` is provided with any parameter other than ``to``.
|
|
5906
|
+
If none of ``full``, ``from_``, or ``between`` is provided.
|
|
5907
|
+
If ``full`` is not ``True`` or ``None``.
|
|
5908
|
+
AssertionError
|
|
5909
|
+
If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
|
|
5910
|
+
If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
|
|
5911
|
+
If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
|
|
5912
|
+
If ``from_`` or ``to`` is not a unary relationship.
|
|
5913
|
+
If ``between`` is not a binary relationship.
|
|
5914
|
+
|
|
5433
5915
|
Relationship Schema
|
|
5434
5916
|
-------------------
|
|
5435
5917
|
``cosine_similarity(node_u, node_v, score)``
|
|
@@ -5462,6 +5944,36 @@ class Graph():
|
|
|
5462
5944
|
vectors contain only non-negative elements. Therefore, the cosine
|
|
5463
5945
|
similarity score is always between 0.0 and 1.0, inclusive.
|
|
5464
5946
|
|
|
5947
|
+
The ``cosine_similarity(full=True)`` method computes and caches
|
|
5948
|
+
the full cosine similarity relationship for all pairs of nodes,
|
|
5949
|
+
providing efficient reuse across multiple calls. This can be expensive
|
|
5950
|
+
as the result can contain O(|V|²) tuples.
|
|
5951
|
+
|
|
5952
|
+
Calling ``cosine_similarity()`` without arguments raises a ``ValueError``,
|
|
5953
|
+
to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
|
|
5954
|
+
|
|
5955
|
+
In contrast, ``cosine_similarity(from_=subset)`` constrains the computation to
|
|
5956
|
+
tuples with the first position in the passed-in ``subset``. The result is
|
|
5957
|
+
not cached; it is specific to the call site. When a significant fraction of
|
|
5958
|
+
the cosine similarity relation is needed across a program,
|
|
5959
|
+
``cosine_similarity(full=True)`` is typically more efficient. Use
|
|
5960
|
+
``cosine_similarity(from_=subset)`` only when small subsets of
|
|
5961
|
+
the cosine similarity relationship are needed
|
|
5962
|
+
collectively across the program.
|
|
5963
|
+
|
|
5964
|
+
The ``to`` parameter can be used together with ``from_`` to further
|
|
5965
|
+
constrain the computation: ``cosine_similarity(from_=subset_a, to=subset_b)``
|
|
5966
|
+
computes cosine similarity scores only for node pairs where the first node is in
|
|
5967
|
+
``subset_a`` and the second node is in ``subset_b``. (Since ``cosine_similarity``
|
|
5968
|
+
is symmetric in its first two positions, using ``to`` without ``from_`` would
|
|
5969
|
+
be functionally redundant, and is not allowed.)
|
|
5970
|
+
|
|
5971
|
+
The ``between`` parameter provides another way to constrain the computation.
|
|
5972
|
+
Unlike ``from_`` and ``to``, which allow you to independently constrain the first
|
|
5973
|
+
and second positions in ``cosine_similarity`` tuples to sets of nodes, ``between``
|
|
5974
|
+
allows you to constrain the first and second positions, jointly, to specific pairs
|
|
5975
|
+
of nodes.
|
|
5976
|
+
|
|
5465
5977
|
Examples
|
|
5466
5978
|
--------
|
|
5467
5979
|
**Unweighted Graph Examples**
|
|
@@ -5483,7 +5995,7 @@ class Graph():
|
|
|
5483
5995
|
... Edge.new(src=n4, dst=n3),
|
|
5484
5996
|
... )
|
|
5485
5997
|
>>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
|
|
5486
|
-
>>> cosine_similarity = graph.cosine_similarity()
|
|
5998
|
+
>>> cosine_similarity = graph.cosine_similarity(full=True)
|
|
5487
5999
|
>>> select(score).where(cosine_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
|
|
5488
6000
|
▰▰▰▰ Setup complete
|
|
5489
6001
|
score
|
|
@@ -5506,7 +6018,7 @@ class Graph():
|
|
|
5506
6018
|
... Edge.new(src=n4, dst=n3),
|
|
5507
6019
|
... )
|
|
5508
6020
|
>>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
|
|
5509
|
-
>>> cosine_similarity = graph.cosine_similarity()
|
|
6021
|
+
>>> cosine_similarity = graph.cosine_similarity(full=True)
|
|
5510
6022
|
>>> select(score).where(cosine_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
|
|
5511
6023
|
▰▰▰▰ Setup complete
|
|
5512
6024
|
score
|
|
@@ -5531,7 +6043,7 @@ class Graph():
|
|
|
5531
6043
|
... Edge.new(src=n14, dst=n13, weight=1.0),
|
|
5532
6044
|
... )
|
|
5533
6045
|
>>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
|
|
5534
|
-
>>> cosine_similarity = graph.cosine_similarity()
|
|
6046
|
+
>>> cosine_similarity = graph.cosine_similarity(full=True)
|
|
5535
6047
|
>>> select(score).where(cosine_similarity(u, v, score), u.id == 1, v.id == 2).inspect()
|
|
5536
6048
|
▰▰▰▰ Setup complete
|
|
5537
6049
|
score
|
|
@@ -5553,49 +6065,246 @@ class Graph():
|
|
|
5553
6065
|
... Edge.new(src=n2, dst=n4, weight=5.0),
|
|
5554
6066
|
... )
|
|
5555
6067
|
>>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
|
|
5556
|
-
>>> cosine_similarity = graph.cosine_similarity()
|
|
6068
|
+
>>> cosine_similarity = graph.cosine_similarity(full=True)
|
|
5557
6069
|
>>> select(score).where(cosine_similarity(u, v, score), u.id == 1, v.id == 2).inspect()
|
|
5558
6070
|
▰▰▰▰ Setup complete
|
|
5559
6071
|
score
|
|
5560
6072
|
0 0.996241
|
|
5561
6073
|
|
|
6074
|
+
**Domain Constraint Examples**
|
|
6075
|
+
|
|
6076
|
+
>>> # Use 'from_' parameter to constrain the set of nodes for the first position
|
|
6077
|
+
>>> # Using the same undirected unweighted graph from above
|
|
6078
|
+
>>> from relationalai.semantics import where
|
|
6079
|
+
>>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
|
|
6080
|
+
>>> node = Node.ref()
|
|
6081
|
+
>>> where(node.id == 2).define(subset(node))
|
|
6082
|
+
>>>
|
|
6083
|
+
>>> # Get cosine similarity scores only for pairs where first node is in subset
|
|
6084
|
+
>>> constrained_cosine_similarity = graph.cosine_similarity(from_=subset)
|
|
6085
|
+
>>> select(u.id, v.id, score).where(constrained_cosine_similarity(u, v, score)).inspect()
|
|
6086
|
+
▰▰▰▰ Setup complete
|
|
6087
|
+
id id2 score
|
|
6088
|
+
0 2 2 1.000000
|
|
6089
|
+
1 2 3 0.707107
|
|
6090
|
+
2 2 4 0.408248
|
|
6091
|
+
|
|
6092
|
+
>>> # Use both 'from_' and 'to' parameters to constrain both positions
|
|
6093
|
+
>>> from_subset = model.Relationship(f"{{node:{Node}}} is in from_subset")
|
|
6094
|
+
>>> to_subset = model.Relationship(f"{{node:{Node}}} is in to_subset")
|
|
6095
|
+
>>> where(node.id == 2).define(from_subset(node))
|
|
6096
|
+
>>> where(node.id == 4).define(to_subset(node))
|
|
6097
|
+
>>>
|
|
6098
|
+
>>> # Get cosine similarity scores only where first node is in from_subset and second node is in to_subset
|
|
6099
|
+
>>> constrained_cosine_similarity = graph.cosine_similarity(from_=from_subset, to=to_subset)
|
|
6100
|
+
>>> select(u.id, v.id, score).where(constrained_cosine_similarity(u, v, score)).inspect()
|
|
6101
|
+
▰▰▰▰ Setup complete
|
|
6102
|
+
id id2 score
|
|
6103
|
+
0 2 4 0.408248
|
|
6104
|
+
|
|
6105
|
+
>>> # Use 'between' parameter to constrain to specific pairs of nodes
|
|
6106
|
+
>>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
|
|
6107
|
+
>>> node_a, node_b = Node.ref(), Node.ref()
|
|
6108
|
+
>>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
|
|
6109
|
+
>>> where(node_a.id == 3, node_b.id == 4).define(pairs(node_a, node_b))
|
|
6110
|
+
>>>
|
|
6111
|
+
>>> # Get cosine similarity scores only for the specific pairs (2, 4) and (3, 4)
|
|
6112
|
+
>>> constrained_cosine_similarity = graph.cosine_similarity(between=pairs)
|
|
6113
|
+
>>> select(u.id, v.id, score).where(constrained_cosine_similarity(u, v, score)).inspect()
|
|
6114
|
+
▰▰▰▰ Setup complete
|
|
6115
|
+
id id2 score
|
|
6116
|
+
0 2 4 0.408248
|
|
6117
|
+
1 3 4 0.707107
|
|
6118
|
+
|
|
5562
6119
|
"""
|
|
5563
|
-
|
|
5564
|
-
|
|
5565
|
-
|
|
5566
|
-
"of all pairs of nodes of the graph. To provide better control over "
|
|
5567
|
-
"the computed subset, `cosine_similarity`'s interface will soon "
|
|
5568
|
-
"need to change."
|
|
5569
|
-
),
|
|
5570
|
-
FutureWarning,
|
|
5571
|
-
stacklevel=2
|
|
6120
|
+
# Validate domain constraint parameters.
|
|
6121
|
+
self._validate_domain_constraint_parameters(
|
|
6122
|
+
'cosine_similarity', full, from_, to, between
|
|
5572
6123
|
)
|
|
5573
6124
|
|
|
6125
|
+
# At this point, exactly one of `full`, `from_`, or `between`
|
|
6126
|
+
# has been provided, and if `to` is provided, `from_` is also provided.
|
|
6127
|
+
|
|
6128
|
+
# Handle `between`.
|
|
6129
|
+
if between is not None:
|
|
6130
|
+
self._validate_pair_subset_parameter(between)
|
|
6131
|
+
return self._cosine_similarity_between(between)
|
|
6132
|
+
|
|
6133
|
+
# Handle `from_` (and potentially `to`).
|
|
6134
|
+
if from_ is not None:
|
|
6135
|
+
self._validate_node_subset_parameter('from_', from_)
|
|
6136
|
+
if to is not None:
|
|
6137
|
+
self._validate_node_subset_parameter('to', to)
|
|
6138
|
+
return self._cosine_similarity_from_to(from_, to)
|
|
6139
|
+
return self._cosine_similarity_from(from_)
|
|
6140
|
+
|
|
6141
|
+
# Handle `full`.
|
|
5574
6142
|
return self._cosine_similarity
|
|
5575
6143
|
|
|
5576
6144
|
@cached_property
|
|
5577
6145
|
def _cosine_similarity(self):
|
|
5578
|
-
"""Lazily define and cache the
|
|
5579
|
-
_cosine_similarity_rel = self.
|
|
6146
|
+
"""Lazily define and cache the full cosine_similarity relationship."""
|
|
6147
|
+
_cosine_similarity_rel = self._create_cosine_similarity_relationship()
|
|
5580
6148
|
_cosine_similarity_rel.annotate(annotations.track("graphs", "cosine_similarity"))
|
|
6149
|
+
return _cosine_similarity_rel
|
|
5581
6150
|
|
|
6151
|
+
def _cosine_similarity_from(self, node_subset_from: Relationship):
|
|
6152
|
+
"""
|
|
6153
|
+
Create a cosine_similarity relationship, with the first position in each
|
|
6154
|
+
tuple constrained to be in the given subset of nodes. Note this relationship
|
|
6155
|
+
is not cached; it is specific to the callsite.
|
|
6156
|
+
"""
|
|
6157
|
+
_cosine_similarity_rel = self._create_cosine_similarity_relationship(
|
|
6158
|
+
node_subset_from=node_subset_from
|
|
6159
|
+
)
|
|
6160
|
+
_cosine_similarity_rel.annotate(annotations.track("graphs", "cosine_similarity_from"))
|
|
6161
|
+
return _cosine_similarity_rel
|
|
6162
|
+
|
|
6163
|
+
def _cosine_similarity_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
|
|
6164
|
+
"""
|
|
6165
|
+
Create a cosine_similarity relationship, with the first position in each
|
|
6166
|
+
tuple constrained to be in `node_subset_from`, and the second position in
|
|
6167
|
+
each tuple constrained to be in `node_subset_to`. Note this relationship
|
|
6168
|
+
is not cached; it is specific to the callsite.
|
|
6169
|
+
"""
|
|
6170
|
+
_cosine_similarity_rel = self._create_cosine_similarity_relationship(
|
|
6171
|
+
node_subset_from=node_subset_from,
|
|
6172
|
+
node_subset_to=node_subset_to
|
|
6173
|
+
)
|
|
6174
|
+
_cosine_similarity_rel.annotate(annotations.track("graphs", "cosine_similarity_from_to"))
|
|
6175
|
+
return _cosine_similarity_rel
|
|
6176
|
+
|
|
6177
|
+
def _cosine_similarity_between(self, pair_subset_between: Relationship):
|
|
6178
|
+
"""
|
|
6179
|
+
Create a cosine_similarity relationship, with the first and second position
|
|
6180
|
+
in each tuple jointly constrained to be in the given set of pairs
|
|
6181
|
+
of nodes. Note this relationship is not cached;
|
|
6182
|
+
it is specific to the callsite.
|
|
6183
|
+
"""
|
|
6184
|
+
_cosine_similarity_rel = self._create_cosine_similarity_relationship(
|
|
6185
|
+
pair_subset_between=pair_subset_between
|
|
6186
|
+
)
|
|
6187
|
+
_cosine_similarity_rel.annotate(annotations.track("graphs", "cosine_similarity_between"))
|
|
6188
|
+
return _cosine_similarity_rel
|
|
6189
|
+
|
|
6190
|
+
def _create_cosine_similarity_relationship(
|
|
6191
|
+
self,
|
|
6192
|
+
*,
|
|
6193
|
+
node_subset_from: Optional[Relationship] = None,
|
|
6194
|
+
node_subset_to: Optional[Relationship] = None,
|
|
6195
|
+
pair_subset_between: Optional[Relationship] = None,
|
|
6196
|
+
):
|
|
6197
|
+
"""
|
|
6198
|
+
Create a cosine_similarity relationship, optionally constrained by
|
|
6199
|
+
the provided node subsets or pair subset.
|
|
6200
|
+
"""
|
|
6201
|
+
_cosine_similarity_rel = self._model.Relationship(
|
|
6202
|
+
f"{{node_u:{self._NodeConceptStr}}} has a cosine similarity to "
|
|
6203
|
+
f"{{node_v:{self._NodeConceptStr}}} of {{score:Float}}"
|
|
6204
|
+
)
|
|
6205
|
+
|
|
6206
|
+
# TODO: Optimization opportunity. In a number of branches below,
|
|
6207
|
+
# we compute _count_outneighbor_of, which transitively computes
|
|
6208
|
+
# _outneighbor_of, and then compute _outneighbor_of directly;
|
|
6209
|
+
# the present code structure makes this a developer-time-efficient
|
|
6210
|
+
# way to get this off the ground, but of course involves redundant
|
|
6211
|
+
# work. In future this redundant work could be eliminated.
|
|
6212
|
+
|
|
6213
|
+
# TODO: Optimization opportunity. In some of the cases below
|
|
6214
|
+
# (unweighted in particular), the node_constraint is redundant with
|
|
6215
|
+
# the constraints baked into the _count_outneighbor_of and
|
|
6216
|
+
# _outneighbor_of relationships. The join with node_constraint
|
|
6217
|
+
# could be eliminated in those cases. Possibly also relevant to
|
|
6218
|
+
# other domain-constrained relations.
|
|
6219
|
+
|
|
6220
|
+
# Branch by case to select appropriate count_outneighbor and
|
|
6221
|
+
# outneighbor relationships, and build appropriate constraints
|
|
6222
|
+
# on the domain of the nodes.
|
|
6223
|
+
node_u, node_v = self.Node.ref(), self.Node.ref()
|
|
6224
|
+
|
|
6225
|
+
# Handle the `between` case.
|
|
6226
|
+
if pair_subset_between is not None:
|
|
6227
|
+
# Extract first-position and second-position nodes.
|
|
6228
|
+
first_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
|
|
6229
|
+
second_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
|
|
6230
|
+
node_x, node_y = self.Node.ref(), self.Node.ref()
|
|
6231
|
+
where(
|
|
6232
|
+
pair_subset_between(node_x, node_y)
|
|
6233
|
+
).define(
|
|
6234
|
+
first_position_subset(node_x),
|
|
6235
|
+
second_position_subset(node_y)
|
|
6236
|
+
)
|
|
6237
|
+
|
|
6238
|
+
count_outneighbor_u_rel = self._count_outneighbor_of(first_position_subset)
|
|
6239
|
+
count_outneighbor_v_rel = self._count_outneighbor_of(second_position_subset)
|
|
6240
|
+
outneighbor_u_rel = self._outneighbor_of(first_position_subset)
|
|
6241
|
+
outneighbor_v_rel = self._outneighbor_of(second_position_subset)
|
|
6242
|
+
|
|
6243
|
+
node_constraints = [pair_subset_between(node_u, node_v)]
|
|
6244
|
+
|
|
6245
|
+
# Handle the `from_` case.
|
|
6246
|
+
elif node_subset_from is not None and node_subset_to is None:
|
|
6247
|
+
count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
|
|
6248
|
+
count_outneighbor_v_rel = self._count_outneighbor
|
|
6249
|
+
outneighbor_u_rel = self._outneighbor_of(node_subset_from)
|
|
6250
|
+
outneighbor_v_rel = self._outneighbor
|
|
6251
|
+
# TODO: This case could be optimized via an analog of
|
|
6252
|
+
# the depth-2 traversal strategy suggested for the equivalent
|
|
6253
|
+
# case of common_neighbor, but for another day.
|
|
6254
|
+
|
|
6255
|
+
node_constraints = [node_subset_from(node_u)]
|
|
6256
|
+
|
|
6257
|
+
# Handle the `from_`/`to` case.
|
|
6258
|
+
elif node_subset_from is not None and node_subset_to is not None:
|
|
6259
|
+
# Check for object identity optimization.
|
|
6260
|
+
if node_subset_from is node_subset_to:
|
|
6261
|
+
count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
|
|
6262
|
+
count_outneighbor_v_rel = count_outneighbor_u_rel
|
|
6263
|
+
outneighbor_u_rel = self._outneighbor_of(node_subset_from)
|
|
6264
|
+
outneighbor_v_rel = outneighbor_u_rel
|
|
6265
|
+
else:
|
|
6266
|
+
count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
|
|
6267
|
+
count_outneighbor_v_rel = self._count_outneighbor_of(node_subset_to)
|
|
6268
|
+
outneighbor_u_rel = self._outneighbor_of(node_subset_from)
|
|
6269
|
+
outneighbor_v_rel = self._outneighbor_of(node_subset_to)
|
|
6270
|
+
|
|
6271
|
+
node_constraints = [node_subset_from(node_u), node_subset_to(node_v)]
|
|
6272
|
+
|
|
6273
|
+
# Handle the `full` case.
|
|
6274
|
+
else:
|
|
6275
|
+
count_outneighbor_u_rel = self._count_outneighbor
|
|
6276
|
+
count_outneighbor_v_rel = self._count_outneighbor
|
|
6277
|
+
outneighbor_u_rel = self._outneighbor
|
|
6278
|
+
outneighbor_v_rel = self._outneighbor
|
|
6279
|
+
|
|
6280
|
+
node_constraints = []
|
|
6281
|
+
|
|
6282
|
+
# Define cosine similarity logic for both weighted and unweighted cases.
|
|
5582
6283
|
if not self.weighted:
|
|
5583
|
-
|
|
5584
|
-
count_outneighor_u, count_outneighor_v
|
|
6284
|
+
# Unweighted case: use count of common outneighbors.
|
|
6285
|
+
count_outneighor_u, count_outneighor_v = Integer.ref(), Integer.ref()
|
|
6286
|
+
common_outneighbor_node = self.Node.ref()
|
|
6287
|
+
score = Float.ref()
|
|
5585
6288
|
|
|
5586
6289
|
where(
|
|
5587
|
-
|
|
5588
|
-
|
|
5589
|
-
|
|
6290
|
+
*node_constraints,
|
|
6291
|
+
count_outneighbor_u_rel(node_u, count_outneighor_u),
|
|
6292
|
+
count_outneighbor_v_rel(node_v, count_outneighor_v),
|
|
6293
|
+
c_common := count(common_outneighbor_node).per(node_u, node_v).where(
|
|
6294
|
+
outneighbor_u_rel(node_u, common_outneighbor_node),
|
|
6295
|
+
outneighbor_v_rel(node_v, common_outneighbor_node),
|
|
6296
|
+
),
|
|
5590
6297
|
score := c_common / sqrt(count_outneighor_u * count_outneighor_v),
|
|
5591
6298
|
).define(
|
|
5592
6299
|
_cosine_similarity_rel(node_u, node_v, score)
|
|
5593
6300
|
)
|
|
5594
6301
|
else:
|
|
5595
|
-
|
|
6302
|
+
# Weighted case: use dot product and norms.
|
|
5596
6303
|
node_uk, node_vk = self.Node.ref(), self.Node.ref()
|
|
5597
6304
|
wu, wv = Float.ref(), Float.ref()
|
|
6305
|
+
|
|
5598
6306
|
where(
|
|
6307
|
+
*node_constraints,
|
|
5599
6308
|
squared_norm_wu := sum(node_uk, wu * wu).per(node_u).where(self._weight(node_u, node_uk, wu)),
|
|
5600
6309
|
squared_norm_wv := sum(node_vk, wv * wv).per(node_v).where(self._weight(node_v, node_vk, wv)),
|
|
5601
6310
|
wu_dot_wv := self._wu_dot_wv_fragment(node_u, node_v),
|
|
@@ -5608,19 +6317,69 @@ class Graph():
|
|
|
5608
6317
|
|
|
5609
6318
|
|
|
5610
6319
|
@include_in_docs
|
|
5611
|
-
def adamic_adar(
|
|
5612
|
-
|
|
6320
|
+
def adamic_adar(
|
|
6321
|
+
self,
|
|
6322
|
+
*,
|
|
6323
|
+
full: Optional[bool] = None,
|
|
6324
|
+
from_: Optional[Relationship] = None,
|
|
6325
|
+
to: Optional[Relationship] = None,
|
|
6326
|
+
between: Optional[Relationship] = None,
|
|
6327
|
+
):
|
|
6328
|
+
"""Returns a ternary relationship containing the Adamic-Adar index for pairs of nodes.
|
|
5613
6329
|
|
|
5614
6330
|
The Adamic-Adar index is a similarity measure between two nodes based
|
|
5615
6331
|
on the amount of shared neighbors between them, giving more weight to
|
|
5616
6332
|
common neighbors that are less connected.
|
|
5617
6333
|
|
|
6334
|
+
Parameters
|
|
6335
|
+
----------
|
|
6336
|
+
full : bool, optional
|
|
6337
|
+
If ``True``, computes the Adamic-Adar index for all pairs of nodes in
|
|
6338
|
+
the graph. This computation can be expensive for large graphs, as
|
|
6339
|
+
dependencies can scale quadratically in the number of edges or cubically
|
|
6340
|
+
in the number of nodes. Mutually exclusive with other parameters.
|
|
6341
|
+
Default is ``None``.
|
|
6342
|
+
from_ : Relationship, optional
|
|
6343
|
+
A unary relationship containing a subset of the graph's nodes. When
|
|
6344
|
+
provided, constrains the domain of the Adamic-Adar computation: only
|
|
6345
|
+
Adamic-Adar indices for node pairs where the first node is in this relationship
|
|
6346
|
+
are computed and returned. Mutually exclusive with ``full`` and ``between``.
|
|
6347
|
+
Default is ``None``.
|
|
6348
|
+
to : Relationship, optional
|
|
6349
|
+
A unary relationship containing a subset of the graph's nodes. Can only
|
|
6350
|
+
be used together with the ``from_`` parameter. When provided with ``from_``,
|
|
6351
|
+
constrains the domain of the Adamic-Adar computation: only Adamic-Adar
|
|
6352
|
+
indices for node pairs where the first node is in ``from_`` and the
|
|
6353
|
+
second node is in ``to`` are computed and returned.
|
|
6354
|
+
Default is ``None``.
|
|
6355
|
+
between : Relationship, optional
|
|
6356
|
+
A binary relationship containing pairs of nodes. When provided,
|
|
6357
|
+
constrains the domain of the Adamic-Adar computation: only Adamic-Adar
|
|
6358
|
+
indices for the specific node pairs in this relationship are computed
|
|
6359
|
+
and returned. Mutually exclusive with other parameters.
|
|
6360
|
+
Default is ``None``.
|
|
6361
|
+
|
|
5618
6362
|
Returns
|
|
5619
6363
|
-------
|
|
5620
6364
|
Relationship
|
|
5621
6365
|
A ternary relationship where each tuple represents a pair of nodes
|
|
5622
6366
|
and their Adamic-Adar index.
|
|
5623
6367
|
|
|
6368
|
+
Raises
|
|
6369
|
+
------
|
|
6370
|
+
ValueError
|
|
6371
|
+
If ``full`` is provided with any other parameter.
|
|
6372
|
+
If ``between`` is provided with any other parameter.
|
|
6373
|
+
If ``from_`` is provided with any parameter other than ``to``.
|
|
6374
|
+
If none of ``full``, ``from_``, or ``between`` is provided.
|
|
6375
|
+
If ``full`` is not ``True`` or ``None``.
|
|
6376
|
+
AssertionError
|
|
6377
|
+
If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
|
|
6378
|
+
If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
|
|
6379
|
+
If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
|
|
6380
|
+
If ``from_`` or ``to`` is not a unary relationship.
|
|
6381
|
+
If ``between`` is not a binary relationship.
|
|
6382
|
+
|
|
5624
6383
|
Relationship Schema
|
|
5625
6384
|
-------------------
|
|
5626
6385
|
``adamic_adar(node_u, node_v, score)``
|
|
@@ -5644,9 +6403,38 @@ class Graph():
|
|
|
5644
6403
|
|
|
5645
6404
|
AA(u,v) = Σ (1 / log(degree(w)))
|
|
5646
6405
|
|
|
6406
|
+
The ``adamic_adar(full=True)`` method computes and caches the full Adamic-Adar
|
|
6407
|
+
relationship for all pairs of nodes, providing efficient reuse across
|
|
6408
|
+
multiple calls. This can be expensive as dependencies can contain O(|E|²) or
|
|
6409
|
+
O(|V|³) tuples depending on graph density.
|
|
6410
|
+
|
|
6411
|
+
Calling ``adamic_adar()`` without arguments raises a ``ValueError``,
|
|
6412
|
+
to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
|
|
6413
|
+
|
|
6414
|
+
In contrast, ``adamic_adar(from_=subset)`` constrains the computation to
|
|
6415
|
+
tuples with the first position in the passed-in ``subset``. The result is
|
|
6416
|
+
not cached; it is specific to the call site. When a significant fraction of
|
|
6417
|
+
the Adamic-Adar relation is needed across a program, ``adamic_adar(full=True)``
|
|
6418
|
+
is typically more efficient. Use ``adamic_adar(from_=subset)`` only
|
|
6419
|
+
when small subsets of the Adamic-Adar relationship are needed
|
|
6420
|
+
collectively across the program.
|
|
6421
|
+
|
|
6422
|
+
The ``to`` parameter can be used together with ``from_`` to further
|
|
6423
|
+
constrain the computation: ``adamic_adar(from_=subset_a, to=subset_b)``
|
|
6424
|
+
computes Adamic-Adar indices only for node pairs where the first node is in
|
|
6425
|
+
``subset_a`` and the second node is in ``subset_b``. (Since ``adamic_adar``
|
|
6426
|
+
is symmetric in its first two positions, using ``to`` without ``from_`` would
|
|
6427
|
+
be functionally redundant, and is not allowed.)
|
|
6428
|
+
|
|
6429
|
+
The ``between`` parameter provides another way to constrain the computation.
|
|
6430
|
+
Unlike ``from_`` and ``to``, which allow you to independently constrain the first
|
|
6431
|
+
and second positions in ``adamic_adar`` tuples to sets of nodes, ``between``
|
|
6432
|
+
allows you to constrain the first and second positions, jointly, to specific pairs
|
|
6433
|
+
of nodes.
|
|
6434
|
+
|
|
5647
6435
|
Examples
|
|
5648
6436
|
--------
|
|
5649
|
-
>>> from relationalai.semantics import Model, define, select, Float
|
|
6437
|
+
>>> from relationalai.semantics import Model, define, select, where, Float
|
|
5650
6438
|
>>> from relationalai.semantics.reasoners.graph import Graph
|
|
5651
6439
|
>>>
|
|
5652
6440
|
>>> # 1. Set up an undirected graph
|
|
@@ -5665,10 +6453,10 @@ class Graph():
|
|
|
5665
6453
|
... Edge.new(src=n4, dst=n3),
|
|
5666
6454
|
... )
|
|
5667
6455
|
>>>
|
|
5668
|
-
>>> # 3. Select the Adamic-Adar
|
|
6456
|
+
>>> # 3. Select the Adamic-Adar indices from the full relationship
|
|
5669
6457
|
>>> u, v = Node.ref("u"), Node.ref("v")
|
|
5670
6458
|
>>> score = Float.ref("score")
|
|
5671
|
-
>>> adamic_adar = graph.adamic_adar()
|
|
6459
|
+
>>> adamic_adar = graph.adamic_adar(full=True)
|
|
5672
6460
|
>>> select(
|
|
5673
6461
|
... u.id, v.id, score,
|
|
5674
6462
|
... ).where(
|
|
@@ -5680,33 +6468,193 @@ class Graph():
|
|
|
5680
6468
|
id id2 score
|
|
5681
6469
|
0 2 4 0.910239
|
|
5682
6470
|
|
|
6471
|
+
>>> # 4. Use 'from_' parameter to constrain the set of nodes for the first position
|
|
6472
|
+
>>> # Define a subset containing only node 1
|
|
6473
|
+
>>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
|
|
6474
|
+
>>> node = Node.ref()
|
|
6475
|
+
>>> where(node.id == 1).define(subset(node))
|
|
6476
|
+
>>>
|
|
6477
|
+
>>> # Get Adamic-Adar indices only for pairs where first node is in subset
|
|
6478
|
+
>>> constrained_adamic_adar = graph.adamic_adar(from_=subset)
|
|
6479
|
+
>>> select(u.id, v.id, score).where(constrained_adamic_adar(u, v, score)).inspect()
|
|
6480
|
+
▰▰▰▰ Setup complete
|
|
6481
|
+
id id2 score
|
|
6482
|
+
0 1 1 2.885390
|
|
6483
|
+
1 1 4 2.885390
|
|
6484
|
+
|
|
6485
|
+
>>> # 5. Use both 'from_' and 'to' parameters to constrain both positions
|
|
6486
|
+
>>> subset_a = model.Relationship(f"{{node:{Node}}} is in subset_a")
|
|
6487
|
+
>>> subset_b = model.Relationship(f"{{node:{Node}}} is in subset_b")
|
|
6488
|
+
>>> where(node.id == 1).define(subset_a(node))
|
|
6489
|
+
>>> where(node.id == 4).define(subset_b(node))
|
|
6490
|
+
>>>
|
|
6491
|
+
>>> # Get Adamic-Adar indices only where first node is in subset_a and second node is in subset_b
|
|
6492
|
+
>>> constrained_adamic_adar = graph.adamic_adar(from_=subset_a, to=subset_b)
|
|
6493
|
+
>>> select(u.id, v.id, score).where(constrained_adamic_adar(u, v, score)).inspect()
|
|
6494
|
+
▰▰▰▰ Setup complete
|
|
6495
|
+
id id2 score
|
|
6496
|
+
0 1 4 2.885390
|
|
6497
|
+
|
|
6498
|
+
>>> # 6. Use 'between' parameter to constrain to specific pairs of nodes
|
|
6499
|
+
>>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
|
|
6500
|
+
>>> node_a, node_b = Node.ref(), Node.ref()
|
|
6501
|
+
>>> where(node_a.id == 1, node_b.id == 4).define(pairs(node_a, node_b))
|
|
6502
|
+
>>> where(node_a.id == 2, node_b.id == 3).define(pairs(node_a, node_b))
|
|
6503
|
+
>>>
|
|
6504
|
+
>>> # Get Adamic-Adar indices only for the specific pairs (1, 4) and (2, 3)
|
|
6505
|
+
>>> constrained_adamic_adar = graph.adamic_adar(between=pairs)
|
|
6506
|
+
>>> select(u.id, v.id, score).where(constrained_adamic_adar(u, v, score)).inspect()
|
|
6507
|
+
▰▰▰▰ Setup complete
|
|
6508
|
+
id id2 score
|
|
6509
|
+
0 1 4 2.885390
|
|
6510
|
+
1 2 3 1.442695
|
|
6511
|
+
|
|
5683
6512
|
"""
|
|
5684
|
-
|
|
5685
|
-
|
|
5686
|
-
|
|
5687
|
-
"of all pairs of nodes of the graph. To provide better control over "
|
|
5688
|
-
"the computed subset, `adamic_adar`'s interface will soon "
|
|
5689
|
-
"need to change."
|
|
5690
|
-
),
|
|
5691
|
-
FutureWarning,
|
|
5692
|
-
stacklevel=2
|
|
6513
|
+
# Validate domain constraint parameters.
|
|
6514
|
+
self._validate_domain_constraint_parameters(
|
|
6515
|
+
'adamic_adar', full, from_, to, between
|
|
5693
6516
|
)
|
|
5694
6517
|
|
|
6518
|
+
# At this point, exactly one of `full`, `from_`, or `between`
|
|
6519
|
+
# has been provided, and if `to` is provided, `from_` is also provided.
|
|
6520
|
+
|
|
6521
|
+
# Handle `between`.
|
|
6522
|
+
if between is not None:
|
|
6523
|
+
self._validate_pair_subset_parameter(between)
|
|
6524
|
+
return self._adamic_adar_between(between)
|
|
6525
|
+
|
|
6526
|
+
# Handle `from_` (and potentially `to`).
|
|
6527
|
+
if from_ is not None:
|
|
6528
|
+
self._validate_node_subset_parameter('from_', from_)
|
|
6529
|
+
if to is not None:
|
|
6530
|
+
self._validate_node_subset_parameter('to', to)
|
|
6531
|
+
return self._adamic_adar_from_to(from_, to)
|
|
6532
|
+
return self._adamic_adar_from(from_)
|
|
6533
|
+
|
|
6534
|
+
# Handle `full`.
|
|
5695
6535
|
return self._adamic_adar
|
|
5696
6536
|
|
|
5697
6537
|
@cached_property
|
|
5698
6538
|
def _adamic_adar(self):
|
|
5699
|
-
"""Lazily define and cache the
|
|
5700
|
-
_adamic_adar_rel = self.
|
|
6539
|
+
"""Lazily define and cache the full adamic_adar relationship."""
|
|
6540
|
+
_adamic_adar_rel = self._create_adamic_adar_relationship()
|
|
5701
6541
|
_adamic_adar_rel.annotate(annotations.track("graphs", "adamic_adar"))
|
|
6542
|
+
return _adamic_adar_rel
|
|
6543
|
+
|
|
6544
|
+
def _adamic_adar_from(self, node_subset_from: Relationship):
|
|
6545
|
+
"""
|
|
6546
|
+
Create an adamic_adar relationship, with the first position in each
|
|
6547
|
+
tuple constrained to be in the given subset of nodes. Note this relationship
|
|
6548
|
+
is not cached; it is specific to the callsite.
|
|
6549
|
+
"""
|
|
6550
|
+
_adamic_adar_rel = self._create_adamic_adar_relationship(
|
|
6551
|
+
node_subset_from=node_subset_from
|
|
6552
|
+
)
|
|
6553
|
+
_adamic_adar_rel.annotate(annotations.track("graphs", "adamic_adar_from"))
|
|
6554
|
+
return _adamic_adar_rel
|
|
6555
|
+
|
|
6556
|
+
def _adamic_adar_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
|
|
6557
|
+
"""
|
|
6558
|
+
Create an adamic_adar relationship, with the first position in each
|
|
6559
|
+
tuple constrained to be in `node_subset_from`, and the second position in
|
|
6560
|
+
each tuple constrained to be in `node_subset_to`. Note this relationship
|
|
6561
|
+
is not cached; it is specific to the callsite.
|
|
6562
|
+
"""
|
|
6563
|
+
_adamic_adar_rel = self._create_adamic_adar_relationship(
|
|
6564
|
+
node_subset_from=node_subset_from,
|
|
6565
|
+
node_subset_to=node_subset_to
|
|
6566
|
+
)
|
|
6567
|
+
_adamic_adar_rel.annotate(annotations.track("graphs", "adamic_adar_from_to"))
|
|
6568
|
+
return _adamic_adar_rel
|
|
6569
|
+
|
|
6570
|
+
def _adamic_adar_between(self, pair_subset_between: Relationship):
|
|
6571
|
+
"""
|
|
6572
|
+
Create an adamic_adar relationship, with the first and second position
|
|
6573
|
+
in each tuple jointly constrained to be in the given set of pairs
|
|
6574
|
+
of nodes. Note this relationship is not cached;
|
|
6575
|
+
it is specific to the callsite.
|
|
6576
|
+
"""
|
|
6577
|
+
_adamic_adar_rel = self._create_adamic_adar_relationship(
|
|
6578
|
+
pair_subset_between=pair_subset_between
|
|
6579
|
+
)
|
|
6580
|
+
_adamic_adar_rel.annotate(annotations.track("graphs", "adamic_adar_between"))
|
|
6581
|
+
return _adamic_adar_rel
|
|
5702
6582
|
|
|
6583
|
+
def _create_adamic_adar_relationship(
|
|
6584
|
+
self,
|
|
6585
|
+
*,
|
|
6586
|
+
node_subset_from: Optional[Relationship] = None,
|
|
6587
|
+
node_subset_to: Optional[Relationship] = None,
|
|
6588
|
+
pair_subset_between: Optional[Relationship] = None,
|
|
6589
|
+
):
|
|
6590
|
+
"""
|
|
6591
|
+
Create an adamic_adar relationship, optionally constrained by the provided
|
|
6592
|
+
node subsets or pair subset.
|
|
6593
|
+
"""
|
|
6594
|
+
_adamic_adar_rel = self._model.Relationship(
|
|
6595
|
+
f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
|
|
6596
|
+
f"have adamic adar score {{score:Float}}"
|
|
6597
|
+
)
|
|
6598
|
+
|
|
6599
|
+
# NOTE: Handling of the common_neighbor relation (`common_neighbor_rel`)
|
|
6600
|
+
# differs in each case, whereas handling of the count_neighbor relation
|
|
6601
|
+
# (`count_neighbor_rel`) is: a) the same among the constrained cases;
|
|
6602
|
+
# and b) different in the unconstrained case. As such we handle
|
|
6603
|
+
# `common_neighbor_rel` in the branches by case below, and handle
|
|
6604
|
+
# `count_neighbor_rel` in a separate constrained/unconstrained branch later.
|
|
6605
|
+
|
|
6606
|
+
# Handle the `between` case.
|
|
6607
|
+
if pair_subset_between is not None:
|
|
6608
|
+
# Get the appropriate common_neighbor relationship.
|
|
6609
|
+
common_neighbor_rel = self._common_neighbor_between(pair_subset_between)
|
|
6610
|
+
|
|
6611
|
+
# Handle the `from_` case.
|
|
6612
|
+
elif node_subset_from is not None and node_subset_to is None:
|
|
6613
|
+
# Get the appropriate common_neighbor relationship.
|
|
6614
|
+
common_neighbor_rel = self._common_neighbor_from(node_subset_from)
|
|
6615
|
+
|
|
6616
|
+
# Handle the `from_`/`to` case.
|
|
6617
|
+
elif node_subset_from is not None and node_subset_to is not None:
|
|
6618
|
+
common_neighbor_rel = self._common_neighbor_from_to(node_subset_from, node_subset_to)
|
|
6619
|
+
# Note that _common_neighbor_from_to handles optimization
|
|
6620
|
+
# when the from_ and to sets are object-identical.
|
|
6621
|
+
|
|
6622
|
+
# Handle the `full` case.
|
|
6623
|
+
else:
|
|
6624
|
+
# Use cached full relationship.
|
|
6625
|
+
common_neighbor_rel = self._common_neighbor
|
|
6626
|
+
|
|
6627
|
+
# Handle `count_neighbor_rel` for unconstrained versus constrained cases.
|
|
6628
|
+
if pair_subset_between is None and node_subset_from is None:
|
|
6629
|
+
# Unconstrained case.
|
|
6630
|
+
count_neighbor_rel = self._count_neighbor
|
|
6631
|
+
|
|
6632
|
+
else:
|
|
6633
|
+
# Constrained cases.
|
|
6634
|
+
|
|
6635
|
+
# Extract common neighbors that appear in
|
|
6636
|
+
# the constrained common_neighbor relationship.
|
|
6637
|
+
common_neighbors_subset = self._model.Relationship(
|
|
6638
|
+
f"{{node:{self._NodeConceptStr}}} is a relevant common neighbor"
|
|
6639
|
+
)
|
|
6640
|
+
node_x, node_y, neighbor_z = self.Node.ref(), self.Node.ref(), self.Node.ref()
|
|
6641
|
+
where(
|
|
6642
|
+
common_neighbor_rel(node_x, node_y, neighbor_z)
|
|
6643
|
+
).define(
|
|
6644
|
+
common_neighbors_subset(neighbor_z)
|
|
6645
|
+
)
|
|
6646
|
+
|
|
6647
|
+
# From those common neighbors,
|
|
6648
|
+
# build a constrained count_neighbor relationship.
|
|
6649
|
+
count_neighbor_rel = self._count_neighbor_of(common_neighbors_subset)
|
|
6650
|
+
|
|
6651
|
+
# Define the Adamic-Adar aggregation using the selected relationships.
|
|
5703
6652
|
node_u, node_v, common_neighbor = self.Node.ref(), self.Node.ref(), self.Node.ref()
|
|
5704
6653
|
neighbor_count = Integer.ref()
|
|
5705
|
-
|
|
5706
6654
|
where(
|
|
5707
6655
|
_score := sum(common_neighbor, 1.0 / natural_log(neighbor_count)).per(node_u, node_v).where(
|
|
5708
|
-
|
|
5709
|
-
|
|
6656
|
+
common_neighbor_rel(node_u, node_v, common_neighbor),
|
|
6657
|
+
count_neighbor_rel(common_neighbor, neighbor_count),
|
|
5710
6658
|
)
|
|
5711
6659
|
).define(_adamic_adar_rel(node_u, node_v, _score))
|
|
5712
6660
|
|