relationalai 0.11.4__py3-none-any.whl → 0.12.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- relationalai/clients/config.py +7 -0
- relationalai/clients/direct_access_client.py +113 -0
- relationalai/clients/snowflake.py +263 -189
- relationalai/clients/types.py +4 -1
- relationalai/clients/use_index_poller.py +72 -48
- relationalai/clients/util.py +9 -0
- relationalai/dsl.py +1 -2
- relationalai/early_access/metamodel/rewrite/__init__.py +5 -3
- relationalai/early_access/rel/rewrite/__init__.py +1 -1
- relationalai/environments/snowbook.py +10 -1
- relationalai/errors.py +24 -3
- relationalai/semantics/internal/annotations.py +1 -0
- relationalai/semantics/internal/internal.py +22 -3
- relationalai/semantics/lqp/builtins.py +1 -0
- relationalai/semantics/lqp/executor.py +12 -4
- relationalai/semantics/lqp/model2lqp.py +1 -0
- relationalai/semantics/lqp/passes.py +3 -4
- relationalai/semantics/{rel → lqp}/rewrite/__init__.py +6 -0
- relationalai/semantics/metamodel/builtins.py +12 -1
- relationalai/semantics/metamodel/executor.py +2 -1
- relationalai/semantics/metamodel/rewrite/__init__.py +3 -9
- relationalai/semantics/metamodel/rewrite/flatten.py +8 -7
- relationalai/semantics/reasoners/graph/core.py +1356 -258
- relationalai/semantics/rel/builtins.py +5 -1
- relationalai/semantics/rel/compiler.py +3 -3
- relationalai/semantics/rel/executor.py +20 -11
- relationalai/semantics/sql/compiler.py +2 -3
- relationalai/semantics/sql/executor/duck_db.py +8 -4
- relationalai/semantics/sql/executor/snowflake.py +1 -1
- relationalai/tools/cli.py +17 -6
- relationalai/tools/cli_controls.py +334 -352
- relationalai/tools/constants.py +1 -0
- relationalai/tools/query_utils.py +27 -0
- relationalai/util/otel_configuration.py +1 -1
- {relationalai-0.11.4.dist-info → relationalai-0.12.1.dist-info}/METADATA +5 -4
- {relationalai-0.11.4.dist-info → relationalai-0.12.1.dist-info}/RECORD +45 -45
- relationalai/semantics/metamodel/rewrite/gc_nodes.py +0 -58
- relationalai/semantics/metamodel/rewrite/list_types.py +0 -109
- /relationalai/semantics/{rel → lqp}/rewrite/cdc.py +0 -0
- /relationalai/semantics/{rel → lqp}/rewrite/extract_common.py +0 -0
- /relationalai/semantics/{metamodel → lqp}/rewrite/extract_keys.py +0 -0
- /relationalai/semantics/{metamodel → lqp}/rewrite/fd_constraints.py +0 -0
- /relationalai/semantics/{rel → lqp}/rewrite/quantify_vars.py +0 -0
- /relationalai/semantics/{metamodel → lqp}/rewrite/splinter.py +0 -0
- {relationalai-0.11.4.dist-info → relationalai-0.12.1.dist-info}/WHEEL +0 -0
- {relationalai-0.11.4.dist-info → relationalai-0.12.1.dist-info}/entry_points.txt +0 -0
- {relationalai-0.11.4.dist-info → relationalai-0.12.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -20,6 +20,7 @@ from relationalai.semantics import (
|
|
|
20
20
|
count, sum, avg,
|
|
21
21
|
)
|
|
22
22
|
from relationalai.docutils import include_in_docs
|
|
23
|
+
from relationalai.semantics.internal import annotations
|
|
23
24
|
from relationalai.semantics.std.math import abs, isnan, isinf, maximum, natural_log, sqrt
|
|
24
25
|
|
|
25
26
|
Numeric = Union[int, float, Decimal]
|
|
@@ -1055,6 +1056,181 @@ class Graph():
|
|
|
1055
1056
|
|
|
1056
1057
|
# End Visualization --------------------------------------------------------
|
|
1057
1058
|
|
|
1059
|
+
# The following three helper methods validate
|
|
1060
|
+
# `from_`, `to`, and `between`
|
|
1061
|
+
# parameters to public methods that accept them.
|
|
1062
|
+
|
|
1063
|
+
def _validate_domain_constraint_parameters(
|
|
1064
|
+
self,
|
|
1065
|
+
method_name: str,
|
|
1066
|
+
full: Optional[bool],
|
|
1067
|
+
from_: Optional[Relationship],
|
|
1068
|
+
to: Optional[Relationship],
|
|
1069
|
+
between: Optional[Relationship],
|
|
1070
|
+
):
|
|
1071
|
+
"""
|
|
1072
|
+
Validate the domain constraint parameters for methods that accept
|
|
1073
|
+
`full`, `from_`, `to`, and `between` parameters.
|
|
1074
|
+
|
|
1075
|
+
This helper method performs common validation logic that applies
|
|
1076
|
+
across multiple graph methods (e.g., common_neighbor, adamic_adar).
|
|
1077
|
+
|
|
1078
|
+
Parameters
|
|
1079
|
+
----------
|
|
1080
|
+
method_name : str
|
|
1081
|
+
The name of the method being validated (for error messages).
|
|
1082
|
+
full : bool, optional
|
|
1083
|
+
The full parameter value.
|
|
1084
|
+
from_ : Relationship, optional
|
|
1085
|
+
The from_ parameter value.
|
|
1086
|
+
to : Relationship, optional
|
|
1087
|
+
The to parameter value.
|
|
1088
|
+
between : Relationship, optional
|
|
1089
|
+
The between parameter value.
|
|
1090
|
+
|
|
1091
|
+
Raises
|
|
1092
|
+
------
|
|
1093
|
+
ValueError
|
|
1094
|
+
If parameter combinations are invalid.
|
|
1095
|
+
"""
|
|
1096
|
+
# Confirm that `full` was not provided with any other parameter.
|
|
1097
|
+
if (
|
|
1098
|
+
full is not None
|
|
1099
|
+
and (
|
|
1100
|
+
from_ is not None or
|
|
1101
|
+
to is not None or
|
|
1102
|
+
between is not None
|
|
1103
|
+
)
|
|
1104
|
+
):
|
|
1105
|
+
raise ValueError(
|
|
1106
|
+
"The 'full' parameter is mutually exclusive with 'from_', 'to', and 'between'. "
|
|
1107
|
+
f"Use 'full=True' to compute {method_name} for all node pairs, "
|
|
1108
|
+
"or use 'from_'/'to'/'between' to constrain computation to "
|
|
1109
|
+
"specific nodes or pairs."
|
|
1110
|
+
)
|
|
1111
|
+
|
|
1112
|
+
# Confirm that `between` was not provided with any other parameter.
|
|
1113
|
+
if (between is not None
|
|
1114
|
+
and (
|
|
1115
|
+
from_ is not None or
|
|
1116
|
+
to is not None
|
|
1117
|
+
# No need to check `full` here: the preceding check guarantees it is None.
|
|
1118
|
+
)
|
|
1119
|
+
):
|
|
1120
|
+
raise ValueError(
|
|
1121
|
+
"The 'between' parameter is mutually exclusive with 'from_' and 'to'. "
|
|
1122
|
+
"Use 'between' to constrain computation to specific node pairs, "
|
|
1123
|
+
"or use 'from_'/'to' to constrain by position."
|
|
1124
|
+
)
|
|
1125
|
+
|
|
1126
|
+
# Confirm that 'to' is only used with 'from_'.
|
|
1127
|
+
if to is not None and from_ is None:
|
|
1128
|
+
raise ValueError(
|
|
1129
|
+
"The 'to' parameter can only be used together with the 'from_' parameter. "
|
|
1130
|
+
f"The 'from_' parameter constrains the first position in {method_name} tuples, "
|
|
1131
|
+
f"while 'to' constrains the second position. Since {method_name} is symmetric "
|
|
1132
|
+
"in its first two positions, 'to' without 'from_' would be functionally redundant. "
|
|
1133
|
+
"Please either provide both 'from_' and 'to' parameters, or only 'from_'."
|
|
1134
|
+
)
|
|
1135
|
+
|
|
1136
|
+
# If no parameters are provided, raise an exception
|
|
1137
|
+
# to avoid unintentional, potentially expensive full computation.
|
|
1138
|
+
if (
|
|
1139
|
+
full is None and
|
|
1140
|
+
from_ is None and
|
|
1141
|
+
between is None
|
|
1142
|
+
):
|
|
1143
|
+
raise ValueError(
|
|
1144
|
+
f"Computing {method_name} for all pairs of nodes can be expensive. "
|
|
1145
|
+
f"To compute the full {method_name} relationship, "
|
|
1146
|
+
f"please call `{method_name}(full=True)`. To constrain computation to specific nodes, "
|
|
1147
|
+
f"please use `{method_name}(from_=node_subset)`, "
|
|
1148
|
+
f"`{method_name}(from_=node_subset_a, to=node_subset_b)`, "
|
|
1149
|
+
f"or `{method_name}(between=node_pairs)`."
|
|
1150
|
+
)
|
|
1151
|
+
|
|
1152
|
+
# Validate that full is True (not just not None).
|
|
1153
|
+
# This check is only reached if full is not None
|
|
1154
|
+
# and no other parameters are provided.
|
|
1155
|
+
if full is not None and full is not True:
|
|
1156
|
+
raise ValueError(
|
|
1157
|
+
f"Invalid value (`{full}`) for 'full' parameter. Use `full=True` "
|
|
1158
|
+
f"to compute the full {method_name} relationship, or use 'from_', "
|
|
1159
|
+
"'from_' and 'to', or 'between' to constrain computation."
|
|
1160
|
+
)
|
|
1161
|
+
|
|
1162
|
+
def _validate_node_subset_parameter(
|
|
1163
|
+
self,
|
|
1164
|
+
parameter_name: str,
|
|
1165
|
+
node_subset_relation: Relationship,
|
|
1166
|
+
):
|
|
1167
|
+
"""
|
|
1168
|
+
Validate that a parameter identifying a subset of nodes of interest is
|
|
1169
|
+
a unary relationship, of nodes, attached to the same model
|
|
1170
|
+
that the graph is attached to.
|
|
1171
|
+
"""
|
|
1172
|
+
# Validate that the parameter is a relationship.
|
|
1173
|
+
assert isinstance(node_subset_relation, Relationship), (
|
|
1174
|
+
f"The '{parameter_name}' parameter must be a `Relationship`, "
|
|
1175
|
+
f"but is a `{type(node_subset_relation).__name__}`."
|
|
1176
|
+
)
|
|
1177
|
+
|
|
1178
|
+
# Validate that the relationship is attached to the same model as the graph.
|
|
1179
|
+
assert node_subset_relation._model is self._model, (
|
|
1180
|
+
f"The given '{parameter_name}' relationship must "
|
|
1181
|
+
"be attached to the same model as the graph."
|
|
1182
|
+
)
|
|
1183
|
+
|
|
1184
|
+
# Validate that it's a unary relationship (has exactly one field).
|
|
1185
|
+
assert len(node_subset_relation._fields) == 1, (
|
|
1186
|
+
f"The '{parameter_name}' parameter must be a unary relationship, "
|
|
1187
|
+
f"but it has {len(node_subset_relation._fields)} fields."
|
|
1188
|
+
)
|
|
1189
|
+
|
|
1190
|
+
# Validate that the concept type matches the graph's Node concept.
|
|
1191
|
+
assert node_subset_relation._fields[0].type_str == self.Node._name, (
|
|
1192
|
+
f"The '{parameter_name}' relationship must be over "
|
|
1193
|
+
f"the graph's Node concept ('{self.Node._name}'), "
|
|
1194
|
+
f"but is over '{node_subset_relation._fields[0].type_str}'."
|
|
1195
|
+
)
|
|
1196
|
+
|
|
1197
|
+
# Takes no parameter name for now, as this validation pertains only to `between`.
|
|
1198
|
+
def _validate_pair_subset_parameter(self, pairs_relation):
|
|
1199
|
+
"""
|
|
1200
|
+
Validate that a parameter identifying pairs of nodes of interest is
|
|
1201
|
+
a binary relationship, of pairs of nodes, attached to the same model
|
|
1202
|
+
that the graph is attached to.
|
|
1203
|
+
"""
|
|
1204
|
+
# Validate that the parameter is a relationship.
|
|
1205
|
+
assert isinstance(pairs_relation, Relationship), (
|
|
1206
|
+
"The 'between' parameter must be a `Relationship`, "
|
|
1207
|
+
f"but is a `{type(pairs_relation).__name__}`."
|
|
1208
|
+
)
|
|
1209
|
+
|
|
1210
|
+
# Validate that the relationship is attached to the same model as the graph.
|
|
1211
|
+
assert pairs_relation._model is self._model, (
|
|
1212
|
+
"The given 'between' relationship must be "
|
|
1213
|
+
"attached to the same model as the graph."
|
|
1214
|
+
)
|
|
1215
|
+
|
|
1216
|
+
# Validate that it's a binary relationship (has exactly two fields).
|
|
1217
|
+
assert len(pairs_relation._fields) == 2, (
|
|
1218
|
+
"The 'between' parameter must be a binary relationship, "
|
|
1219
|
+
f"but it has {len(pairs_relation._fields)} fields."
|
|
1220
|
+
)
|
|
1221
|
+
|
|
1222
|
+
# Validate that both fields are typed as the graph's Node concept.
|
|
1223
|
+
assert pairs_relation._fields[0].type_str == self.Node._name, (
|
|
1224
|
+
"The 'between' relationship's first field must be "
|
|
1225
|
+
f"the graph's Node concept ('{self.Node._name}'), "
|
|
1226
|
+
f"but is '{pairs_relation._fields[0].type_str}'."
|
|
1227
|
+
)
|
|
1228
|
+
assert pairs_relation._fields[1].type_str == self.Node._name, (
|
|
1229
|
+
f"The 'between' relationship's second field must be "
|
|
1230
|
+
f"the graph's Node concept ('{self.Node._name}'), "
|
|
1231
|
+
f"but is '{pairs_relation._fields[1].type_str}'."
|
|
1232
|
+
)
|
|
1233
|
+
|
|
1058
1234
|
|
|
1059
1235
|
# The following three `_count_[in,out]neighbor` relationships are
|
|
1060
1236
|
# primarily for internal consumption. They differ from corresponding
|
|
@@ -1064,26 +1240,26 @@ class Graph():
|
|
|
1064
1240
|
@cached_property
|
|
1065
1241
|
def _count_neighbor(self):
|
|
1066
1242
|
"""Lazily define and cache the self._count_neighbor relationship."""
|
|
1067
|
-
return self._create_count_neighbor_relationship(
|
|
1243
|
+
return self._create_count_neighbor_relationship(node_subset=None)
|
|
1068
1244
|
|
|
1069
|
-
def _count_neighbor_of(self,
|
|
1245
|
+
def _count_neighbor_of(self, node_subset: Relationship):
|
|
1070
1246
|
"""
|
|
1071
1247
|
Create a _count_neighbor relationship constrained to the subset of nodes
|
|
1072
|
-
in `
|
|
1248
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
1073
1249
|
specific to the callsite.
|
|
1074
1250
|
"""
|
|
1075
|
-
return self._create_count_neighbor_relationship(
|
|
1251
|
+
return self._create_count_neighbor_relationship(node_subset=node_subset)
|
|
1076
1252
|
|
|
1077
|
-
def _create_count_neighbor_relationship(self, *,
|
|
1253
|
+
def _create_count_neighbor_relationship(self, *, node_subset: Optional[Relationship]):
|
|
1078
1254
|
_count_neighbor_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has neighbor count {{count:Integer}}")
|
|
1079
1255
|
|
|
1080
1256
|
# Choose the appropriate neighbor relationship based on whether we have constraints
|
|
1081
|
-
if
|
|
1257
|
+
if node_subset is None:
|
|
1082
1258
|
# No constraint - use cached neighbor relationship
|
|
1083
1259
|
neighbor_rel = self._neighbor
|
|
1084
1260
|
else:
|
|
1085
1261
|
# Constrained to nodes in the subset - use constrained neighbor relationship
|
|
1086
|
-
neighbor_rel = self._neighbor_of(
|
|
1262
|
+
neighbor_rel = self._neighbor_of(node_subset)
|
|
1087
1263
|
|
|
1088
1264
|
# Apply the same counting logic for both cases
|
|
1089
1265
|
src, dst = self.Node.ref(), self.Node.ref()
|
|
@@ -1094,26 +1270,26 @@ class Graph():
|
|
|
1094
1270
|
@cached_property
|
|
1095
1271
|
def _count_inneighbor(self):
|
|
1096
1272
|
"""Lazily define and cache the self._count_inneighbor relationship."""
|
|
1097
|
-
return self._create_count_inneighbor_relationship(
|
|
1273
|
+
return self._create_count_inneighbor_relationship(node_subset=None)
|
|
1098
1274
|
|
|
1099
|
-
def _count_inneighbor_of(self,
|
|
1275
|
+
def _count_inneighbor_of(self, node_subset: Relationship):
|
|
1100
1276
|
"""
|
|
1101
1277
|
Create a _count_inneighbor relationship constrained to the subset of nodes
|
|
1102
|
-
in `
|
|
1278
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
1103
1279
|
specific to the callsite.
|
|
1104
1280
|
"""
|
|
1105
|
-
return self._create_count_inneighbor_relationship(
|
|
1281
|
+
return self._create_count_inneighbor_relationship(node_subset=node_subset)
|
|
1106
1282
|
|
|
1107
|
-
def _create_count_inneighbor_relationship(self, *,
|
|
1283
|
+
def _create_count_inneighbor_relationship(self, *, node_subset: Optional[Relationship]):
|
|
1108
1284
|
_count_inneighbor_rel = self._model.Relationship(f"{{dst:{self._NodeConceptStr}}} has inneighbor count {{count:Integer}}")
|
|
1109
1285
|
|
|
1110
1286
|
# Choose the appropriate inneighbor relationship based on whether we have constraints
|
|
1111
|
-
if
|
|
1287
|
+
if node_subset is None:
|
|
1112
1288
|
# No constraint - use cached inneighbor relationship
|
|
1113
1289
|
inneighbor_rel = self._inneighbor
|
|
1114
1290
|
else:
|
|
1115
1291
|
# Constrained to nodes in the subset - use constrained inneighbor relationship
|
|
1116
|
-
inneighbor_rel = self._inneighbor_of(
|
|
1292
|
+
inneighbor_rel = self._inneighbor_of(node_subset)
|
|
1117
1293
|
|
|
1118
1294
|
# Apply the same counting logic for both cases
|
|
1119
1295
|
dst, src = self.Node.ref(), self.Node.ref()
|
|
@@ -1124,26 +1300,26 @@ class Graph():
|
|
|
1124
1300
|
@cached_property
|
|
1125
1301
|
def _count_outneighbor(self):
|
|
1126
1302
|
"""Lazily define and cache the self._count_outneighbor relationship."""
|
|
1127
|
-
return self._create_count_outneighbor_relationship(
|
|
1303
|
+
return self._create_count_outneighbor_relationship(node_subset=None)
|
|
1128
1304
|
|
|
1129
|
-
def _count_outneighbor_of(self,
|
|
1305
|
+
def _count_outneighbor_of(self, node_subset: Relationship):
|
|
1130
1306
|
"""
|
|
1131
1307
|
Create a _count_outneighbor relationship constrained to the subset of nodes
|
|
1132
|
-
in `
|
|
1308
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
1133
1309
|
specific to the callsite.
|
|
1134
1310
|
"""
|
|
1135
|
-
return self._create_count_outneighbor_relationship(
|
|
1311
|
+
return self._create_count_outneighbor_relationship(node_subset=node_subset)
|
|
1136
1312
|
|
|
1137
|
-
def _create_count_outneighbor_relationship(self, *,
|
|
1313
|
+
def _create_count_outneighbor_relationship(self, *, node_subset: Optional[Relationship]):
|
|
1138
1314
|
_count_outneighbor_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has outneighbor count {{count:Integer}}")
|
|
1139
1315
|
|
|
1140
1316
|
# Choose the appropriate outneighbor relationship based on whether we have constraints
|
|
1141
|
-
if
|
|
1317
|
+
if node_subset is None:
|
|
1142
1318
|
# No constraint - use cached outneighbor relationship
|
|
1143
1319
|
outneighbor_rel = self._outneighbor
|
|
1144
1320
|
else:
|
|
1145
1321
|
# Constrained to nodes in the subset - use constrained outneighbor relationship
|
|
1146
|
-
outneighbor_rel = self._outneighbor_of(
|
|
1322
|
+
outneighbor_rel = self._outneighbor_of(node_subset)
|
|
1147
1323
|
|
|
1148
1324
|
# Apply the same counting logic for both cases
|
|
1149
1325
|
src, dst = self.Node.ref(), self.Node.ref()
|
|
@@ -1250,6 +1426,8 @@ class Graph():
|
|
|
1250
1426
|
def _num_nodes(self):
|
|
1251
1427
|
"""Lazily define and cache the self._num_nodes relationship."""
|
|
1252
1428
|
_num_nodes_rel = self._model.Relationship("The graph has {num_nodes:Integer} nodes")
|
|
1429
|
+
_num_nodes_rel.annotate(annotations.track("graphs", "num_nodes"))
|
|
1430
|
+
|
|
1253
1431
|
define(_num_nodes_rel(count(self.Node) | 0))
|
|
1254
1432
|
return _num_nodes_rel
|
|
1255
1433
|
|
|
@@ -1316,6 +1494,7 @@ class Graph():
|
|
|
1316
1494
|
def _num_edges(self):
|
|
1317
1495
|
"""Lazily define and cache the self._num_edges relationship."""
|
|
1318
1496
|
_num_edges_rel = self._model.Relationship("The graph has {num_edges:Integer} edges")
|
|
1497
|
+
_num_edges_rel.annotate(annotations.track("graphs", "num_edges"))
|
|
1319
1498
|
|
|
1320
1499
|
src, dst = self.Node.ref(), self.Node.ref()
|
|
1321
1500
|
if self.directed:
|
|
@@ -1433,52 +1612,27 @@ class Graph():
|
|
|
1433
1612
|
return self._neighbor
|
|
1434
1613
|
else:
|
|
1435
1614
|
# Validate the 'of' parameter
|
|
1436
|
-
self._validate_node_subset_parameter(of)
|
|
1615
|
+
self._validate_node_subset_parameter('of', of)
|
|
1437
1616
|
return self._neighbor_of(of)
|
|
1438
1617
|
|
|
1439
|
-
def _validate_node_subset_parameter(self, of_relation):
|
|
1440
|
-
"""
|
|
1441
|
-
Validate that a parameter identifying a subset of nodes of interest is
|
|
1442
|
-
is a unary relationship containing nodes that is attached to
|
|
1443
|
-
the same model that the graph is attached to.
|
|
1444
|
-
"""
|
|
1445
|
-
# Validate that the parameter is a relationship.
|
|
1446
|
-
assert isinstance(of_relation, Relationship), (
|
|
1447
|
-
"The 'of' parameter must be a `Relationship`, "
|
|
1448
|
-
f"but is a `{type(of_relation).__name__}`."
|
|
1449
|
-
)
|
|
1450
|
-
|
|
1451
|
-
# Validate that the relationship is attached to the same model as the graph.
|
|
1452
|
-
assert of_relation._model is self._model, (
|
|
1453
|
-
"The given 'of' relationship must be attached to the same model as the graph."
|
|
1454
|
-
)
|
|
1455
|
-
|
|
1456
|
-
# Validate that it's a unary relationship (has exactly one field).
|
|
1457
|
-
assert len(of_relation._fields) == 1, (
|
|
1458
|
-
"The 'of' parameter must be a unary relationship, "
|
|
1459
|
-
f"but it has {len(of_relation._fields)} fields."
|
|
1460
|
-
)
|
|
1461
|
-
|
|
1462
|
-
# Validate that the concept type matches the graph's Node concept.
|
|
1463
|
-
assert of_relation._fields[0].type_str == self.Node._name, (
|
|
1464
|
-
f"The 'of' relationship must be over the graph's Node concept ('{self.Node._name}'), "
|
|
1465
|
-
f"but is over '{of_relation._fields[0].type_str}'."
|
|
1466
|
-
)
|
|
1467
|
-
|
|
1468
1618
|
@cached_property
|
|
1469
1619
|
def _neighbor(self):
|
|
1470
1620
|
"""Lazily define and cache the self._neighbor relationship."""
|
|
1471
|
-
|
|
1621
|
+
_neighbor_rel = self._create_neighbor_relationship(node_subset=None)
|
|
1622
|
+
_neighbor_rel.annotate(annotations.track("graphs", "neighbor"))
|
|
1623
|
+
return _neighbor_rel
|
|
1472
1624
|
|
|
1473
|
-
def _neighbor_of(self,
|
|
1625
|
+
def _neighbor_of(self, node_subset: Relationship):
|
|
1474
1626
|
"""
|
|
1475
1627
|
Create a neighbor relationship constrained to the subset of nodes
|
|
1476
|
-
in `
|
|
1628
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
1477
1629
|
specific to the callsite.
|
|
1478
1630
|
"""
|
|
1479
|
-
|
|
1631
|
+
_neighbor_rel = self._create_neighbor_relationship(node_subset=node_subset)
|
|
1632
|
+
_neighbor_rel.annotate(annotations.track("graphs", "neighbor_of"))
|
|
1633
|
+
return _neighbor_rel
|
|
1480
1634
|
|
|
1481
|
-
def _create_neighbor_relationship(self, *,
|
|
1635
|
+
def _create_neighbor_relationship(self, *, node_subset: Optional[Relationship]):
|
|
1482
1636
|
_neighbor_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has neighbor {{dst:{self._NodeConceptStr}}}")
|
|
1483
1637
|
src, dst = self.Node.ref(), self.Node.ref()
|
|
1484
1638
|
|
|
@@ -1489,14 +1643,14 @@ class Graph():
|
|
|
1489
1643
|
# Capture out-neighbors.
|
|
1490
1644
|
where(
|
|
1491
1645
|
self._edge(src, dst),
|
|
1492
|
-
*([
|
|
1646
|
+
*([node_subset(src)] if node_subset else [])
|
|
1493
1647
|
).define(
|
|
1494
1648
|
_neighbor_rel(src, dst)
|
|
1495
1649
|
)
|
|
1496
1650
|
# Capture in-neighbors.
|
|
1497
1651
|
where(
|
|
1498
1652
|
self._edge(src, dst),
|
|
1499
|
-
*([
|
|
1653
|
+
*([node_subset(dst)] if node_subset else [])
|
|
1500
1654
|
).define(
|
|
1501
1655
|
_neighbor_rel(dst, src)
|
|
1502
1656
|
)
|
|
@@ -1505,7 +1659,7 @@ class Graph():
|
|
|
1505
1659
|
# so a single rule suffices to capture all neighbors.
|
|
1506
1660
|
where(
|
|
1507
1661
|
self._edge(src, dst),
|
|
1508
|
-
*([
|
|
1662
|
+
*([node_subset(src)] if node_subset else [])
|
|
1509
1663
|
).define(
|
|
1510
1664
|
_neighbor_rel(src, dst)
|
|
1511
1665
|
)
|
|
@@ -1620,23 +1774,27 @@ class Graph():
|
|
|
1620
1774
|
return self._inneighbor
|
|
1621
1775
|
else:
|
|
1622
1776
|
# Validate the 'of' parameter
|
|
1623
|
-
self._validate_node_subset_parameter(of)
|
|
1777
|
+
self._validate_node_subset_parameter('of', of)
|
|
1624
1778
|
return self._inneighbor_of(of)
|
|
1625
1779
|
|
|
1626
1780
|
@cached_property
|
|
1627
1781
|
def _inneighbor(self):
|
|
1628
1782
|
"""Lazily define and cache the self._inneighbor relationship."""
|
|
1629
|
-
|
|
1783
|
+
_inneighbor_rel = self._create_inneighbor_relationship(node_subset=None)
|
|
1784
|
+
_inneighbor_rel.annotate(annotations.track("graphs", "inneighbor"))
|
|
1785
|
+
return _inneighbor_rel
|
|
1630
1786
|
|
|
1631
|
-
def _inneighbor_of(self,
|
|
1787
|
+
def _inneighbor_of(self, node_subset: Relationship):
|
|
1632
1788
|
"""
|
|
1633
1789
|
Create an inneighbor relationship constrained to the subset of nodes
|
|
1634
|
-
in `
|
|
1790
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
1635
1791
|
specific to the callsite.
|
|
1636
1792
|
"""
|
|
1637
|
-
|
|
1793
|
+
_inneighbor_rel = self._create_inneighbor_relationship(node_subset=node_subset)
|
|
1794
|
+
_inneighbor_rel.annotate(annotations.track("graphs", "inneighbor_of"))
|
|
1795
|
+
return _inneighbor_rel
|
|
1638
1796
|
|
|
1639
|
-
def _create_inneighbor_relationship(self, *,
|
|
1797
|
+
def _create_inneighbor_relationship(self, *, node_subset: Optional[Relationship]):
|
|
1640
1798
|
_inneighbor_rel = self._model.Relationship(f"{{dst:{self._NodeConceptStr}}} has inneighbor {{src:{self._NodeConceptStr}}}")
|
|
1641
1799
|
src, dst = self.Node.ref(), self.Node.ref()
|
|
1642
1800
|
|
|
@@ -1645,7 +1803,7 @@ class Graph():
|
|
|
1645
1803
|
# have an edge to the destination nodes in our subset.
|
|
1646
1804
|
where(
|
|
1647
1805
|
self._edge(src, dst),
|
|
1648
|
-
*([
|
|
1806
|
+
*([node_subset(dst)] if node_subset else [])
|
|
1649
1807
|
).define(
|
|
1650
1808
|
_inneighbor_rel(dst, src)
|
|
1651
1809
|
)
|
|
@@ -1654,7 +1812,7 @@ class Graph():
|
|
|
1654
1812
|
# so neighbors and in-neighbors are the same.
|
|
1655
1813
|
where(
|
|
1656
1814
|
self._edge(src, dst),
|
|
1657
|
-
*([
|
|
1815
|
+
*([node_subset(dst)] if node_subset else [])
|
|
1658
1816
|
).define(
|
|
1659
1817
|
_inneighbor_rel(dst, src)
|
|
1660
1818
|
)
|
|
@@ -1771,23 +1929,27 @@ class Graph():
|
|
|
1771
1929
|
return self._outneighbor
|
|
1772
1930
|
else:
|
|
1773
1931
|
# Validate the 'of' parameter
|
|
1774
|
-
self._validate_node_subset_parameter(of)
|
|
1932
|
+
self._validate_node_subset_parameter('of', of)
|
|
1775
1933
|
return self._outneighbor_of(of)
|
|
1776
1934
|
|
|
1777
1935
|
@cached_property
|
|
1778
1936
|
def _outneighbor(self):
|
|
1779
1937
|
"""Lazily define and cache the self._outneighbor relationship."""
|
|
1780
|
-
|
|
1938
|
+
_outneighbor_rel = self._create_outneighbor_relationship(node_subset=None)
|
|
1939
|
+
_outneighbor_rel.annotate(annotations.track("graphs", "outneighbor"))
|
|
1940
|
+
return _outneighbor_rel
|
|
1781
1941
|
|
|
1782
|
-
def _outneighbor_of(self,
|
|
1942
|
+
def _outneighbor_of(self, node_subset: Relationship):
|
|
1783
1943
|
"""
|
|
1784
1944
|
Create an outneighbor relationship constrained to the subset of nodes
|
|
1785
|
-
in `
|
|
1945
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
1786
1946
|
specific to the callsite.
|
|
1787
1947
|
"""
|
|
1788
|
-
|
|
1948
|
+
_outneighbor_rel = self._create_outneighbor_relationship(node_subset=node_subset)
|
|
1949
|
+
_outneighbor_rel.annotate(annotations.track("graphs", "outneighbor_of"))
|
|
1950
|
+
return _outneighbor_rel
|
|
1789
1951
|
|
|
1790
|
-
def _create_outneighbor_relationship(self, *,
|
|
1952
|
+
def _create_outneighbor_relationship(self, *, node_subset: Optional[Relationship]):
|
|
1791
1953
|
_outneighbor_rel = self._model.Relationship(f"{{src:{self._NodeConceptStr}}} has outneighbor {{dst:{self._NodeConceptStr}}}")
|
|
1792
1954
|
src, dst = self.Node.ref(), self.Node.ref()
|
|
1793
1955
|
|
|
@@ -1796,7 +1958,7 @@ class Graph():
|
|
|
1796
1958
|
# have an edge from the source nodes in our subset.
|
|
1797
1959
|
where(
|
|
1798
1960
|
self._edge(src, dst),
|
|
1799
|
-
*([
|
|
1961
|
+
*([node_subset(src)] if node_subset else [])
|
|
1800
1962
|
).define(
|
|
1801
1963
|
_outneighbor_rel(src, dst)
|
|
1802
1964
|
)
|
|
@@ -1805,7 +1967,7 @@ class Graph():
|
|
|
1805
1967
|
# so neighbors and out-neighbors are the same.
|
|
1806
1968
|
where(
|
|
1807
1969
|
self._edge(src, dst),
|
|
1808
|
-
*([
|
|
1970
|
+
*([node_subset(src)] if node_subset else [])
|
|
1809
1971
|
).define(
|
|
1810
1972
|
_outneighbor_rel(src, dst)
|
|
1811
1973
|
)
|
|
@@ -1814,18 +1976,67 @@ class Graph():
|
|
|
1814
1976
|
|
|
1815
1977
|
|
|
1816
1978
|
@include_in_docs
|
|
1817
|
-
def common_neighbor(self
|
|
1818
|
-
|
|
1979
|
+
def common_neighbor(self,
|
|
1980
|
+
*,
|
|
1981
|
+
full: Optional[bool] = None,
|
|
1982
|
+
from_: Optional[Relationship] = None,
|
|
1983
|
+
to: Optional[Relationship] = None,
|
|
1984
|
+
between: Optional[Relationship] = None,
|
|
1985
|
+
):
|
|
1986
|
+
"""Returns a ternary relationship of common neighbor triplets.
|
|
1819
1987
|
|
|
1820
1988
|
A node `w` is a common neighbor of a pair of nodes `u` and `v` if
|
|
1821
1989
|
`w` is a neighbor of both `u` and `v`.
|
|
1822
1990
|
|
|
1991
|
+
Parameters
|
|
1992
|
+
----------
|
|
1993
|
+
full : bool, optional
|
|
1994
|
+
If ``True``, computes common neighbors for all pairs of nodes in
|
|
1995
|
+
the graph. This computation can be expensive for large graphs, as the
|
|
1996
|
+
result can scale quadratically in the number of edges or cubically in
|
|
1997
|
+
the number of nodes. Mutually exclusive with other parameters.
|
|
1998
|
+
Default is ``None``.
|
|
1999
|
+
from_ : Relationship, optional
|
|
2000
|
+
A unary relationship containing a subset of the graph's nodes. When
|
|
2001
|
+
provided, constrains the domain of the common neighbor computation: only
|
|
2002
|
+
common neighbors of node pairs where the first node is in this relationship
|
|
2003
|
+
are computed and returned. Mutually exclusive with ``full`` and ``between``.
|
|
2004
|
+
Default is ``None``.
|
|
2005
|
+
to : Relationship, optional
|
|
2006
|
+
A unary relationship containing a subset of the graph's nodes. Can only
|
|
2007
|
+
be used together with the ``from_`` parameter. When provided with ``from_``,
|
|
2008
|
+
constrains the domain of the common neighbor computation: only common
|
|
2009
|
+
neighbors of node pairs where the first node is in ``from_`` and the
|
|
2010
|
+
second node is in ``to`` are computed and returned.
|
|
2011
|
+
Default is ``None``.
|
|
2012
|
+
between : Relationship, optional
|
|
2013
|
+
A binary relationship containing pairs of nodes. When provided,
|
|
2014
|
+
constrains the domain of the common neighbor computation: only common
|
|
2015
|
+
neighbors for the specific node pairs in this relationship are computed
|
|
2016
|
+
and returned. Mutually exclusive with other parameters.
|
|
2017
|
+
Default is ``None``.
|
|
2018
|
+
|
|
1823
2019
|
Returns
|
|
1824
2020
|
-------
|
|
1825
2021
|
Relationship
|
|
1826
2022
|
A ternary relationship where each tuple represents a pair of nodes
|
|
1827
2023
|
and one of their common neighbors.
|
|
1828
2024
|
|
|
2025
|
+
Raises
|
|
2026
|
+
------
|
|
2027
|
+
ValueError
|
|
2028
|
+
If ``full`` is provided with any other parameter.
|
|
2029
|
+
If ``between`` is provided with any other parameter.
|
|
2030
|
+
If ``from_`` is provided with any parameter other than ``to``.
|
|
2031
|
+
If none of ``full``, ``from_``, or ``between`` is provided.
|
|
2032
|
+
If ``full`` is not ``True`` or ``None``.
|
|
2033
|
+
AssertionError
|
|
2034
|
+
If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
|
|
2035
|
+
If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
|
|
2036
|
+
If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
|
|
2037
|
+
If ``from_`` or ``to`` is not a unary relationship.
|
|
2038
|
+
If ``between`` is not a binary relationship.
|
|
2039
|
+
|
|
1829
2040
|
Relationship Schema
|
|
1830
2041
|
-------------------
|
|
1831
2042
|
``common_neighbor(node_u, node_v, common_neighbor_node)``
|
|
@@ -1842,6 +2053,37 @@ class Graph():
|
|
|
1842
2053
|
| Directed | Yes | |
|
|
1843
2054
|
| Weighted | Yes | Weights are ignored. |
|
|
1844
2055
|
|
|
2056
|
+
Notes
|
|
2057
|
+
-----
|
|
2058
|
+
The ``common_neighbor(full=True)`` method computes and caches the full common
|
|
2059
|
+
neighbor relationship for all pairs of nodes, providing efficient reuse across
|
|
2060
|
+
multiple calls. This can be expensive as the result can contain O(|E|²) or
|
|
2061
|
+
O(|V|³) tuples depending on graph density.
|
|
2062
|
+
|
|
2063
|
+
Calling ``common_neighbor()`` without arguments raises a ``ValueError``,
|
|
2064
|
+
to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
|
|
2065
|
+
|
|
2066
|
+
In contrast, ``common_neighbor(from_=subset)`` constrains the computation to
|
|
2067
|
+
tuples with the first position in the passed-in ``subset``. The result is
|
|
2068
|
+
not cached; it is specific to the call site. When a significant fraction of
|
|
2069
|
+
the common neighbor relation is needed across a program, ``common_neighbor(full=True)``
|
|
2070
|
+
is typically more efficient. Use ``common_neighbor(from_=subset)`` only
|
|
2071
|
+
when small subsets of the common neighbor relationship are needed
|
|
2072
|
+
collectively across the program.
|
|
2073
|
+
|
|
2074
|
+
The ``to`` parameter can be used together with ``from_`` to further
|
|
2075
|
+
constrain the computation: ``common_neighbor(from_=subset_a, to=subset_b)``
|
|
2076
|
+
computes common neighbors only for node pairs where the first node is in
|
|
2077
|
+
``subset_a`` and the second node is in ``subset_b``. (Since ``common_neighbor``
|
|
2078
|
+
is symmetric in its first two positions, using ``to`` without ``from_`` would
|
|
2079
|
+
be functionally redundant, and is not allowed.)
|
|
2080
|
+
|
|
2081
|
+
The ``between`` parameter provides another way to constrain the computation.
|
|
2082
|
+
Unlike ``from_`` and ``to``, which allow you to independently constrain the first
|
|
2083
|
+
and second positions in ``common_neighbor`` tuples to sets of nodes, ``between``
|
|
2084
|
+
allows you to constrain the first and second positions, jointly, to specific pairs
|
|
2085
|
+
of nodes.
|
|
2086
|
+
|
|
1845
2087
|
Examples
|
|
1846
2088
|
--------
|
|
1847
2089
|
>>> from relationalai.semantics import Model, define, select
|
|
@@ -1865,7 +2107,7 @@ class Graph():
|
|
|
1865
2107
|
>>>
|
|
1866
2108
|
>>> # 3. Select the IDs from the common_neighbor relationship and inspect
|
|
1867
2109
|
>>> u, v, w = Node.ref("u"), Node.ref("v"), Node.ref("w")
|
|
1868
|
-
>>> common_neighbor = graph.common_neighbor()
|
|
2110
|
+
>>> common_neighbor = graph.common_neighbor(full=True)
|
|
1869
2111
|
>>> select(
|
|
1870
2112
|
... u.id, v.id, w.id
|
|
1871
2113
|
... ).where(
|
|
@@ -1897,25 +2139,230 @@ class Graph():
|
|
|
1897
2139
|
21 4 4 2
|
|
1898
2140
|
22 4 4 3
|
|
1899
2141
|
|
|
2142
|
+
>>> # 4. Use 'from_' parameter to constrain the set of nodes to compute common neighbors for
|
|
2143
|
+
>>> # Define a subset containing only node 1
|
|
2144
|
+
>>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
|
|
2145
|
+
>>> node = Node.ref()
|
|
2146
|
+
>>> where(node.id == 1).define(subset(node))
|
|
2147
|
+
>>>
|
|
2148
|
+
>>> # Get common neighbors only for pairs where first node is in subset
|
|
2149
|
+
>>> constrained_common_neighbor = graph.common_neighbor(from_=subset)
|
|
2150
|
+
>>> select(u.id, v.id, w.id).where(constrained_common_neighbor(u, v, w)).inspect()
|
|
2151
|
+
▰▰▰▰ Setup complete
|
|
2152
|
+
id id2 id3
|
|
2153
|
+
0 1 1 2
|
|
2154
|
+
1 1 3 2
|
|
2155
|
+
2 1 4 2
|
|
2156
|
+
|
|
2157
|
+
>>> # 5. Use both 'from_' and 'to' parameters to constrain the first two positions
|
|
2158
|
+
>>> subset_a = model.Relationship(f"{{node:{Node}}} is in subset_a")
|
|
2159
|
+
>>> subset_b = model.Relationship(f"{{node:{Node}}} is in subset_b")
|
|
2160
|
+
>>> where(node.id == 1).define(subset_a(node))
|
|
2161
|
+
>>> where(node.id == 3).define(subset_b(node))
|
|
2162
|
+
>>>
|
|
2163
|
+
>>> # Get common neighbors only where the first node is in subset_a and the second node is in subset_b
|
|
2164
|
+
>>> constrained_common_neighbor = graph.common_neighbor(from_=subset_a, to=subset_b)
|
|
2165
|
+
>>> select(u.id, v.id, w.id).where(constrained_common_neighbor(u, v, w)).inspect()
|
|
2166
|
+
▰▰▰▰ Setup complete
|
|
2167
|
+
id id2 id3
|
|
2168
|
+
0 1 3 2
|
|
2169
|
+
|
|
2170
|
+
>>> # 6. Use 'between' parameter to constrain to specific pairs of nodes
|
|
2171
|
+
>>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
|
|
2172
|
+
>>> node_a, node_b = Node.ref(), Node.ref()
|
|
2173
|
+
>>> where(node_a.id == 1, node_b.id == 3).define(pairs(node_a, node_b))
|
|
2174
|
+
>>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
|
|
2175
|
+
>>>
|
|
2176
|
+
>>> # Get common neighbors only for the specific pairs (1, 3) and (2, 4)
|
|
2177
|
+
>>> constrained_common_neighbor = graph.common_neighbor(between=pairs)
|
|
2178
|
+
>>> select(u.id, v.id, w.id).where(constrained_common_neighbor(u, v, w)).inspect()
|
|
2179
|
+
▰▰▰▰ Setup complete
|
|
2180
|
+
id id2 id3
|
|
2181
|
+
0 1 3 2
|
|
2182
|
+
1 2 4 3
|
|
2183
|
+
|
|
1900
2184
|
"""
|
|
1901
|
-
|
|
1902
|
-
|
|
1903
|
-
|
|
1904
|
-
"for all pairs of nodes in the graph. To provide better control "
|
|
1905
|
-
"over the computed subset, `common_neighbor`'s interface "
|
|
1906
|
-
"will soon need to change."
|
|
1907
|
-
),
|
|
1908
|
-
FutureWarning,
|
|
1909
|
-
stacklevel=2
|
|
2185
|
+
# Validate domain constraint parameters.
|
|
2186
|
+
self._validate_domain_constraint_parameters(
|
|
2187
|
+
'common_neighbor', full, from_, to, between
|
|
1910
2188
|
)
|
|
2189
|
+
|
|
2190
|
+
# At this point, exactly one of `full`, `from_`, or `between`
|
|
2191
|
+
# has been provided, and if `to` is provided, `from_` is also provided.
|
|
2192
|
+
|
|
2193
|
+
# Handle `between`.
|
|
2194
|
+
if between is not None:
|
|
2195
|
+
self._validate_pair_subset_parameter(between)
|
|
2196
|
+
return self._common_neighbor_between(between)
|
|
2197
|
+
|
|
2198
|
+
# Handle `from_` (and potentially `to`).
|
|
2199
|
+
if from_ is not None:
|
|
2200
|
+
self._validate_node_subset_parameter('from_', from_)
|
|
2201
|
+
if to is not None:
|
|
2202
|
+
self._validate_node_subset_parameter('to', to)
|
|
2203
|
+
return self._common_neighbor_from_to(from_, to)
|
|
2204
|
+
return self._common_neighbor_from(from_)
|
|
2205
|
+
|
|
2206
|
+
# Handle `full`.
|
|
1911
2207
|
return self._common_neighbor
|
|
1912
2208
|
|
|
1913
2209
|
@cached_property
def _common_neighbor(self):
    """
    Lazily define and cache the full common_neighbor relationship.

    The relationship is built by ``_create_common_neighbor_relationship``
    with no domain constraints and tagged with the
    ``("graphs", "common_neighbor")`` tracking annotation. Because this is a
    ``cached_property``, the body runs at most once per instance.
    """
    full_rel = self._create_common_neighbor_relationship()
    full_rel.annotate(annotations.track("graphs", "common_neighbor"))
    return full_rel
|
|
2215
|
+
|
|
2216
|
+
def _common_neighbor_from(self, node_subset_from: Relationship):
    """
    Create a common_neighbor relationship whose first tuple position is
    constrained to the nodes in `node_subset_from`.

    The result is not cached; a new relationship is created on every call,
    so it is specific to the callsite. It is tagged with the
    ``("graphs", "common_neighbor_from")`` tracking annotation.
    """
    constrained_rel = self._create_common_neighbor_relationship(
        node_subset_from=node_subset_from
    )
    constrained_rel.annotate(annotations.track("graphs", "common_neighbor_from"))
    return constrained_rel
|
|
2227
|
+
|
|
2228
|
+
def _common_neighbor_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
    """
    Create a common_neighbor relationship whose first tuple position is
    constrained to `node_subset_from` and whose second tuple position is
    constrained to `node_subset_to`.

    The result is not cached; a new relationship is created on every call,
    so it is specific to the callsite. It is tagged with the
    ``("graphs", "common_neighbor_from_to")`` tracking annotation.
    """
    constrained_rel = self._create_common_neighbor_relationship(
        node_subset_from=node_subset_from,
        node_subset_to=node_subset_to
    )
    constrained_rel.annotate(annotations.track("graphs", "common_neighbor_from_to"))
    return constrained_rel
|
|
2241
|
+
|
|
2242
|
+
def _common_neighbor_between(self, pair_subset: Relationship):
    """
    Create a common_neighbor relationship whose first and second tuple
    positions are jointly constrained to the node pairs in `pair_subset`.

    The result is not cached; a new relationship is created on every call,
    so it is specific to the callsite. It is tagged with the
    ``("graphs", "common_neighbor_between")`` tracking annotation.
    """
    constrained_rel = self._create_common_neighbor_relationship(
        pair_subset_between=pair_subset
    )
    constrained_rel.annotate(annotations.track("graphs", "common_neighbor_between"))
    return constrained_rel
|
|
2254
|
+
|
|
2255
|
+
def _create_common_neighbor_relationship(
    self,
    *,
    node_subset_from: Optional[Relationship] = None,
    node_subset_to: Optional[Relationship] = None,
    pair_subset_between: Optional[Relationship] = None,
):
    """
    Create a common_neighbor relationship, optionally constrained by the
    provided node subsets or pair subset.

    Parameters
    ----------
    node_subset_from : Relationship, optional
        Unary relationship constraining the first position of each tuple.
    node_subset_to : Relationship, optional
        Unary relationship constraining the second position of each tuple.
        Only consulted when ``node_subset_from`` is also given.
    pair_subset_between : Relationship, optional
        Binary relationship jointly constraining the first two positions.
        When given, it takes precedence and the node subsets are not used.

    Returns
    -------
    Relationship
        A newly created ternary relationship
        ``(node_a, node_b, neighbor_node)``.

    Notes
    -----
    No argument validation happens here. In particular, passing
    ``node_subset_to`` without ``node_subset_from`` falls through to the
    unconstrained (`full`) branch.
    """
    result_rel = self._model.Relationship(
        f"{{node_a:{self._NodeConceptStr}}} and {{node_b:{self._NodeConceptStr}}} "
        f"have common neighbor {{neighbor_node:{self._NodeConceptStr}}}"
    )
    node_a, node_b, neighbor_node = self.Node.ref(), self.Node.ref(), self.Node.ref()

    if pair_subset_between is not None:
        # `between` case. First collect every node that occurs in either
        # position of the pairs relationship into a unary relationship, so
        # that the neighbor computation can be restricted to those nodes.
        nodes_in_pairs = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} is in pairs subset")
        node_x, node_y = self.Node.ref(), self.Node.ref()
        where(
            pair_subset_between(node_x, node_y)
        ).define(
            nodes_in_pairs(node_x),
            nodes_in_pairs(node_y)
        )

        # A single constrained neighbor relationship serves both positions.
        left_neighbors = right_neighbors = self._neighbor_of(nodes_in_pairs)

        # Restrict the result to the exact pairs supplied, rather than to
        # every combination of the nodes occurring in those pairs.
        constraints = [pair_subset_between(node_a, node_b)]

    elif node_subset_from is None:
        # `full` case: no constraint on either position.
        left_neighbors = self._neighbor
        right_neighbors = self._neighbor
        constraints = []

    elif node_subset_to is None:
        # `from_` case. The second position is unconstrained, so all of
        # `_neighbor` has to be computed anyway. Given that, building
        # `_neighbor_of(node_subset_from)` for the first position would be
        # less efficient than reusing `_neighbor` and joining the first
        # variable with `node_subset_from`.
        left_neighbors = self._neighbor
        right_neighbors = self._neighbor
        constraints = [node_subset_from(node_a)]
        # TODO: Nice observation from @rygao: We can instead implement this
        #   as a depth-2 traversal starting from `node_subset_from`. Candidate code:
        #
        # neighbor_a_rel = self._neighbor_of(node_subset_from)
        #
        # domain_w = Relationship(f"{{node:{self._NodeConceptStr}}} is the domain of `w` in `common_neighbor(u, v, w)`")
        # node_x, node_y = graph.Node.ref(), graph.Node.ref()
        # where(neighbor_a_rel(node_x, node_y)).define(domain_w(node_y))
        # neighbor_b_rel = self._neighbor_of(domain_w)
        #
        # node_constraint = []
        #
        # # need to reverse the args of `neighbor_b_rel()`, due to its domain constraint
        # # relies on the symmetry of `neighbor`
        # where(
        #     *node_constraint,
        #     neighbor_a_rel(node_a, neighbor_node),
        #     neighbor_b_rel(neighbor_node, node_b)
        # ).define(_common_neighbor_rel(node_a, node_b, neighbor_node))

    elif node_subset_from is node_subset_to:
        # `from_`/`to` case with object-identical subsets: compute
        # `_neighbor_of` once and use it for both positions. The subset
        # constraints on `node_a` and `node_b` are already enforced by the
        # constrained neighbor relationship, so nothing more is added.
        left_neighbors = right_neighbors = self._neighbor_of(node_subset_from)
        constraints = []

    else:
        # `from_`/`to` case with distinct subsets. Two options exist:
        # 1) compute `_neighbor_of` twice, once per subset; or 2) compute one
        # neighbor relationship over the union of both subsets and constrain
        # each position. Which is cheaper depends on the subsets, which is
        # not known here, so the simpler option (1) is used. As above, the
        # constrained relationships already enforce the subset constraints.
        left_neighbors = self._neighbor_of(node_subset_from)
        right_neighbors = self._neighbor_of(node_subset_to)
        constraints = []

    # Shared definition: `neighbor_node` is a common neighbor of `node_a` and
    # `node_b` when it is a neighbor of each, subject to the constraints
    # selected above.
    where(
        *constraints,
        left_neighbors(node_a, neighbor_node),
        right_neighbors(node_b, neighbor_node)
    ).define(result_rel(node_a, node_b, neighbor_node))

    return result_rel
|
|
1920
2367
|
|
|
1921
2368
|
|
|
@@ -2066,33 +2513,37 @@ class Graph():
|
|
|
2066
2513
|
return self._degree
|
|
2067
2514
|
else:
|
|
2068
2515
|
# Validate the 'of' parameter
|
|
2069
|
-
self._validate_node_subset_parameter(of)
|
|
2516
|
+
self._validate_node_subset_parameter('of', of)
|
|
2070
2517
|
return self._degree_of(of)
|
|
2071
2518
|
|
|
2072
2519
|
@cached_property
def _degree(self):
    """
    Lazily define and cache the self._degree relationship.

    Built by ``_create_degree_relationship`` with ``node_subset=None`` and
    tagged with the ``("graphs", "degree")`` tracking annotation.
    """
    degree_rel = self._create_degree_relationship(node_subset=None)
    degree_rel.annotate(annotations.track("graphs", "degree"))
    return degree_rel
|
|
2076
2525
|
|
|
2077
|
-
def _degree_of(self, node_subset: Relationship):
    """
    Create a degree relationship constrained to the subset of nodes
    in `node_subset`.

    The result is not cached; a new relationship is created on every call,
    so it is specific to the callsite. It is tagged with the
    ``("graphs", "degree_of")`` tracking annotation.
    """
    degree_rel = self._create_degree_relationship(node_subset=node_subset)
    degree_rel.annotate(annotations.track("graphs", "degree_of"))
    return degree_rel
|
|
2084
2535
|
|
|
2085
|
-
def _create_degree_relationship(self, *,
|
|
2536
|
+
def _create_degree_relationship(self, *, node_subset: Optional[Relationship]):
|
|
2086
2537
|
_degree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has degree {{count:Integer}}")
|
|
2087
2538
|
|
|
2088
2539
|
if self.directed:
|
|
2089
2540
|
# For directed graphs, degree is the sum of indegree and outdegree.
|
|
2090
|
-
if
|
|
2541
|
+
if node_subset is None:
|
|
2091
2542
|
indegree_rel = self._indegree
|
|
2092
2543
|
outdegree_rel = self._outdegree
|
|
2093
2544
|
else:
|
|
2094
|
-
indegree_rel = self._indegree_of(
|
|
2095
|
-
outdegree_rel = self._outdegree_of(
|
|
2545
|
+
indegree_rel = self._indegree_of(node_subset)
|
|
2546
|
+
outdegree_rel = self._outdegree_of(node_subset)
|
|
2096
2547
|
|
|
2097
2548
|
incount, outcount = Integer.ref(), Integer.ref()
|
|
2098
2549
|
where(
|
|
@@ -2101,12 +2552,12 @@ class Graph():
|
|
|
2101
2552
|
).define(_degree_rel(self.Node, incount + outcount))
|
|
2102
2553
|
else:
|
|
2103
2554
|
# For undirected graphs, degree is the count of neighbors.
|
|
2104
|
-
if
|
|
2555
|
+
if node_subset is None:
|
|
2105
2556
|
node_set = self.Node
|
|
2106
2557
|
count_neighbor_rel = self._count_neighbor
|
|
2107
2558
|
else:
|
|
2108
|
-
node_set =
|
|
2109
|
-
count_neighbor_rel = self._count_neighbor_of(
|
|
2559
|
+
node_set = node_subset
|
|
2560
|
+
count_neighbor_rel = self._count_neighbor_of(node_subset)
|
|
2110
2561
|
|
|
2111
2562
|
where(
|
|
2112
2563
|
node_set(self.Node), # Necessary given the match on the following line.
|
|
@@ -2257,34 +2708,38 @@ class Graph():
|
|
|
2257
2708
|
return self._indegree
|
|
2258
2709
|
else:
|
|
2259
2710
|
# Validate the 'of' parameter
|
|
2260
|
-
self._validate_node_subset_parameter(of)
|
|
2711
|
+
self._validate_node_subset_parameter('of', of)
|
|
2261
2712
|
return self._indegree_of(of)
|
|
2262
2713
|
|
|
2263
2714
|
@cached_property
def _indegree(self):
    """
    Lazily define and cache the self._indegree relationship.

    Built by ``_create_indegree_relationship`` with ``node_subset=None`` and
    tagged with the ``("graphs", "indegree")`` tracking annotation.
    """
    indegree_rel = self._create_indegree_relationship(node_subset=None)
    indegree_rel.annotate(annotations.track("graphs", "indegree"))
    return indegree_rel
|
|
2267
2720
|
|
|
2268
|
-
def _indegree_of(self, node_subset: Relationship):
    """
    Create an indegree relationship constrained to the subset of nodes
    in `node_subset`.

    The result is not cached; a new relationship is created on every call,
    so it is specific to the callsite. It is tagged with the
    ``("graphs", "indegree_of")`` tracking annotation.
    """
    indegree_rel = self._create_indegree_relationship(node_subset=node_subset)
    indegree_rel.annotate(annotations.track("graphs", "indegree_of"))
    return indegree_rel
|
|
2275
2730
|
|
|
2276
|
-
def _create_indegree_relationship(self, *,
|
|
2731
|
+
def _create_indegree_relationship(self, *, node_subset: Optional[Relationship]):
|
|
2277
2732
|
_indegree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has indegree {{count:Integer}}")
|
|
2278
2733
|
|
|
2279
2734
|
# Choose the appropriate count_inneighbor relationship and node set
|
|
2280
|
-
if
|
|
2735
|
+
if node_subset is None:
|
|
2281
2736
|
# No constraint - use cached count_inneighbor relationship and all nodes
|
|
2282
2737
|
count_inneighbor_rel = self._count_inneighbor
|
|
2283
2738
|
node_set = self.Node
|
|
2284
2739
|
else:
|
|
2285
2740
|
# Constrained to nodes in the subset - use constrained count_inneighbor relationship
|
|
2286
|
-
count_inneighbor_rel = self._count_inneighbor_of(
|
|
2287
|
-
node_set =
|
|
2741
|
+
count_inneighbor_rel = self._count_inneighbor_of(node_subset)
|
|
2742
|
+
node_set = node_subset
|
|
2288
2743
|
|
|
2289
2744
|
# Apply the same indegree logic for both cases
|
|
2290
2745
|
where(
|
|
@@ -2437,34 +2892,38 @@ class Graph():
|
|
|
2437
2892
|
return self._outdegree
|
|
2438
2893
|
else:
|
|
2439
2894
|
# Validate the 'of' parameter
|
|
2440
|
-
self._validate_node_subset_parameter(of)
|
|
2895
|
+
self._validate_node_subset_parameter('of', of)
|
|
2441
2896
|
return self._outdegree_of(of)
|
|
2442
2897
|
|
|
2443
2898
|
@cached_property
def _outdegree(self):
    """
    Lazily define and cache the self._outdegree relationship.

    Built by ``_create_outdegree_relationship`` with ``node_subset=None`` and
    tagged with the ``("graphs", "outdegree")`` tracking annotation.
    """
    outdegree_rel = self._create_outdegree_relationship(node_subset=None)
    outdegree_rel.annotate(annotations.track("graphs", "outdegree"))
    return outdegree_rel
|
|
2447
2904
|
|
|
2448
|
-
def _outdegree_of(self, node_subset: Relationship):
    """
    Create an outdegree relationship constrained to the subset of nodes
    in `node_subset`.

    The result is not cached; a new relationship is created on every call,
    so it is specific to the callsite. It is tagged with the
    ``("graphs", "outdegree_of")`` tracking annotation.
    """
    outdegree_rel = self._create_outdegree_relationship(node_subset=node_subset)
    outdegree_rel.annotate(annotations.track("graphs", "outdegree_of"))
    return outdegree_rel
|
|
2455
2914
|
|
|
2456
|
-
def _create_outdegree_relationship(self, *,
|
|
2915
|
+
def _create_outdegree_relationship(self, *, node_subset: Optional[Relationship]):
|
|
2457
2916
|
_outdegree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has outdegree {{count:Integer}}")
|
|
2458
2917
|
|
|
2459
2918
|
# Choose the appropriate count_outneighbor relationship and node set
|
|
2460
|
-
if
|
|
2919
|
+
if node_subset is None:
|
|
2461
2920
|
# No constraint - use cached count_outneighbor relationship and all nodes
|
|
2462
2921
|
count_outneighbor_rel = self._count_outneighbor
|
|
2463
2922
|
node_set = self.Node
|
|
2464
2923
|
else:
|
|
2465
2924
|
# Constrained to nodes in the subset - use constrained count_outneighbor relationship
|
|
2466
|
-
count_outneighbor_rel = self._count_outneighbor_of(
|
|
2467
|
-
node_set =
|
|
2925
|
+
count_outneighbor_rel = self._count_outneighbor_of(node_subset)
|
|
2926
|
+
node_set = node_subset
|
|
2468
2927
|
|
|
2469
2928
|
# Apply the same outdegree logic for both cases
|
|
2470
2929
|
where(
|
|
@@ -2582,33 +3041,37 @@ class Graph():
|
|
|
2582
3041
|
return self._weighted_degree
|
|
2583
3042
|
else:
|
|
2584
3043
|
# Validate the 'of' parameter
|
|
2585
|
-
self._validate_node_subset_parameter(of)
|
|
3044
|
+
self._validate_node_subset_parameter('of', of)
|
|
2586
3045
|
return self._weighted_degree_of(of)
|
|
2587
3046
|
|
|
2588
3047
|
@cached_property
def _weighted_degree(self):
    """
    Lazily define and cache the self._weighted_degree relationship.

    Built by ``_create_weighted_degree_relationship`` with
    ``node_subset=None`` and tagged with the
    ``("graphs", "weighted_degree")`` tracking annotation.
    """
    weighted_rel = self._create_weighted_degree_relationship(node_subset=None)
    weighted_rel.annotate(annotations.track("graphs", "weighted_degree"))
    return weighted_rel
|
|
2592
3053
|
|
|
2593
|
-
def _weighted_degree_of(self, node_subset: Relationship):
    """
    Create a weighted degree relationship constrained to the subset of nodes
    in `node_subset`.

    The result is not cached; a new relationship is created on every call,
    so it is specific to the callsite. It is tagged with the
    ``("graphs", "weighted_degree_of")`` tracking annotation.
    """
    weighted_rel = self._create_weighted_degree_relationship(node_subset=node_subset)
    weighted_rel.annotate(annotations.track("graphs", "weighted_degree_of"))
    return weighted_rel
|
|
2600
3063
|
|
|
2601
|
-
def _create_weighted_degree_relationship(self, *,
|
|
3064
|
+
def _create_weighted_degree_relationship(self, *, node_subset: Optional[Relationship]):
|
|
2602
3065
|
_weighted_degree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has weighted degree {{weight:Float}}")
|
|
2603
3066
|
|
|
2604
3067
|
if self.directed:
|
|
2605
3068
|
# For directed graphs, weighted degree is the sum of weighted indegree and weighted outdegree.
|
|
2606
|
-
if
|
|
3069
|
+
if node_subset is None:
|
|
2607
3070
|
weighted_indegree_rel = self._weighted_indegree
|
|
2608
3071
|
weighted_outdegree_rel = self._weighted_outdegree
|
|
2609
3072
|
else:
|
|
2610
|
-
weighted_indegree_rel = self._weighted_indegree_of(
|
|
2611
|
-
weighted_outdegree_rel = self._weighted_outdegree_of(
|
|
3073
|
+
weighted_indegree_rel = self._weighted_indegree_of(node_subset)
|
|
3074
|
+
weighted_outdegree_rel = self._weighted_outdegree_of(node_subset)
|
|
2612
3075
|
|
|
2613
3076
|
inweight, outweight = Float.ref(), Float.ref()
|
|
2614
3077
|
where(
|
|
@@ -2617,12 +3080,12 @@ class Graph():
|
|
|
2617
3080
|
).define(_weighted_degree_rel(self.Node, inweight + outweight))
|
|
2618
3081
|
elif not self.directed:
|
|
2619
3082
|
# Choose the appropriate node set
|
|
2620
|
-
if
|
|
3083
|
+
if node_subset is None:
|
|
2621
3084
|
# No constraint - use all nodes
|
|
2622
3085
|
node_set = self.Node
|
|
2623
3086
|
else:
|
|
2624
3087
|
# Constrained to nodes in the subset
|
|
2625
|
-
node_set =
|
|
3088
|
+
node_set = node_subset
|
|
2626
3089
|
|
|
2627
3090
|
dst, weight = self.Node.ref(), Float.ref()
|
|
2628
3091
|
where(
|
|
@@ -2738,32 +3201,36 @@ class Graph():
|
|
|
2738
3201
|
return self._weighted_indegree
|
|
2739
3202
|
else:
|
|
2740
3203
|
# Validate the 'of' parameter
|
|
2741
|
-
self._validate_node_subset_parameter(of)
|
|
3204
|
+
self._validate_node_subset_parameter('of', of)
|
|
2742
3205
|
return self._weighted_indegree_of(of)
|
|
2743
3206
|
|
|
2744
3207
|
@cached_property
def _weighted_indegree(self):
    """
    Lazily define and cache the self._weighted_indegree relationship.

    Built by ``_create_weighted_indegree_relationship`` with
    ``node_subset=None`` and tagged with the
    ``("graphs", "weighted_indegree")`` tracking annotation.
    """
    weighted_rel = self._create_weighted_indegree_relationship(node_subset=None)
    weighted_rel.annotate(annotations.track("graphs", "weighted_indegree"))
    return weighted_rel
|
|
2748
3213
|
|
|
2749
|
-
def _weighted_indegree_of(self, node_subset: Relationship):
    """
    Create a weighted indegree relationship constrained to the subset of nodes
    in `node_subset`.

    The result is not cached; a new relationship is created on every call,
    so it is specific to the callsite. It is tagged with the
    ``("graphs", "weighted_indegree_of")`` tracking annotation.
    """
    weighted_rel = self._create_weighted_indegree_relationship(node_subset=node_subset)
    weighted_rel.annotate(annotations.track("graphs", "weighted_indegree_of"))
    return weighted_rel
|
|
2756
3223
|
|
|
2757
|
-
def _create_weighted_indegree_relationship(self, *,
|
|
3224
|
+
def _create_weighted_indegree_relationship(self, *, node_subset: Optional[Relationship]):
|
|
2758
3225
|
_weighted_indegree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has weighted indegree {{weight:Float}}")
|
|
2759
3226
|
|
|
2760
3227
|
# Choose the appropriate node set
|
|
2761
|
-
if
|
|
3228
|
+
if node_subset is None:
|
|
2762
3229
|
# No constraint - use all nodes
|
|
2763
3230
|
node_set = self.Node
|
|
2764
3231
|
else:
|
|
2765
3232
|
# Constrained to nodes in the subset
|
|
2766
|
-
node_set =
|
|
3233
|
+
node_set = node_subset
|
|
2767
3234
|
# TODO: In a future cleanup pass, replace `node_set` with a `node_constraint`
|
|
2768
3235
|
# that replaces the `node_set(self.Node)` in the where clause below,
|
|
2769
3236
|
# and generates only `self.Node` (rather than `self.Node(self.Node)`)
|
|
@@ -2886,32 +3353,36 @@ class Graph():
|
|
|
2886
3353
|
return self._weighted_outdegree
|
|
2887
3354
|
else:
|
|
2888
3355
|
# Validate the 'of' parameter
|
|
2889
|
-
self._validate_node_subset_parameter(of)
|
|
3356
|
+
self._validate_node_subset_parameter('of', of)
|
|
2890
3357
|
return self._weighted_outdegree_of(of)
|
|
2891
3358
|
|
|
2892
3359
|
@cached_property
def _weighted_outdegree(self):
    """
    Lazily define and cache the self._weighted_outdegree relationship.

    Built by ``_create_weighted_outdegree_relationship`` with
    ``node_subset=None`` and tagged with the
    ``("graphs", "weighted_outdegree")`` tracking annotation.
    """
    weighted_rel = self._create_weighted_outdegree_relationship(node_subset=None)
    weighted_rel.annotate(annotations.track("graphs", "weighted_outdegree"))
    return weighted_rel
|
|
2896
3365
|
|
|
2897
|
-
def _weighted_outdegree_of(self, node_subset: Relationship):
    """
    Create a weighted outdegree relationship constrained to the subset of nodes
    in `node_subset`.

    The result is not cached; a new relationship is created on every call,
    so it is specific to the callsite. It is tagged with the
    ``("graphs", "weighted_outdegree_of")`` tracking annotation.
    """
    weighted_rel = self._create_weighted_outdegree_relationship(node_subset=node_subset)
    weighted_rel.annotate(annotations.track("graphs", "weighted_outdegree_of"))
    return weighted_rel
|
|
2904
3375
|
|
|
2905
|
-
def _create_weighted_outdegree_relationship(self, *,
|
|
3376
|
+
def _create_weighted_outdegree_relationship(self, *, node_subset: Optional[Relationship]):
|
|
2906
3377
|
_weighted_outdegree_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has weighted outdegree {{weight:Float}}")
|
|
2907
3378
|
|
|
2908
3379
|
# Choose the appropriate node set
|
|
2909
|
-
if
|
|
3380
|
+
if node_subset is None:
|
|
2910
3381
|
# No constraint - use all nodes
|
|
2911
3382
|
node_set = self.Node
|
|
2912
3383
|
else:
|
|
2913
3384
|
# Constrained to nodes in the subset
|
|
2914
|
-
node_set =
|
|
3385
|
+
node_set = node_subset
|
|
2915
3386
|
|
|
2916
3387
|
# Apply the weighted outdegree logic for both cases
|
|
2917
3388
|
dst, outweight = self.Node.ref(), Float.ref()
|
|
@@ -3061,32 +3532,36 @@ class Graph():
|
|
|
3061
3532
|
return self._degree_centrality
|
|
3062
3533
|
else:
|
|
3063
3534
|
# Validate the 'of' parameter
|
|
3064
|
-
self._validate_node_subset_parameter(of)
|
|
3535
|
+
self._validate_node_subset_parameter('of', of)
|
|
3065
3536
|
return self._degree_centrality_of(of)
|
|
3066
3537
|
|
|
3067
3538
|
@cached_property
def _degree_centrality(self):
    """
    Lazily define and cache the self._degree_centrality relationship.

    Built by ``_create_degree_centrality_relationship`` with
    ``node_subset=None`` and tagged with the
    ``("graphs", "degree_centrality")`` tracking annotation.
    """
    centrality_rel = self._create_degree_centrality_relationship(node_subset=None)
    centrality_rel.annotate(annotations.track("graphs", "degree_centrality"))
    return centrality_rel
|
|
3071
3544
|
|
|
3072
|
-
def _degree_centrality_of(self, node_subset: Relationship):
    """
    Create a degree centrality relationship constrained to the subset of nodes
    in `node_subset`.

    The result is not cached; a new relationship is created on every call,
    so it is specific to the callsite. It is tagged with the
    ``("graphs", "degree_centrality_of")`` tracking annotation.
    """
    centrality_rel = self._create_degree_centrality_relationship(node_subset=node_subset)
    centrality_rel.annotate(annotations.track("graphs", "degree_centrality_of"))
    return centrality_rel
|
|
3079
3554
|
|
|
3080
|
-
def _create_degree_centrality_relationship(self, *,
|
|
3555
|
+
def _create_degree_centrality_relationship(self, *, node_subset: Optional[Relationship]):
|
|
3081
3556
|
"""Create a degree centrality relationship, optionally constrained to a subset of nodes."""
|
|
3082
3557
|
_degree_centrality_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has {{degree_centrality:Float}}")
|
|
3083
3558
|
|
|
3084
|
-
if
|
|
3559
|
+
if node_subset is None:
|
|
3085
3560
|
degree_rel = self._degree
|
|
3086
3561
|
node_constraint = [] # No constraint on nodes.
|
|
3087
3562
|
else:
|
|
3088
|
-
degree_rel = self._degree_of(
|
|
3089
|
-
node_constraint = [
|
|
3563
|
+
degree_rel = self._degree_of(node_subset)
|
|
3564
|
+
node_constraint = [node_subset(self.Node)] # Nodes constrained to given subset.
|
|
3090
3565
|
|
|
3091
3566
|
degree = Integer.ref()
|
|
3092
3567
|
|
|
@@ -3108,10 +3583,10 @@ class Graph():
|
|
|
3108
3583
|
# General case, i.e. with more than one node.
|
|
3109
3584
|
if self.weighted:
|
|
3110
3585
|
maybe_weighted_degree = Float.ref()
|
|
3111
|
-
if
|
|
3586
|
+
if node_subset is None:
|
|
3112
3587
|
maybe_weighted_degree_rel = self._weighted_degree
|
|
3113
3588
|
else:
|
|
3114
|
-
maybe_weighted_degree_rel = self._weighted_degree_of(
|
|
3589
|
+
maybe_weighted_degree_rel = self._weighted_degree_of(node_subset)
|
|
3115
3590
|
else: # not self.weighted
|
|
3116
3591
|
maybe_weighted_degree = Integer.ref()
|
|
3117
3592
|
maybe_weighted_degree_rel = degree_rel
|
|
@@ -3572,6 +4047,7 @@ class Graph():
|
|
|
3572
4047
|
def _triangle(self):
|
|
3573
4048
|
"""Lazily define and cache the self._triangle relationship."""
|
|
3574
4049
|
_triangle_rel = self._model.Relationship(f"{{node_a:{self._NodeConceptStr}}} and {{node_b:{self._NodeConceptStr}}} and {{node_c:{self._NodeConceptStr}}} form a triangle")
|
|
4050
|
+
_triangle_rel.annotate(annotations.track("graphs", "triangle"))
|
|
3575
4051
|
|
|
3576
4052
|
a, b, c = self.Node.ref(), self.Node.ref(), self.Node.ref()
|
|
3577
4053
|
|
|
@@ -3714,6 +4190,7 @@ class Graph():
|
|
|
3714
4190
|
def _unique_triangle(self):
|
|
3715
4191
|
"""Lazily define and cache the self._unique_triangle relationship."""
|
|
3716
4192
|
_unique_triangle_rel = self._model.Relationship(f"{{node_a:{self._NodeConceptStr}}} and {{node_b:{self._NodeConceptStr}}} and {{node_c:{self._NodeConceptStr}}} form unique triangle")
|
|
4193
|
+
_unique_triangle_rel.annotate(annotations.track("graphs", "unique_triangle"))
|
|
3717
4194
|
|
|
3718
4195
|
node_a, node_b, node_c = self.Node.ref(), self.Node.ref(), self.Node.ref()
|
|
3719
4196
|
|
|
@@ -3849,6 +4326,7 @@ class Graph():
|
|
|
3849
4326
|
def _num_triangles(self):
|
|
3850
4327
|
"""Lazily define and cache the self._num_triangles relationship."""
|
|
3851
4328
|
_num_triangles_rel = self._model.Relationship("The graph has {num_triangles:Integer} triangles")
|
|
4329
|
+
_num_triangles_rel.annotate(annotations.track("graphs", "num_triangles"))
|
|
3852
4330
|
|
|
3853
4331
|
_num_triangles = Integer.ref()
|
|
3854
4332
|
node_a, node_b, node_c = self.Node.ref(), self.Node.ref(), self.Node.ref()
|
|
@@ -3966,31 +4444,35 @@ class Graph():
|
|
|
3966
4444
|
|
|
3967
4445
|
"""
|
|
3968
4446
|
if of is not None:
|
|
3969
|
-
self._validate_node_subset_parameter(of)
|
|
4447
|
+
self._validate_node_subset_parameter('of', of)
|
|
3970
4448
|
return self._triangle_count_of(of)
|
|
3971
4449
|
return self._triangle_count
|
|
3972
4450
|
|
|
3973
4451
|
@cached_property
|
|
3974
4452
|
def _triangle_count(self):
|
|
3975
4453
|
"""Lazily define and cache the self._triangle_count relationship."""
|
|
3976
|
-
|
|
4454
|
+
_triangle_count_rel = self._create_triangle_count_relationship(node_subset=None)
|
|
4455
|
+
_triangle_count_rel.annotate(annotations.track("graphs", "triangle_count"))
|
|
4456
|
+
return _triangle_count_rel
|
|
3977
4457
|
|
|
3978
|
-
def _triangle_count_of(self,
|
|
4458
|
+
def _triangle_count_of(self, node_subset: Relationship):
|
|
3979
4459
|
"""
|
|
3980
4460
|
Create a triangle count relationship constrained to the subset of nodes
|
|
3981
|
-
in `
|
|
4461
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
3982
4462
|
specific to the callsite.
|
|
3983
4463
|
"""
|
|
3984
|
-
|
|
4464
|
+
_triangle_count_rel = self._create_triangle_count_relationship(node_subset=node_subset)
|
|
4465
|
+
_triangle_count_rel.annotate(annotations.track("graphs", "triangle_count_of"))
|
|
4466
|
+
return _triangle_count_rel
|
|
3985
4467
|
|
|
3986
|
-
def _create_triangle_count_relationship(self, *,
|
|
4468
|
+
def _create_triangle_count_relationship(self, *, node_subset: Optional[Relationship]):
|
|
3987
4469
|
"""Create a triangle count relationship, optionally constrained to a subset of nodes."""
|
|
3988
4470
|
_triangle_count_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} belongs to {{count:Integer}} triangles")
|
|
3989
4471
|
|
|
3990
|
-
if
|
|
4472
|
+
if node_subset is None:
|
|
3991
4473
|
node_constraint = self.Node # No constraint on nodes.
|
|
3992
4474
|
else:
|
|
3993
|
-
node_constraint =
|
|
4475
|
+
node_constraint = node_subset(self.Node) # Nodes constrained to given subset.
|
|
3994
4476
|
|
|
3995
4477
|
where(
|
|
3996
4478
|
node_constraint,
|
|
@@ -4113,7 +4595,7 @@ class Graph():
|
|
|
4113
4595
|
|
|
4114
4596
|
|
|
4115
4597
|
@include_in_docs
|
|
4116
|
-
def local_clustering_coefficient(self):
|
|
4598
|
+
def local_clustering_coefficient(self, *, of: Optional[Relationship] = None):
|
|
4117
4599
|
"""Returns a binary relationship containing the local clustering coefficient of each node.
|
|
4118
4600
|
|
|
4119
4601
|
The local clustering coefficient quantifies how close a node's neighbors
|
|
@@ -4122,6 +4604,14 @@ class Graph():
|
|
|
4122
4604
|
directly connecting them, and 1.0 indicates all neighbors have edges
|
|
4123
4605
|
directly connecting them.
|
|
4124
4606
|
|
|
4607
|
+
Parameters
|
|
4608
|
+
----------
|
|
4609
|
+
of : Relationship, optional
|
|
4610
|
+
A unary relationship containing a subset of the graph's nodes. When
|
|
4611
|
+
provided, constrains the domain of the local clustering coefficient
|
|
4612
|
+
computation: only coefficients of nodes in this relationship are
|
|
4613
|
+
computed and returned.
|
|
4614
|
+
|
|
4125
4615
|
Returns
|
|
4126
4616
|
-------
|
|
4127
4617
|
Relationship
|
|
@@ -4148,17 +4638,6 @@ class Graph():
|
|
|
4148
4638
|
| Directed | No | Undirected only. |
|
|
4149
4639
|
| Weighted | Yes | Weights are ignored. |
|
|
4150
4640
|
|
|
4151
|
-
Notes
|
|
4152
|
-
-----
|
|
4153
|
-
The formal definition of the local clustering coefficient (`C`) for a
|
|
4154
|
-
node (`v`) can be given as::
|
|
4155
|
-
|
|
4156
|
-
C(v) = (2 * num_edges) / (degree(v) * (degree(v) - 1))
|
|
4157
|
-
|
|
4158
|
-
Here, `num_edges` represents the number of edges between the
|
|
4159
|
-
neighbors of node `v`, and `degree(v)` represents the degree of the
|
|
4160
|
-
node, i.e., the number of edges connected to the node.
|
|
4161
|
-
|
|
4162
4641
|
Examples
|
|
4163
4642
|
--------
|
|
4164
4643
|
>>> from relationalai.semantics import Model, define, select, Float
|
|
@@ -4194,6 +4673,41 @@ class Graph():
|
|
|
4194
4673
|
3 4 0.333333
|
|
4195
4674
|
4 5 0.000000
|
|
4196
4675
|
|
|
4676
|
+
>>> # 4. Use 'of' parameter to constrain the set of nodes to compute local clustering coefficients of
|
|
4677
|
+
>>> # Define a subset containing only nodes 1 and 3
|
|
4678
|
+
>>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
|
|
4679
|
+
>>> node = Node.ref()
|
|
4680
|
+
>>> where(union(node.id == 1, node.id == 3)).define(subset(node))
|
|
4681
|
+
>>>
|
|
4682
|
+
>>> # Get local clustering coefficients only of nodes in the subset
|
|
4683
|
+
>>> constrained_lcc = graph.local_clustering_coefficient(of=subset)
|
|
4684
|
+
>>> select(node.id, coeff).where(constrained_lcc(node, coeff)).inspect()
|
|
4685
|
+
▰▰▰▰ Setup complete
|
|
4686
|
+
id coeff
|
|
4687
|
+
0 1 1.000000
|
|
4688
|
+
1 3 0.666667
|
|
4689
|
+
|
|
4690
|
+
Notes
|
|
4691
|
+
-----
|
|
4692
|
+
The local clustering coefficient for node `v` is::
|
|
4693
|
+
|
|
4694
|
+
(2 * num_neighbor_edges(v)) / (ext_degree(v) * (ext_degree(v) - 1))
|
|
4695
|
+
|
|
4696
|
+
where `num_neighbor_edges(v)` is the number of edges between
|
|
4697
|
+
the neighbors of node `v`, and `ext_degree(v)` is the degree of the
|
|
4698
|
+
node excluding self-loops. If `ext_degree(v)` is less than 2,
|
|
4699
|
+
the local clustering coefficient is 0.0.
|
|
4700
|
+
|
|
4701
|
+
The ``local_clustering_coefficient()`` method, called with no parameters, computes
|
|
4702
|
+
and caches the full local clustering coefficient relationship, providing efficient
|
|
4703
|
+
reuse across multiple calls to ``local_clustering_coefficient()``. In contrast,
|
|
4704
|
+
``local_clustering_coefficient(of=subset)`` computes a constrained relationship
|
|
4705
|
+
specific to the passed-in ``subset`` and that call site. When a significant fraction
|
|
4706
|
+
of the local clustering coefficient relation is needed across a program,
|
|
4707
|
+
``local_clustering_coefficient()`` is typically more efficient; this is the typical
|
|
4708
|
+
case. Use ``local_clustering_coefficient(of=subset)`` only when small subsets of the
|
|
4709
|
+
local clustering coefficient relationship are needed collectively across the program.
|
|
4710
|
+
|
|
4197
4711
|
|
|
4198
4712
|
See Also
|
|
4199
4713
|
--------
|
|
@@ -4206,29 +4720,51 @@ class Graph():
|
|
|
4206
4720
|
raise NotImplementedError(
|
|
4207
4721
|
"`local_clustering_coefficient` is not applicable to directed graphs"
|
|
4208
4722
|
)
|
|
4723
|
+
|
|
4724
|
+
if of is not None:
|
|
4725
|
+
self._validate_node_subset_parameter('of', of)
|
|
4726
|
+
return self._local_clustering_coefficient_of(of)
|
|
4209
4727
|
return self._local_clustering_coefficient
|
|
4210
4728
|
|
|
4211
4729
|
@cached_property
|
|
4212
4730
|
def _local_clustering_coefficient(self):
|
|
4731
|
+
"""Lazily define and cache the self._local_clustering_coefficient relationship."""
|
|
4732
|
+
_local_clustering_coefficient_rel = self._create_local_clustering_coefficient_relationship(node_subset=None)
|
|
4733
|
+
_local_clustering_coefficient_rel.annotate(annotations.track("graphs", "local_clustering_coefficient"))
|
|
4734
|
+
return _local_clustering_coefficient_rel
|
|
4735
|
+
|
|
4736
|
+
def _local_clustering_coefficient_of(self, node_subset: Relationship):
|
|
4213
4737
|
"""
|
|
4214
|
-
|
|
4215
|
-
|
|
4738
|
+
Create a local clustering coefficient relationship constrained to the subset of nodes
|
|
4739
|
+
in `node_subset`. Note this relationship is not cached; it is
|
|
4740
|
+
specific to the callsite.
|
|
4216
4741
|
"""
|
|
4217
|
-
_local_clustering_coefficient_rel = self.
|
|
4742
|
+
_local_clustering_coefficient_rel = self._create_local_clustering_coefficient_relationship(node_subset=node_subset)
|
|
4743
|
+
_local_clustering_coefficient_rel.annotate(annotations.track("graphs", "local_clustering_coefficient_of"))
|
|
4744
|
+
return _local_clustering_coefficient_rel
|
|
4218
4745
|
|
|
4219
|
-
|
|
4220
|
-
|
|
4221
|
-
|
|
4222
|
-
)
|
|
4746
|
+
def _create_local_clustering_coefficient_relationship(self, *, node_subset: Optional[Relationship]):
|
|
4747
|
+
"""Create a local clustering coefficient relationship, optionally constrained to a subset of nodes."""
|
|
4748
|
+
_local_clustering_coefficient_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has local clustering coefficient {{coefficient:Float}}")
|
|
4223
4749
|
|
|
4224
4750
|
node = self.Node.ref()
|
|
4751
|
+
|
|
4752
|
+
if node_subset is None:
|
|
4753
|
+
degree_no_self_rel = self._degree_no_self
|
|
4754
|
+
triangle_count_rel = self._triangle_count
|
|
4755
|
+
node_constraint = node # No constraint on nodes.
|
|
4756
|
+
else:
|
|
4757
|
+
degree_no_self_rel = self._degree_no_self_of(node_subset)
|
|
4758
|
+
triangle_count_rel = self._triangle_count_of(node_subset)
|
|
4759
|
+
node_constraint = node_subset(node) # Nodes constrained to given subset.
|
|
4760
|
+
|
|
4225
4761
|
degree_no_self = Integer.ref()
|
|
4226
4762
|
triangle_count = Integer.ref()
|
|
4227
4763
|
where(
|
|
4228
|
-
|
|
4764
|
+
node_constraint,
|
|
4229
4765
|
_lcc := where(
|
|
4230
|
-
|
|
4231
|
-
|
|
4766
|
+
degree_no_self_rel(node, degree_no_self),
|
|
4767
|
+
triangle_count_rel(node, triangle_count),
|
|
4232
4768
|
degree_no_self > 1
|
|
4233
4769
|
).select(
|
|
4234
4770
|
2.0 * triangle_count / (degree_no_self * (degree_no_self - 1.0))
|
|
@@ -4243,11 +4779,32 @@ class Graph():
|
|
|
4243
4779
|
Lazily define and cache the self._degree_no_self relationship,
|
|
4244
4780
|
a non-public helper for local_clustering_coefficient.
|
|
4245
4781
|
"""
|
|
4782
|
+
return self._create_degree_no_self_relationship(node_subset=None)
|
|
4783
|
+
|
|
4784
|
+
def _degree_no_self_of(self, node_subset: Relationship):
|
|
4785
|
+
"""
|
|
4786
|
+
Create a self-loop-exclusive degree relationship constrained to
|
|
4787
|
+
the subset of nodes in `node_subset`. Note this relationship
|
|
4788
|
+
is not cached; it is specific to the callsite.
|
|
4789
|
+
"""
|
|
4790
|
+
return self._create_degree_no_self_relationship(node_subset=node_subset)
|
|
4791
|
+
|
|
4792
|
+
def _create_degree_no_self_relationship(self, *, node_subset: Optional[Relationship]):
|
|
4793
|
+
"""
|
|
4794
|
+
Create a self-loop-exclusive degree relationship,
|
|
4795
|
+
optionally constrained to a subset of nodes.
|
|
4796
|
+
"""
|
|
4246
4797
|
_degree_no_self_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} has degree excluding self loops {{num:Integer}}")
|
|
4247
4798
|
|
|
4248
4799
|
node, neighbor = self.Node.ref(), self.Node.ref()
|
|
4800
|
+
|
|
4801
|
+
if node_subset is None:
|
|
4802
|
+
node_constraint = node # No constraint on nodes.
|
|
4803
|
+
else:
|
|
4804
|
+
node_constraint = node_subset(node) # Nodes constrained to given subset.
|
|
4805
|
+
|
|
4249
4806
|
where(
|
|
4250
|
-
|
|
4807
|
+
node_constraint,
|
|
4251
4808
|
_dns := count(neighbor).per(node).where(self._no_loop_edge(node, neighbor)) | 0,
|
|
4252
4809
|
).define(_degree_no_self_rel(node, _dns))
|
|
4253
4810
|
|
|
@@ -4331,6 +4888,7 @@ class Graph():
|
|
|
4331
4888
|
which only applies to undirected graphs.
|
|
4332
4889
|
"""
|
|
4333
4890
|
_average_clustering_coefficient_rel = self._model.Relationship("The graph has average clustering coefficient {{coefficient:Float}}")
|
|
4891
|
+
_average_clustering_coefficient_rel.annotate(annotations.track("graphs", "average_clustering_coefficient"))
|
|
4334
4892
|
|
|
4335
4893
|
if self.directed:
|
|
4336
4894
|
raise NotImplementedError(
|
|
@@ -4471,6 +5029,7 @@ class Graph():
|
|
|
4471
5029
|
def _reachable_from(self):
|
|
4472
5030
|
"""Lazily define and cache the self._reachable_from relationship."""
|
|
4473
5031
|
_reachable_from_rel = self._model.Relationship(f"{{node_a:{self._NodeConceptStr}}} reaches {{node_b:{self._NodeConceptStr}}}")
|
|
5032
|
+
_reachable_from_rel.annotate(annotations.track("graphs", "reachable_from"))
|
|
4474
5033
|
|
|
4475
5034
|
node_a, node_b, node_c = self.Node.ref(), self.Node.ref(), self.Node.ref()
|
|
4476
5035
|
define(_reachable_from_rel(node_a, node_a))
|
|
@@ -4613,9 +5172,12 @@ class Graph():
|
|
|
4613
5172
|
def _distance(self):
|
|
4614
5173
|
"""Lazily define and cache the self._distance relationship."""
|
|
4615
5174
|
if not self.weighted:
|
|
4616
|
-
|
|
5175
|
+
_distance_rel = self._distance_non_weighted
|
|
4617
5176
|
else:
|
|
4618
|
-
|
|
5177
|
+
_distance_rel = self._distance_weighted
|
|
5178
|
+
|
|
5179
|
+
_distance_rel.annotate(annotations.track("graphs", "distance"))
|
|
5180
|
+
return _distance_rel
|
|
4619
5181
|
|
|
4620
5182
|
@cached_property
|
|
4621
5183
|
def _distance_weighted(self):
|
|
@@ -4741,6 +5303,7 @@ class Graph():
|
|
|
4741
5303
|
def _weakly_connected_component(self):
|
|
4742
5304
|
"""Lazily define and cache the self._weakly_connected_component relationship."""
|
|
4743
5305
|
_weakly_connected_component_rel = self._model.Relationship(f"{{node:{self._NodeConceptStr}}} is in the connected component {{id:{self._NodeConceptStr}}}")
|
|
5306
|
+
_weakly_connected_component_rel.annotate(annotations.track("graphs", "weakly_connected_component"))
|
|
4744
5307
|
|
|
4745
5308
|
node, node_v, component = self.Node.ref(), self.Node.ref(), self.Node.ref()
|
|
4746
5309
|
node, component = union(
|
|
@@ -4864,6 +5427,8 @@ class Graph():
|
|
|
4864
5427
|
"""
|
|
4865
5428
|
_diameter_range_min_rel = self._model.Relationship("The graph has a min diameter range of {value:Integer}")
|
|
4866
5429
|
_diameter_range_max_rel = self._model.Relationship("The graph has a max diameter range of {value:Integer}")
|
|
5430
|
+
_diameter_range_min_rel.annotate(annotations.track("graphs", "diameter_range_min"))
|
|
5431
|
+
_diameter_range_max_rel.annotate(annotations.track("graphs", "diameter_range_max"))
|
|
4867
5432
|
|
|
4868
5433
|
component_node_pairs = self._model.Relationship(f"component id {{cid:{self._NodeConceptStr}}} has node id {{nid:{self._NodeConceptStr}}}")
|
|
4869
5434
|
nodeid, cid, degreevalue = self.Node.ref(), self.Node.ref(), Integer.ref()
|
|
@@ -4924,16 +5489,22 @@ class Graph():
|
|
|
4924
5489
|
|
|
4925
5490
|
@include_in_docs
|
|
4926
5491
|
def is_connected(self):
|
|
4927
|
-
"""Returns a
|
|
5492
|
+
"""Returns a unary relationship containing whether the graph is connected.
|
|
4928
5493
|
|
|
4929
5494
|
A graph is considered connected if every node is reachable from every
|
|
4930
5495
|
other node in the underlying undirected graph.
|
|
4931
5496
|
|
|
4932
5497
|
Returns
|
|
4933
5498
|
-------
|
|
4934
|
-
|
|
4935
|
-
A
|
|
4936
|
-
|
|
5499
|
+
Relationship
|
|
5500
|
+
A unary relationship containing a boolean indicator of whether the graph
|
|
5501
|
+
is connected.
|
|
5502
|
+
|
|
5503
|
+
Relationship Schema
|
|
5504
|
+
-------------------
|
|
5505
|
+
``is_connected(connected)``
|
|
5506
|
+
|
|
5507
|
+
* **connected** (*Boolean*): Whether the graph is connected.
|
|
4937
5508
|
|
|
4938
5509
|
Supported Graph Types
|
|
4939
5510
|
---------------------
|
|
@@ -4951,8 +5522,6 @@ class Graph():
|
|
|
4951
5522
|
--------
|
|
4952
5523
|
**Connected Graph Example**
|
|
4953
5524
|
|
|
4954
|
-
The following query will produce a result because the graph is connected.
|
|
4955
|
-
|
|
4956
5525
|
>>> from relationalai.semantics import Model, define, select
|
|
4957
5526
|
>>> from relationalai.semantics.reasoners.graph import Graph
|
|
4958
5527
|
>>>
|
|
@@ -4970,17 +5539,14 @@ class Graph():
|
|
|
4970
5539
|
... Edge.new(src=n4, dst=n3),
|
|
4971
5540
|
... )
|
|
4972
5541
|
>>>
|
|
4973
|
-
>>> # 3.
|
|
4974
|
-
>>> select(
|
|
5542
|
+
>>> # 3. Select and inspect the relation
|
|
5543
|
+
>>> select(graph.is_connected()).inspect()
|
|
4975
5544
|
▰▰▰▰ Setup complete
|
|
4976
|
-
|
|
4977
|
-
0
|
|
5545
|
+
is_connected
|
|
5546
|
+
0 True
|
|
4978
5547
|
|
|
4979
5548
|
**Disconnected Graph Example**
|
|
4980
5549
|
|
|
4981
|
-
The following query will produce no results because the graph is not
|
|
4982
|
-
connected.
|
|
4983
|
-
|
|
4984
5550
|
>>> from relationalai.semantics import Model, define, select
|
|
4985
5551
|
>>> from relationalai.semantics.reasoners.graph import Graph
|
|
4986
5552
|
>>>
|
|
@@ -4998,22 +5564,31 @@ class Graph():
|
|
|
4998
5564
|
... Edge.new(src=n4, dst=n5), # This edge creates a separate component
|
|
4999
5565
|
... )
|
|
5000
5566
|
>>>
|
|
5001
|
-
>>> # 3.
|
|
5002
|
-
>>> select(
|
|
5567
|
+
>>> # 3. Select and inspect the relation
|
|
5568
|
+
>>> select(graph.is_connected()).inspect()
|
|
5003
5569
|
▰▰▰▰ Setup complete
|
|
5004
|
-
|
|
5005
|
-
|
|
5006
|
-
Index: []
|
|
5570
|
+
is_connected
|
|
5571
|
+
0 False
|
|
5007
5572
|
|
|
5008
5573
|
"""
|
|
5009
|
-
|
|
5010
|
-
|
|
5011
|
-
|
|
5012
|
-
|
|
5013
|
-
|
|
5574
|
+
return self._is_connected
|
|
5575
|
+
|
|
5576
|
+
@cached_property
|
|
5577
|
+
def _is_connected(self):
|
|
5578
|
+
"""Lazily define and cache the self._is_connected relationship."""
|
|
5579
|
+
_is_connected_rel = self._model.Relationship("'The graph is connected' is {is_connected:Boolean}")
|
|
5580
|
+
_is_connected_rel.annotate(annotations.track("graphs", "is_connected"))
|
|
5581
|
+
|
|
5582
|
+
where(
|
|
5014
5583
|
self._num_nodes(0) |
|
|
5015
5584
|
count(self._reachable_from_min_node(self.Node.ref())) == self._num_nodes(Integer.ref())
|
|
5016
|
-
)
|
|
5585
|
+
).define(_is_connected_rel(True))
|
|
5586
|
+
|
|
5587
|
+
where(
|
|
5588
|
+
not_(_is_connected_rel(True))
|
|
5589
|
+
).define(_is_connected_rel(False))
|
|
5590
|
+
|
|
5591
|
+
return _is_connected_rel
|
|
5017
5592
|
|
|
5018
5593
|
|
|
5019
5594
|
@include_in_docs
|
|
@@ -5179,6 +5754,7 @@ class Graph():
|
|
|
5179
5754
|
def _jaccard_similarity(self):
|
|
5180
5755
|
"""Lazily define and cache the self._jaccard_similarity relationship."""
|
|
5181
5756
|
_jaccard_similarity_rel = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} has a similarity to {{node_v:{self._NodeConceptStr}}} of {{similarity:Float}}")
|
|
5757
|
+
_jaccard_similarity_rel.annotate(annotations.track("graphs", "jaccard_similarity"))
|
|
5182
5758
|
|
|
5183
5759
|
if not self.weighted:
|
|
5184
5760
|
node_u, node_v = self.Node.ref(), self.Node.ref()
|
|
@@ -5270,19 +5846,72 @@ class Graph():
|
|
|
5270
5846
|
|
|
5271
5847
|
|
|
5272
5848
|
@include_in_docs
|
|
5273
|
-
def cosine_similarity(
|
|
5274
|
-
|
|
5849
|
+
def cosine_similarity(
|
|
5850
|
+
self,
|
|
5851
|
+
*,
|
|
5852
|
+
full: Optional[bool] = None,
|
|
5853
|
+
from_: Optional[Relationship] = None,
|
|
5854
|
+
to: Optional[Relationship] = None,
|
|
5855
|
+
between: Optional[Relationship] = None,
|
|
5856
|
+
):
|
|
5857
|
+
"""Returns a ternary relationship containing
|
|
5858
|
+
the cosine similarity for pairs of nodes.
|
|
5275
5859
|
|
|
5276
5860
|
The cosine similarity measures the similarity between two nodes based
|
|
5277
5861
|
on the angle between their neighborhood vectors. The score ranges from
|
|
5278
5862
|
0.0 to 1.0, inclusive, where 1.0 indicates identical sets of neighbors.
|
|
5279
5863
|
|
|
5864
|
+
Parameters
|
|
5865
|
+
----------
|
|
5866
|
+
full : bool, optional
|
|
5867
|
+
If ``True``, computes the cosine similarity for all pairs
|
|
5868
|
+
of nodes in the graph. This computation can be expensive for large graphs,
|
|
5869
|
+
as the result can scale quadratically in the number of nodes. Mutually exclusive
|
|
5870
|
+
with other parameters.
|
|
5871
|
+
Default is ``None``.
|
|
5872
|
+
from_ : Relationship, optional
|
|
5873
|
+
A unary relationship containing a subset of the graph's nodes. When
|
|
5874
|
+
provided, constrains the domain of the cosine similarity computation: only
|
|
5875
|
+
cosine similarity scores for node pairs where the first node is
|
|
5876
|
+
in this relationship are computed and returned. Mutually exclusive with
|
|
5877
|
+
``full`` and ``between``.
|
|
5878
|
+
Default is ``None``.
|
|
5879
|
+
to : Relationship, optional
|
|
5880
|
+
A unary relationship containing a subset of the graph's nodes. Can only
|
|
5881
|
+
be used together with the ``from_`` parameter. When provided with ``from_``,
|
|
5882
|
+
constrains the domain of the cosine similarity computation: only
|
|
5883
|
+
cosine similarity scores for node pairs where the first node is
|
|
5884
|
+
in ``from_`` and the second node is in ``to`` are computed and returned.
|
|
5885
|
+
Default is ``None``.
|
|
5886
|
+
between : Relationship, optional
|
|
5887
|
+
A binary relationship containing pairs of nodes. When provided,
|
|
5888
|
+
constrains the domain of the cosine similarity computation: only
|
|
5889
|
+
cosine similarity scores for the specific node pairs in
|
|
5890
|
+
this relationship are computed and returned. Mutually exclusive
|
|
5891
|
+
with other parameters.
|
|
5892
|
+
Default is ``None``.
|
|
5893
|
+
|
|
5280
5894
|
Returns
|
|
5281
5895
|
-------
|
|
5282
5896
|
Relationship
|
|
5283
5897
|
A ternary relationship where each tuple represents a pair of nodes
|
|
5284
5898
|
and their cosine similarity.
|
|
5285
5899
|
|
|
5900
|
+
Raises
|
|
5901
|
+
------
|
|
5902
|
+
ValueError
|
|
5903
|
+
If ``full`` is provided with any other parameter.
|
|
5904
|
+
If ``between`` is provided with any other parameter.
|
|
5905
|
+
If ``from_`` is provided with any parameter other than ``to``.
|
|
5906
|
+
If none of ``full``, ``from_``, or ``between`` is provided.
|
|
5907
|
+
If ``full`` is not ``True`` or ``None``.
|
|
5908
|
+
AssertionError
|
|
5909
|
+
If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
|
|
5910
|
+
If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
|
|
5911
|
+
If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
|
|
5912
|
+
If ``from_`` or ``to`` is not a unary relationship.
|
|
5913
|
+
If ``between`` is not a binary relationship.
|
|
5914
|
+
|
|
5286
5915
|
Relationship Schema
|
|
5287
5916
|
-------------------
|
|
5288
5917
|
``cosine_similarity(node_u, node_v, score)``
|
|
@@ -5315,6 +5944,36 @@ class Graph():
|
|
|
5315
5944
|
vectors contain only non-negative elements. Therefore, the cosine
|
|
5316
5945
|
similarity score is always between 0.0 and 1.0, inclusive.
|
|
5317
5946
|
|
|
5947
|
+
The ``cosine_similarity(full=True)`` method computes and caches
|
|
5948
|
+
the full cosine similarity relationship for all pairs of nodes,
|
|
5949
|
+
providing efficient reuse across multiple calls. This can be expensive
|
|
5950
|
+
as the result can contain O(|V|²) tuples.
|
|
5951
|
+
|
|
5952
|
+
Calling ``cosine_similarity()`` without arguments raises a ``ValueError``,
|
|
5953
|
+
to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
|
|
5954
|
+
|
|
5955
|
+
In contrast, ``cosine_similarity(from_=subset)`` constrains the computation to
|
|
5956
|
+
tuples with the first position in the passed-in ``subset``. The result is
|
|
5957
|
+
not cached; it is specific to the call site. When a significant fraction of
|
|
5958
|
+
the cosine similarity relation is needed across a program,
|
|
5959
|
+
``cosine_similarity(full=True)`` is typically more efficient. Use
|
|
5960
|
+
``cosine_similarity(from_=subset)`` only when small subsets of
|
|
5961
|
+
the cosine similarity relationship are needed
|
|
5962
|
+
collectively across the program.
|
|
5963
|
+
|
|
5964
|
+
The ``to`` parameter can be used together with ``from_`` to further
|
|
5965
|
+
constrain the computation: ``cosine_similarity(from_=subset_a, to=subset_b)``
|
|
5966
|
+
computes cosine similarity scores only for node pairs where the first node is in
|
|
5967
|
+
``subset_a`` and the second node is in ``subset_b``. (Since ``cosine_similarity``
|
|
5968
|
+
is symmetric in its first two positions, using ``to`` without ``from_`` would
|
|
5969
|
+
be functionally redundant, and is not allowed.)
|
|
5970
|
+
|
|
5971
|
+
The ``between`` parameter provides another way to constrain the computation.
|
|
5972
|
+
Unlike ``from_`` and ``to``, which allow you to independently constrain the first
|
|
5973
|
+
and second positions in ``cosine_similarity`` tuples to sets of nodes, ``between``
|
|
5974
|
+
allows you to constrain the first and second positions, jointly, to specific pairs
|
|
5975
|
+
of nodes.
|
|
5976
|
+
|
|
5318
5977
|
Examples
|
|
5319
5978
|
--------
|
|
5320
5979
|
**Unweighted Graph Examples**
|
|
@@ -5336,7 +5995,7 @@ class Graph():
|
|
|
5336
5995
|
... Edge.new(src=n4, dst=n3),
|
|
5337
5996
|
... )
|
|
5338
5997
|
>>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
|
|
5339
|
-
>>> cosine_similarity = graph.cosine_similarity()
|
|
5998
|
+
>>> cosine_similarity = graph.cosine_similarity(full=True)
|
|
5340
5999
|
>>> select(score).where(cosine_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
|
|
5341
6000
|
▰▰▰▰ Setup complete
|
|
5342
6001
|
score
|
|
@@ -5359,7 +6018,7 @@ class Graph():
|
|
|
5359
6018
|
... Edge.new(src=n4, dst=n3),
|
|
5360
6019
|
... )
|
|
5361
6020
|
>>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
|
|
5362
|
-
>>> cosine_similarity = graph.cosine_similarity()
|
|
6021
|
+
>>> cosine_similarity = graph.cosine_similarity(full=True)
|
|
5363
6022
|
>>> select(score).where(cosine_similarity(u, v, score), u.id == 2, v.id == 4).inspect()
|
|
5364
6023
|
▰▰▰▰ Setup complete
|
|
5365
6024
|
score
|
|
@@ -5384,7 +6043,7 @@ class Graph():
|
|
|
5384
6043
|
... Edge.new(src=n14, dst=n13, weight=1.0),
|
|
5385
6044
|
... )
|
|
5386
6045
|
>>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
|
|
5387
|
-
>>> cosine_similarity = graph.cosine_similarity()
|
|
6046
|
+
>>> cosine_similarity = graph.cosine_similarity(full=True)
|
|
5388
6047
|
>>> select(score).where(cosine_similarity(u, v, score), u.id == 1, v.id == 2).inspect()
|
|
5389
6048
|
▰▰▰▰ Setup complete
|
|
5390
6049
|
score
|
|
@@ -5406,48 +6065,246 @@ class Graph():
|
|
|
5406
6065
|
... Edge.new(src=n2, dst=n4, weight=5.0),
|
|
5407
6066
|
... )
|
|
5408
6067
|
>>> u, v, score = Node.ref("u"), Node.ref("v"), Float.ref("score")
|
|
5409
|
-
>>> cosine_similarity = graph.cosine_similarity()
|
|
6068
|
+
>>> cosine_similarity = graph.cosine_similarity(full=True)
|
|
5410
6069
|
>>> select(score).where(cosine_similarity(u, v, score), u.id == 1, v.id == 2).inspect()
|
|
5411
6070
|
▰▰▰▰ Setup complete
|
|
5412
6071
|
score
|
|
5413
6072
|
0 0.996241
|
|
5414
6073
|
|
|
6074
|
+
**Domain Constraint Examples**
|
|
6075
|
+
|
|
6076
|
+
>>> # Use 'from_' parameter to constrain the set of nodes for the first position
|
|
6077
|
+
>>> # Using the same undirected unweighted graph from above
|
|
6078
|
+
>>> from relationalai.semantics import where
|
|
6079
|
+
>>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
|
|
6080
|
+
>>> node = Node.ref()
|
|
6081
|
+
>>> where(node.id == 2).define(subset(node))
|
|
6082
|
+
>>>
|
|
6083
|
+
>>> # Get cosine similarity scores only for pairs where first node is in subset
|
|
6084
|
+
>>> constrained_cosine_similarity = graph.cosine_similarity(from_=subset)
|
|
6085
|
+
>>> select(u.id, v.id, score).where(constrained_cosine_similarity(u, v, score)).inspect()
|
|
6086
|
+
▰▰▰▰ Setup complete
|
|
6087
|
+
id id2 score
|
|
6088
|
+
0 2 2 1.000000
|
|
6089
|
+
1 2 3 0.707107
|
|
6090
|
+
2 2 4 0.408248
|
|
6091
|
+
|
|
6092
|
+
>>> # Use both 'from_' and 'to' parameters to constrain both positions
|
|
6093
|
+
>>> from_subset = model.Relationship(f"{{node:{Node}}} is in from_subset")
|
|
6094
|
+
>>> to_subset = model.Relationship(f"{{node:{Node}}} is in to_subset")
|
|
6095
|
+
>>> where(node.id == 2).define(from_subset(node))
|
|
6096
|
+
>>> where(node.id == 4).define(to_subset(node))
|
|
6097
|
+
>>>
|
|
6098
|
+
>>> # Get cosine similarity scores only where first node is in from_subset and second node is in to_subset
|
|
6099
|
+
>>> constrained_cosine_similarity = graph.cosine_similarity(from_=from_subset, to=to_subset)
|
|
6100
|
+
>>> select(u.id, v.id, score).where(constrained_cosine_similarity(u, v, score)).inspect()
|
|
6101
|
+
▰▰▰▰ Setup complete
|
|
6102
|
+
id id2 score
|
|
6103
|
+
0 2 4 0.408248
|
|
6104
|
+
|
|
6105
|
+
>>> # Use 'between' parameter to constrain to specific pairs of nodes
|
|
6106
|
+
>>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
|
|
6107
|
+
>>> node_a, node_b = Node.ref(), Node.ref()
|
|
6108
|
+
>>> where(node_a.id == 2, node_b.id == 4).define(pairs(node_a, node_b))
|
|
6109
|
+
>>> where(node_a.id == 3, node_b.id == 4).define(pairs(node_a, node_b))
|
|
6110
|
+
>>>
|
|
6111
|
+
>>> # Get cosine similarity scores only for the specific pairs (2, 4) and (3, 4)
|
|
6112
|
+
>>> constrained_cosine_similarity = graph.cosine_similarity(between=pairs)
|
|
6113
|
+
>>> select(u.id, v.id, score).where(constrained_cosine_similarity(u, v, score)).inspect()
|
|
6114
|
+
▰▰▰▰ Setup complete
|
|
6115
|
+
id id2 score
|
|
6116
|
+
0 2 4 0.408248
|
|
6117
|
+
1 3 4 0.707107
|
|
6118
|
+
|
|
5415
6119
|
"""
|
|
5416
|
-
|
|
5417
|
-
|
|
5418
|
-
|
|
5419
|
-
"of all pairs of nodes of the graph. To provide better control over "
|
|
5420
|
-
"the computed subset, `cosine_similarity`'s interface will soon "
|
|
5421
|
-
"need to change."
|
|
5422
|
-
),
|
|
5423
|
-
FutureWarning,
|
|
5424
|
-
stacklevel=2
|
|
6120
|
+
# Validate domain constraint parameters.
|
|
6121
|
+
self._validate_domain_constraint_parameters(
|
|
6122
|
+
'cosine_similarity', full, from_, to, between
|
|
5425
6123
|
)
|
|
5426
6124
|
|
|
6125
|
+
# At this point, exactly one of `full`, `from_`, or `between`
|
|
6126
|
+
# has been provided, and if `to` is provided, `from_` is also provided.
|
|
6127
|
+
|
|
6128
|
+
# Handle `between`.
|
|
6129
|
+
if between is not None:
|
|
6130
|
+
self._validate_pair_subset_parameter(between)
|
|
6131
|
+
return self._cosine_similarity_between(between)
|
|
6132
|
+
|
|
6133
|
+
# Handle `from_` (and potentially `to`).
|
|
6134
|
+
if from_ is not None:
|
|
6135
|
+
self._validate_node_subset_parameter('from_', from_)
|
|
6136
|
+
if to is not None:
|
|
6137
|
+
self._validate_node_subset_parameter('to', to)
|
|
6138
|
+
return self._cosine_similarity_from_to(from_, to)
|
|
6139
|
+
return self._cosine_similarity_from(from_)
|
|
6140
|
+
|
|
6141
|
+
# Handle `full`.
|
|
5427
6142
|
return self._cosine_similarity
|
|
5428
6143
|
|
|
5429
6144
|
@cached_property
|
|
5430
6145
|
def _cosine_similarity(self):
|
|
5431
|
-
"""Lazily define and cache the
|
|
5432
|
-
_cosine_similarity_rel = self.
|
|
6146
|
+
"""Lazily define and cache the full cosine_similarity relationship."""
|
|
6147
|
+
_cosine_similarity_rel = self._create_cosine_similarity_relationship()
|
|
6148
|
+
_cosine_similarity_rel.annotate(annotations.track("graphs", "cosine_similarity"))
|
|
6149
|
+
return _cosine_similarity_rel
|
|
5433
6150
|
|
|
6151
|
+
def _cosine_similarity_from(self, node_subset_from: Relationship):
|
|
6152
|
+
"""
|
|
6153
|
+
Create a cosine_similarity relationship, with the first position in each
|
|
6154
|
+
tuple constrained to be in the given subset of nodes. Note this relationship
|
|
6155
|
+
is not cached; it is specific to the callsite.
|
|
6156
|
+
"""
|
|
6157
|
+
_cosine_similarity_rel = self._create_cosine_similarity_relationship(
|
|
6158
|
+
node_subset_from=node_subset_from
|
|
6159
|
+
)
|
|
6160
|
+
_cosine_similarity_rel.annotate(annotations.track("graphs", "cosine_similarity_from"))
|
|
6161
|
+
return _cosine_similarity_rel
|
|
6162
|
+
|
|
6163
|
+
def _cosine_similarity_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
|
|
6164
|
+
"""
|
|
6165
|
+
Create a cosine_similarity relationship, with the first position in each
|
|
6166
|
+
tuple constrained to be in `node_subset_from`, and the second position in
|
|
6167
|
+
each tuple constrained to be in `node_subset_to`. Note this relationship
|
|
6168
|
+
is not cached; it is specific to the callsite.
|
|
6169
|
+
"""
|
|
6170
|
+
_cosine_similarity_rel = self._create_cosine_similarity_relationship(
|
|
6171
|
+
node_subset_from=node_subset_from,
|
|
6172
|
+
node_subset_to=node_subset_to
|
|
6173
|
+
)
|
|
6174
|
+
_cosine_similarity_rel.annotate(annotations.track("graphs", "cosine_similarity_from_to"))
|
|
6175
|
+
return _cosine_similarity_rel
|
|
6176
|
+
|
|
6177
|
+
def _cosine_similarity_between(self, pair_subset_between: Relationship):
|
|
6178
|
+
"""
|
|
6179
|
+
Create a cosine_similarity relationship, with the first and second positions
|
|
6180
|
+
in each tuple jointly constrained to be in the given set of pairs
|
|
6181
|
+
of nodes. Note this relationship is not cached;
|
|
6182
|
+
it is specific to the callsite.
|
|
6183
|
+
"""
|
|
6184
|
+
_cosine_similarity_rel = self._create_cosine_similarity_relationship(
|
|
6185
|
+
pair_subset_between=pair_subset_between
|
|
6186
|
+
)
|
|
6187
|
+
_cosine_similarity_rel.annotate(annotations.track("graphs", "cosine_similarity_between"))
|
|
6188
|
+
return _cosine_similarity_rel
|
|
6189
|
+
|
|
6190
|
+
def _create_cosine_similarity_relationship(
|
|
6191
|
+
self,
|
|
6192
|
+
*,
|
|
6193
|
+
node_subset_from: Optional[Relationship] = None,
|
|
6194
|
+
node_subset_to: Optional[Relationship] = None,
|
|
6195
|
+
pair_subset_between: Optional[Relationship] = None,
|
|
6196
|
+
):
|
|
6197
|
+
"""
|
|
6198
|
+
Create a cosine_similarity relationship, optionally constrained by
|
|
6199
|
+
the provided node subsets or pair subset.
|
|
6200
|
+
"""
|
|
6201
|
+
_cosine_similarity_rel = self._model.Relationship(
|
|
6202
|
+
f"{{node_u:{self._NodeConceptStr}}} has a cosine similarity to "
|
|
6203
|
+
f"{{node_v:{self._NodeConceptStr}}} of {{score:Float}}"
|
|
6204
|
+
)
|
|
6205
|
+
|
|
6206
|
+
# TODO: Optimization opportunity. In a number of branches below,
|
|
6207
|
+
# we compute _count_outneighbor_of, which transitively computes
|
|
6208
|
+
# _outneighbor_of, and then compute _outneighbor_of directly;
|
|
6209
|
+
# the present code structure makes this a developer-time-efficient
|
|
6210
|
+
# way to get this off the ground, but of course involves redundant
|
|
6211
|
+
# work. In future this redundant work could be eliminated.
|
|
6212
|
+
|
|
6213
|
+
# TODO: Optimization opportunity. In some of the cases below
|
|
6214
|
+
# (unweighted in particular), the node_constraint is redundant with
|
|
6215
|
+
# the constraints baked into the _count_outneighbor_of and
|
|
6216
|
+
# _outneighbor_of relationships. The join with node_constraint
|
|
6217
|
+
# could be eliminated in those cases. Possibly also relevant to
|
|
6218
|
+
# other domain-constrained relations.
|
|
6219
|
+
|
|
6220
|
+
# Branch by case to select appropriate count_outneighbor and
|
|
6221
|
+
# outneighbor relationships, and build appropriate constraints
|
|
6222
|
+
# on the domain of the nodes.
|
|
6223
|
+
node_u, node_v = self.Node.ref(), self.Node.ref()
|
|
6224
|
+
|
|
6225
|
+
# Handle the `between` case.
|
|
6226
|
+
if pair_subset_between is not None:
|
|
6227
|
+
# Extract first-position and second-position nodes.
|
|
6228
|
+
first_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
|
|
6229
|
+
second_position_subset = self._model.Relationship(f"{{node:{self._NodeConceptStr}}}")
|
|
6230
|
+
node_x, node_y = self.Node.ref(), self.Node.ref()
|
|
6231
|
+
where(
|
|
6232
|
+
pair_subset_between(node_x, node_y)
|
|
6233
|
+
).define(
|
|
6234
|
+
first_position_subset(node_x),
|
|
6235
|
+
second_position_subset(node_y)
|
|
6236
|
+
)
|
|
6237
|
+
|
|
6238
|
+
count_outneighbor_u_rel = self._count_outneighbor_of(first_position_subset)
|
|
6239
|
+
count_outneighbor_v_rel = self._count_outneighbor_of(second_position_subset)
|
|
6240
|
+
outneighbor_u_rel = self._outneighbor_of(first_position_subset)
|
|
6241
|
+
outneighbor_v_rel = self._outneighbor_of(second_position_subset)
|
|
6242
|
+
|
|
6243
|
+
node_constraints = [pair_subset_between(node_u, node_v)]
|
|
6244
|
+
|
|
6245
|
+
# Handle the `from_` case.
|
|
6246
|
+
elif node_subset_from is not None and node_subset_to is None:
|
|
6247
|
+
count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
|
|
6248
|
+
count_outneighbor_v_rel = self._count_outneighbor
|
|
6249
|
+
outneighbor_u_rel = self._outneighbor_of(node_subset_from)
|
|
6250
|
+
outneighbor_v_rel = self._outneighbor
|
|
6251
|
+
# TODO: This case could be optimized via an analog of
|
|
6252
|
+
# the depth-2 traversal strategy suggested for the equivalent
|
|
6253
|
+
# case of common_neighbor, but for another day.
|
|
6254
|
+
|
|
6255
|
+
node_constraints = [node_subset_from(node_u)]
|
|
6256
|
+
|
|
6257
|
+
# Handle the `from_`/`to` case.
|
|
6258
|
+
elif node_subset_from is not None and node_subset_to is not None:
|
|
6259
|
+
# Check for object identity optimization.
|
|
6260
|
+
if node_subset_from is node_subset_to:
|
|
6261
|
+
count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
|
|
6262
|
+
count_outneighbor_v_rel = count_outneighbor_u_rel
|
|
6263
|
+
outneighbor_u_rel = self._outneighbor_of(node_subset_from)
|
|
6264
|
+
outneighbor_v_rel = outneighbor_u_rel
|
|
6265
|
+
else:
|
|
6266
|
+
count_outneighbor_u_rel = self._count_outneighbor_of(node_subset_from)
|
|
6267
|
+
count_outneighbor_v_rel = self._count_outneighbor_of(node_subset_to)
|
|
6268
|
+
outneighbor_u_rel = self._outneighbor_of(node_subset_from)
|
|
6269
|
+
outneighbor_v_rel = self._outneighbor_of(node_subset_to)
|
|
6270
|
+
|
|
6271
|
+
node_constraints = [node_subset_from(node_u), node_subset_to(node_v)]
|
|
6272
|
+
|
|
6273
|
+
# Handle the `full` case.
|
|
6274
|
+
else:
|
|
6275
|
+
count_outneighbor_u_rel = self._count_outneighbor
|
|
6276
|
+
count_outneighbor_v_rel = self._count_outneighbor
|
|
6277
|
+
outneighbor_u_rel = self._outneighbor
|
|
6278
|
+
outneighbor_v_rel = self._outneighbor
|
|
6279
|
+
|
|
6280
|
+
node_constraints = []
|
|
6281
|
+
|
|
6282
|
+
# Define cosine similarity logic for both weighted and unweighted cases.
|
|
5434
6283
|
if not self.weighted:
|
|
5435
|
-
|
|
5436
|
-
count_outneighor_u, count_outneighor_v
|
|
6284
|
+
# Unweighted case: use count of common outneighbors.
|
|
6285
|
+
count_outneighor_u, count_outneighor_v = Integer.ref(), Integer.ref()
|
|
6286
|
+
common_outneighbor_node = self.Node.ref()
|
|
6287
|
+
score = Float.ref()
|
|
5437
6288
|
|
|
5438
6289
|
where(
|
|
5439
|
-
|
|
5440
|
-
|
|
5441
|
-
|
|
6290
|
+
*node_constraints,
|
|
6291
|
+
count_outneighbor_u_rel(node_u, count_outneighor_u),
|
|
6292
|
+
count_outneighbor_v_rel(node_v, count_outneighor_v),
|
|
6293
|
+
c_common := count(common_outneighbor_node).per(node_u, node_v).where(
|
|
6294
|
+
outneighbor_u_rel(node_u, common_outneighbor_node),
|
|
6295
|
+
outneighbor_v_rel(node_v, common_outneighbor_node),
|
|
6296
|
+
),
|
|
5442
6297
|
score := c_common / sqrt(count_outneighor_u * count_outneighor_v),
|
|
5443
6298
|
).define(
|
|
5444
6299
|
_cosine_similarity_rel(node_u, node_v, score)
|
|
5445
6300
|
)
|
|
5446
6301
|
else:
|
|
5447
|
-
|
|
6302
|
+
# Weighted case: use dot product and norms.
|
|
5448
6303
|
node_uk, node_vk = self.Node.ref(), self.Node.ref()
|
|
5449
6304
|
wu, wv = Float.ref(), Float.ref()
|
|
6305
|
+
|
|
5450
6306
|
where(
|
|
6307
|
+
*node_constraints,
|
|
5451
6308
|
squared_norm_wu := sum(node_uk, wu * wu).per(node_u).where(self._weight(node_u, node_uk, wu)),
|
|
5452
6309
|
squared_norm_wv := sum(node_vk, wv * wv).per(node_v).where(self._weight(node_v, node_vk, wv)),
|
|
5453
6310
|
wu_dot_wv := self._wu_dot_wv_fragment(node_u, node_v),
|
|
@@ -5460,19 +6317,69 @@ class Graph():
|
|
|
5460
6317
|
|
|
5461
6318
|
|
|
5462
6319
|
@include_in_docs
|
|
5463
|
-
def adamic_adar(
|
|
5464
|
-
|
|
6320
|
+
def adamic_adar(
|
|
6321
|
+
self,
|
|
6322
|
+
*,
|
|
6323
|
+
full: Optional[bool] = None,
|
|
6324
|
+
from_: Optional[Relationship] = None,
|
|
6325
|
+
to: Optional[Relationship] = None,
|
|
6326
|
+
between: Optional[Relationship] = None,
|
|
6327
|
+
):
|
|
6328
|
+
"""Returns a ternary relationship containing the Adamic-Adar index for pairs of nodes.
|
|
5465
6329
|
|
|
5466
6330
|
The Adamic-Adar index is a similarity measure between two nodes based
|
|
5467
6331
|
on the amount of shared neighbors between them, giving more weight to
|
|
5468
6332
|
common neighbors that are less connected.
|
|
5469
6333
|
|
|
6334
|
+
Parameters
|
|
6335
|
+
----------
|
|
6336
|
+
full : bool, optional
|
|
6337
|
+
If ``True``, computes the Adamic-Adar index for all pairs of nodes in
|
|
6338
|
+
the graph. This computation can be expensive for large graphs, as
|
|
6339
|
+
dependencies can scale quadratically in the number of edges or cubically
|
|
6340
|
+
in the number of nodes. Mutually exclusive with other parameters.
|
|
6341
|
+
Default is ``None``.
|
|
6342
|
+
from_ : Relationship, optional
|
|
6343
|
+
A unary relationship containing a subset of the graph's nodes. When
|
|
6344
|
+
provided, constrains the domain of the Adamic-Adar computation: only
|
|
6345
|
+
Adamic-Adar indices for node pairs where the first node is in this relationship
|
|
6346
|
+
are computed and returned. Mutually exclusive with ``full`` and ``between``.
|
|
6347
|
+
Default is ``None``.
|
|
6348
|
+
to : Relationship, optional
|
|
6349
|
+
A unary relationship containing a subset of the graph's nodes. Can only
|
|
6350
|
+
be used together with the ``from_`` parameter. When provided with ``from_``,
|
|
6351
|
+
constrains the domain of the Adamic-Adar computation: only Adamic-Adar
|
|
6352
|
+
indices for node pairs where the first node is in ``from_`` and the
|
|
6353
|
+
second node is in ``to`` are computed and returned.
|
|
6354
|
+
Default is ``None``.
|
|
6355
|
+
between : Relationship, optional
|
|
6356
|
+
A binary relationship containing pairs of nodes. When provided,
|
|
6357
|
+
constrains the domain of the Adamic-Adar computation: only Adamic-Adar
|
|
6358
|
+
indices for the specific node pairs in this relationship are computed
|
|
6359
|
+
and returned. Mutually exclusive with other parameters.
|
|
6360
|
+
Default is ``None``.
|
|
6361
|
+
|
|
5470
6362
|
Returns
|
|
5471
6363
|
-------
|
|
5472
6364
|
Relationship
|
|
5473
6365
|
A ternary relationship where each tuple represents a pair of nodes
|
|
5474
6366
|
and their Adamic-Adar index.
|
|
5475
6367
|
|
|
6368
|
+
Raises
|
|
6369
|
+
------
|
|
6370
|
+
ValueError
|
|
6371
|
+
If ``full`` is provided with any other parameter.
|
|
6372
|
+
If ``between`` is provided with any other parameter.
|
|
6373
|
+
If ``from_`` is provided with any parameter other than ``to``.
|
|
6374
|
+
If none of ``full``, ``from_``, or ``between`` is provided.
|
|
6375
|
+
If ``full`` is not ``True`` or ``None``.
|
|
6376
|
+
AssertionError
|
|
6377
|
+
If ``from_``, ``to``, or ``between`` is not a ``Relationship``.
|
|
6378
|
+
If ``from_``, ``to``, or ``between`` is not attached to the same model as the graph.
|
|
6379
|
+
If ``from_``, ``to``, or ``between`` does not contain the graph's ``Node`` concept.
|
|
6380
|
+
If ``from_`` or ``to`` is not a unary relationship.
|
|
6381
|
+
If ``between`` is not a binary relationship.
|
|
6382
|
+
|
|
5476
6383
|
Relationship Schema
|
|
5477
6384
|
-------------------
|
|
5478
6385
|
``adamic_adar(node_u, node_v, score)``
|
|
@@ -5496,9 +6403,38 @@ class Graph():
|
|
|
5496
6403
|
|
|
5497
6404
|
AA(u,v) = Σ (1 / log(degree(w)))
|
|
5498
6405
|
|
|
6406
|
+
The ``adamic_adar(full=True)`` method computes and caches the full Adamic-Adar
|
|
6407
|
+
relationship for all pairs of nodes, providing efficient reuse across
|
|
6408
|
+
multiple calls. This can be expensive as dependencies can contain O(|E|²) or
|
|
6409
|
+
O(|V|³) tuples depending on graph density.
|
|
6410
|
+
|
|
6411
|
+
Calling ``adamic_adar()`` without arguments raises a ``ValueError``,
|
|
6412
|
+
to ensure awareness and explicit acknowledgement (``full=True``) of this cost.
|
|
6413
|
+
|
|
6414
|
+
In contrast, ``adamic_adar(from_=subset)`` constrains the computation to
|
|
6415
|
+
tuples with the first position in the passed-in ``subset``. The result is
|
|
6416
|
+
not cached; it is specific to the call site. When a significant fraction of
|
|
6417
|
+
the Adamic-Adar relation is needed across a program, ``adamic_adar(full=True)``
|
|
6418
|
+
is typically more efficient. Use ``adamic_adar(from_=subset)`` only
|
|
6419
|
+
when small subsets of the Adamic-Adar relationship are needed
|
|
6420
|
+
collectively across the program.
|
|
6421
|
+
|
|
6422
|
+
The ``to`` parameter can be used together with ``from_`` to further
|
|
6423
|
+
constrain the computation: ``adamic_adar(from_=subset_a, to=subset_b)``
|
|
6424
|
+
computes Adamic-Adar indices only for node pairs where the first node is in
|
|
6425
|
+
``subset_a`` and the second node is in ``subset_b``. (Since ``adamic_adar``
|
|
6426
|
+
is symmetric in its first two positions, using ``to`` without ``from_`` would
|
|
6427
|
+
be functionally redundant, and is not allowed.)
|
|
6428
|
+
|
|
6429
|
+
The ``between`` parameter provides another way to constrain the computation.
|
|
6430
|
+
Unlike ``from_`` and ``to``, which allow you to independently constrain the first
|
|
6431
|
+
and second positions in ``adamic_adar`` tuples to sets of nodes, ``between``
|
|
6432
|
+
allows you to constrain the first and second positions, jointly, to specific pairs
|
|
6433
|
+
of nodes.
|
|
6434
|
+
|
|
5499
6435
|
Examples
|
|
5500
6436
|
--------
|
|
5501
|
-
>>> from relationalai.semantics import Model, define, select, Float
|
|
6437
|
+
>>> from relationalai.semantics import Model, define, select, where, Float
|
|
5502
6438
|
>>> from relationalai.semantics.reasoners.graph import Graph
|
|
5503
6439
|
>>>
|
|
5504
6440
|
>>> # 1. Set up an undirected graph
|
|
@@ -5517,10 +6453,10 @@ class Graph():
|
|
|
5517
6453
|
... Edge.new(src=n4, dst=n3),
|
|
5518
6454
|
... )
|
|
5519
6455
|
>>>
|
|
5520
|
-
>>> # 3. Select the Adamic-Adar
|
|
6456
|
+
>>> # 3. Select the Adamic-Adar indices from the full relationship
|
|
5521
6457
|
>>> u, v = Node.ref("u"), Node.ref("v")
|
|
5522
6458
|
>>> score = Float.ref("score")
|
|
5523
|
-
>>> adamic_adar = graph.adamic_adar()
|
|
6459
|
+
>>> adamic_adar = graph.adamic_adar(full=True)
|
|
5524
6460
|
>>> select(
|
|
5525
6461
|
... u.id, v.id, score,
|
|
5526
6462
|
... ).where(
|
|
@@ -5532,32 +6468,193 @@ class Graph():
|
|
|
5532
6468
|
id id2 score
|
|
5533
6469
|
0 2 4 0.910239
|
|
5534
6470
|
|
|
6471
|
+
>>> # 4. Use 'from_' parameter to constrain the set of nodes for the first position
|
|
6472
|
+
>>> # Define a subset containing only node 1
|
|
6473
|
+
>>> subset = model.Relationship(f"{{node:{Node}}} is in subset")
|
|
6474
|
+
>>> node = Node.ref()
|
|
6475
|
+
>>> where(node.id == 1).define(subset(node))
|
|
6476
|
+
>>>
|
|
6477
|
+
>>> # Get Adamic-Adar indices only for pairs where first node is in subset
|
|
6478
|
+
>>> constrained_adamic_adar = graph.adamic_adar(from_=subset)
|
|
6479
|
+
>>> select(u.id, v.id, score).where(constrained_adamic_adar(u, v, score)).inspect()
|
|
6480
|
+
▰▰▰▰ Setup complete
|
|
6481
|
+
id id2 score
|
|
6482
|
+
0 1 1 2.885390
|
|
6483
|
+
1 1 4 2.885390
|
|
6484
|
+
|
|
6485
|
+
>>> # 5. Use both 'from_' and 'to' parameters to constrain both positions
|
|
6486
|
+
>>> subset_a = model.Relationship(f"{{node:{Node}}} is in subset_a")
|
|
6487
|
+
>>> subset_b = model.Relationship(f"{{node:{Node}}} is in subset_b")
|
|
6488
|
+
>>> where(node.id == 1).define(subset_a(node))
|
|
6489
|
+
>>> where(node.id == 4).define(subset_b(node))
|
|
6490
|
+
>>>
|
|
6491
|
+
>>> # Get Adamic-Adar indices only where first node is in subset_a and second node is in subset_b
|
|
6492
|
+
>>> constrained_adamic_adar = graph.adamic_adar(from_=subset_a, to=subset_b)
|
|
6493
|
+
>>> select(u.id, v.id, score).where(constrained_adamic_adar(u, v, score)).inspect()
|
|
6494
|
+
▰▰▰▰ Setup complete
|
|
6495
|
+
id id2 score
|
|
6496
|
+
0 1 4 2.885390
|
|
6497
|
+
|
|
6498
|
+
>>> # 6. Use 'between' parameter to constrain to specific pairs of nodes
|
|
6499
|
+
>>> pairs = model.Relationship(f"{{node_a:{Node}}} and {{node_b:{Node}}} are a pair")
|
|
6500
|
+
>>> node_a, node_b = Node.ref(), Node.ref()
|
|
6501
|
+
>>> where(node_a.id == 1, node_b.id == 4).define(pairs(node_a, node_b))
|
|
6502
|
+
>>> where(node_a.id == 2, node_b.id == 3).define(pairs(node_a, node_b))
|
|
6503
|
+
>>>
|
|
6504
|
+
>>> # Get Adamic-Adar indices only for the specific pairs (1, 4) and (2, 3)
|
|
6505
|
+
>>> constrained_adamic_adar = graph.adamic_adar(between=pairs)
|
|
6506
|
+
>>> select(u.id, v.id, score).where(constrained_adamic_adar(u, v, score)).inspect()
|
|
6507
|
+
▰▰▰▰ Setup complete
|
|
6508
|
+
id id2 score
|
|
6509
|
+
0 1 4 2.885390
|
|
6510
|
+
1 2 3 1.442695
|
|
6511
|
+
|
|
5535
6512
|
"""
|
|
5536
|
-
|
|
5537
|
-
|
|
5538
|
-
|
|
5539
|
-
"of all pairs of nodes of the graph. To provide better control over "
|
|
5540
|
-
"the computed subset, `adamic_adar`'s interface will soon "
|
|
5541
|
-
"need to change."
|
|
5542
|
-
),
|
|
5543
|
-
FutureWarning,
|
|
5544
|
-
stacklevel=2
|
|
6513
|
+
# Validate domain constraint parameters.
|
|
6514
|
+
self._validate_domain_constraint_parameters(
|
|
6515
|
+
'adamic_adar', full, from_, to, between
|
|
5545
6516
|
)
|
|
5546
6517
|
|
|
6518
|
+
# At this point, exactly one of `full`, `from_`, or `between`
|
|
6519
|
+
# has been provided, and if `to` is provided, `from_` is also provided.
|
|
6520
|
+
|
|
6521
|
+
# Handle `between`.
|
|
6522
|
+
if between is not None:
|
|
6523
|
+
self._validate_pair_subset_parameter(between)
|
|
6524
|
+
return self._adamic_adar_between(between)
|
|
6525
|
+
|
|
6526
|
+
# Handle `from_` (and potentially `to`).
|
|
6527
|
+
if from_ is not None:
|
|
6528
|
+
self._validate_node_subset_parameter('from_', from_)
|
|
6529
|
+
if to is not None:
|
|
6530
|
+
self._validate_node_subset_parameter('to', to)
|
|
6531
|
+
return self._adamic_adar_from_to(from_, to)
|
|
6532
|
+
return self._adamic_adar_from(from_)
|
|
6533
|
+
|
|
6534
|
+
# Handle `full`.
|
|
5547
6535
|
return self._adamic_adar
|
|
5548
6536
|
|
|
5549
6537
|
@cached_property
|
|
5550
6538
|
def _adamic_adar(self):
|
|
5551
|
-
"""Lazily define and cache the
|
|
5552
|
-
_adamic_adar_rel = self.
|
|
6539
|
+
"""Lazily define and cache the full adamic_adar relationship."""
|
|
6540
|
+
_adamic_adar_rel = self._create_adamic_adar_relationship()
|
|
6541
|
+
_adamic_adar_rel.annotate(annotations.track("graphs", "adamic_adar"))
|
|
6542
|
+
return _adamic_adar_rel
|
|
6543
|
+
|
|
6544
|
+
def _adamic_adar_from(self, node_subset_from: Relationship):
|
|
6545
|
+
"""
|
|
6546
|
+
Create an adamic_adar relationship, with the first position in each
|
|
6547
|
+
tuple constrained to be in the given subset of nodes. Note this relationship
|
|
6548
|
+
is not cached; it is specific to the callsite.
|
|
6549
|
+
"""
|
|
6550
|
+
_adamic_adar_rel = self._create_adamic_adar_relationship(
|
|
6551
|
+
node_subset_from=node_subset_from
|
|
6552
|
+
)
|
|
6553
|
+
_adamic_adar_rel.annotate(annotations.track("graphs", "adamic_adar_from"))
|
|
6554
|
+
return _adamic_adar_rel
|
|
6555
|
+
|
|
6556
|
+
def _adamic_adar_from_to(self, node_subset_from: Relationship, node_subset_to: Relationship):
|
|
6557
|
+
"""
|
|
6558
|
+
Create an adamic_adar relationship, with the first position in each
|
|
6559
|
+
tuple constrained to be in `node_subset_from`, and the second position in
|
|
6560
|
+
each tuple constrained to be in `node_subset_to`. Note this relationship
|
|
6561
|
+
is not cached; it is specific to the callsite.
|
|
6562
|
+
"""
|
|
6563
|
+
_adamic_adar_rel = self._create_adamic_adar_relationship(
|
|
6564
|
+
node_subset_from=node_subset_from,
|
|
6565
|
+
node_subset_to=node_subset_to
|
|
6566
|
+
)
|
|
6567
|
+
_adamic_adar_rel.annotate(annotations.track("graphs", "adamic_adar_from_to"))
|
|
6568
|
+
return _adamic_adar_rel
|
|
6569
|
+
|
|
6570
|
+
def _adamic_adar_between(self, pair_subset_between: Relationship):
|
|
6571
|
+
"""
|
|
6572
|
+
Create an adamic_adar relationship, with the first and second positions
|
|
6573
|
+
in each tuple jointly constrained to be in the given set of pairs
|
|
6574
|
+
of nodes. Note this relationship is not cached;
|
|
6575
|
+
it is specific to the callsite.
|
|
6576
|
+
"""
|
|
6577
|
+
_adamic_adar_rel = self._create_adamic_adar_relationship(
|
|
6578
|
+
pair_subset_between=pair_subset_between
|
|
6579
|
+
)
|
|
6580
|
+
_adamic_adar_rel.annotate(annotations.track("graphs", "adamic_adar_between"))
|
|
6581
|
+
return _adamic_adar_rel
|
|
5553
6582
|
|
|
6583
|
+
def _create_adamic_adar_relationship(
|
|
6584
|
+
self,
|
|
6585
|
+
*,
|
|
6586
|
+
node_subset_from: Optional[Relationship] = None,
|
|
6587
|
+
node_subset_to: Optional[Relationship] = None,
|
|
6588
|
+
pair_subset_between: Optional[Relationship] = None,
|
|
6589
|
+
):
|
|
6590
|
+
"""
|
|
6591
|
+
Create an adamic_adar relationship, optionally constrained by the provided
|
|
6592
|
+
node subsets or pair subset.
|
|
6593
|
+
"""
|
|
6594
|
+
_adamic_adar_rel = self._model.Relationship(
|
|
6595
|
+
f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} "
|
|
6596
|
+
f"have adamic adar score {{score:Float}}"
|
|
6597
|
+
)
|
|
6598
|
+
|
|
6599
|
+
# NOTE: Handling of the common_neighbor relation (`common_neighbor_rel`)
|
|
6600
|
+
# differs in each case, whereas handling of the count_neighbor relation
|
|
6601
|
+
# (`count_neighbor_rel`) is: a) the same among the constrained cases;
|
|
6602
|
+
# and b) different in the unconstrained case. As such we handle
|
|
6603
|
+
# `common_neighbor_rel` in the branches by case below, and handle
|
|
6604
|
+
# `count_neighbor_rel` in a separate constrained/unconstrained branch later.
|
|
6605
|
+
|
|
6606
|
+
# Handle the `between` case.
|
|
6607
|
+
if pair_subset_between is not None:
|
|
6608
|
+
# Get the appropriate common_neighbor relationship.
|
|
6609
|
+
common_neighbor_rel = self._common_neighbor_between(pair_subset_between)
|
|
6610
|
+
|
|
6611
|
+
# Handle the `from_` case.
|
|
6612
|
+
elif node_subset_from is not None and node_subset_to is None:
|
|
6613
|
+
# Get the appropriate common_neighbor relationship.
|
|
6614
|
+
common_neighbor_rel = self._common_neighbor_from(node_subset_from)
|
|
6615
|
+
|
|
6616
|
+
# Handle the `from_`/`to` case.
|
|
6617
|
+
elif node_subset_from is not None and node_subset_to is not None:
|
|
6618
|
+
common_neighbor_rel = self._common_neighbor_from_to(node_subset_from, node_subset_to)
|
|
6619
|
+
# Note that _common_neighbor_from_to handles optimization
|
|
6620
|
+
# when the from_ and to sets are object-identical.
|
|
6621
|
+
|
|
6622
|
+
# Handle the `full` case.
|
|
6623
|
+
else:
|
|
6624
|
+
# Use cached full relationship.
|
|
6625
|
+
common_neighbor_rel = self._common_neighbor
|
|
6626
|
+
|
|
6627
|
+
# Handle `count_neighbor_rel` for unconstrained versus constrained cases.
|
|
6628
|
+
if pair_subset_between is None and node_subset_from is None:
|
|
6629
|
+
# Unconstrained case.
|
|
6630
|
+
count_neighbor_rel = self._count_neighbor
|
|
6631
|
+
|
|
6632
|
+
else:
|
|
6633
|
+
# Constrained cases.
|
|
6634
|
+
|
|
6635
|
+
# Extract common neighbors that appear in
|
|
6636
|
+
# the constrained common_neighbor relationship.
|
|
6637
|
+
common_neighbors_subset = self._model.Relationship(
|
|
6638
|
+
f"{{node:{self._NodeConceptStr}}} is a relevant common neighbor"
|
|
6639
|
+
)
|
|
6640
|
+
node_x, node_y, neighbor_z = self.Node.ref(), self.Node.ref(), self.Node.ref()
|
|
6641
|
+
where(
|
|
6642
|
+
common_neighbor_rel(node_x, node_y, neighbor_z)
|
|
6643
|
+
).define(
|
|
6644
|
+
common_neighbors_subset(neighbor_z)
|
|
6645
|
+
)
|
|
6646
|
+
|
|
6647
|
+
# From those common neighbors,
|
|
6648
|
+
# build a constrained count_neighbor relationship.
|
|
6649
|
+
count_neighbor_rel = self._count_neighbor_of(common_neighbors_subset)
|
|
6650
|
+
|
|
6651
|
+
# Define the Adamic-Adar aggregation using the selected relationships.
|
|
5554
6652
|
node_u, node_v, common_neighbor = self.Node.ref(), self.Node.ref(), self.Node.ref()
|
|
5555
6653
|
neighbor_count = Integer.ref()
|
|
5556
|
-
|
|
5557
6654
|
where(
|
|
5558
6655
|
_score := sum(common_neighbor, 1.0 / natural_log(neighbor_count)).per(node_u, node_v).where(
|
|
5559
|
-
|
|
5560
|
-
|
|
6656
|
+
common_neighbor_rel(node_u, node_v, common_neighbor),
|
|
6657
|
+
count_neighbor_rel(common_neighbor, neighbor_count),
|
|
5561
6658
|
)
|
|
5562
6659
|
).define(_adamic_adar_rel(node_u, node_v, _score))
|
|
5563
6660
|
|
|
@@ -5648,6 +6745,7 @@ class Graph():
|
|
|
5648
6745
|
def _preferential_attachment(self):
|
|
5649
6746
|
"""Lazily define and cache the self._preferential_attachment relationship."""
|
|
5650
6747
|
_preferential_attachment_rel = self._model.Relationship(f"{{node_u:{self._NodeConceptStr}}} and {{node_v:{self._NodeConceptStr}}} have preferential attachment score {{score:Integer}}")
|
|
6748
|
+
_preferential_attachment_rel.annotate(annotations.track("graphs", "preferential_attachment"))
|
|
5651
6749
|
|
|
5652
6750
|
node_u, node_v = self.Node.ref(), self.Node.ref()
|
|
5653
6751
|
count_u, count_v = Integer.ref(), Integer.ref()
|