infrahub-server 1.3.0b1__py3-none-any.whl → 1.3.0b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- infrahub/actions/constants.py +87 -0
- infrahub/actions/gather.py +3 -3
- infrahub/actions/models.py +10 -8
- infrahub/actions/parsers.py +6 -6
- infrahub/actions/schema.py +46 -37
- infrahub/actions/tasks.py +4 -11
- infrahub/branch/__init__.py +0 -0
- infrahub/branch/tasks.py +29 -0
- infrahub/branch/triggers.py +22 -0
- infrahub/cli/db.py +2 -2
- infrahub/computed_attribute/gather.py +3 -1
- infrahub/computed_attribute/tasks.py +23 -29
- infrahub/core/constants/__init__.py +5 -0
- infrahub/core/constants/database.py +1 -0
- infrahub/core/convert_object_type/conversion.py +1 -1
- infrahub/core/diff/query/save.py +67 -40
- infrahub/core/diff/query/time_range_query.py +0 -1
- infrahub/core/graph/__init__.py +1 -1
- infrahub/core/migrations/graph/__init__.py +6 -0
- infrahub/core/migrations/graph/m013_convert_git_password_credential.py +0 -2
- infrahub/core/migrations/graph/m029_duplicates_cleanup.py +662 -0
- infrahub/core/migrations/graph/m030_illegal_edges.py +82 -0
- infrahub/core/migrations/query/attribute_add.py +13 -9
- infrahub/core/migrations/query/relationship_duplicate.py +0 -1
- infrahub/core/migrations/schema/node_remove.py +0 -1
- infrahub/core/node/__init__.py +2 -0
- infrahub/core/node/base.py +1 -1
- infrahub/core/path.py +1 -1
- infrahub/core/protocols.py +4 -3
- infrahub/core/query/node.py +1 -1
- infrahub/core/query/relationship.py +2 -2
- infrahub/core/query/standard_node.py +19 -5
- infrahub/core/relationship/constraints/peer_relatives.py +72 -0
- infrahub/core/relationship/model.py +1 -1
- infrahub/core/schema/attribute_schema.py +26 -6
- infrahub/core/schema/basenode_schema.py +2 -2
- infrahub/core/schema/definitions/core/resource_pool.py +9 -0
- infrahub/core/schema/definitions/internal.py +9 -1
- infrahub/core/schema/generated/attribute_schema.py +4 -4
- infrahub/core/schema/generated/relationship_schema.py +6 -1
- infrahub/core/schema/manager.py +4 -2
- infrahub/core/schema/schema_branch.py +14 -5
- infrahub/core/validators/tasks.py +1 -1
- infrahub/database/__init__.py +1 -1
- infrahub/database/validation.py +100 -0
- infrahub/dependencies/builder/constraint/grouped/node_runner.py +2 -0
- infrahub/dependencies/builder/constraint/relationship_manager/peer_relatives.py +8 -0
- infrahub/dependencies/builder/diff/deserializer.py +1 -1
- infrahub/dependencies/registry.py +2 -0
- infrahub/events/models.py +1 -1
- infrahub/graphql/mutations/main.py +1 -1
- infrahub/graphql/mutations/resource_manager.py +13 -13
- infrahub/graphql/resolvers/many_relationship.py +1 -1
- infrahub/graphql/resolvers/resolver.py +2 -2
- infrahub/graphql/resolvers/single_relationship.py +1 -1
- infrahub/menu/menu.py +5 -4
- infrahub/message_bus/operations/refresh/registry.py +3 -3
- infrahub/patch/queries/delete_duplicated_edges.py +40 -29
- infrahub/pools/registration.py +22 -0
- infrahub/pools/tasks.py +56 -0
- infrahub/proposed_change/tasks.py +8 -8
- infrahub/schema/__init__.py +0 -0
- infrahub/schema/tasks.py +27 -0
- infrahub/schema/triggers.py +23 -0
- infrahub/trigger/catalogue.py +4 -0
- infrahub/trigger/models.py +5 -4
- infrahub/trigger/setup.py +26 -2
- infrahub/trigger/tasks.py +1 -1
- infrahub/webhook/tasks.py +6 -9
- infrahub/workflows/catalogue.py +27 -1
- {infrahub_server-1.3.0b1.dist-info → infrahub_server-1.3.0b3.dist-info}/METADATA +1 -1
- {infrahub_server-1.3.0b1.dist-info → infrahub_server-1.3.0b3.dist-info}/RECORD +80 -67
- infrahub_testcontainers/container.py +239 -64
- infrahub_testcontainers/docker-compose-cluster.test.yml +321 -0
- infrahub_testcontainers/docker-compose.test.yml +1 -0
- infrahub_testcontainers/helpers.py +15 -1
- infrahub_testcontainers/plugin.py +9 -0
- infrahub/patch/queries/consolidate_duplicated_nodes.py +0 -106
- {infrahub_server-1.3.0b1.dist-info → infrahub_server-1.3.0b3.dist-info}/LICENSE.txt +0 -0
- {infrahub_server-1.3.0b1.dist-info → infrahub_server-1.3.0b3.dist-info}/WHEEL +0 -0
- {infrahub_server-1.3.0b1.dist-info → infrahub_server-1.3.0b3.dist-info}/entry_points.txt +0 -0
infrahub/core/migrations/graph/m029_duplicates_cleanup.py (added)
@@ -0,0 +1,662 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Literal

from infrahub.core.constants import RelationshipDirection
from infrahub.core.constants.database import DatabaseEdgeType
from infrahub.core.migrations.shared import MigrationResult
from infrahub.core.query import Query, QueryType
from infrahub.log import get_logger

from ..shared import ArbitraryMigration

if TYPE_CHECKING:
    from infrahub.database import InfrahubDatabase

log = get_logger()

class CleanUpDuplicatedUuidVertices(Query):
    """
    Find vertexes that include the given label and have the same UUID and same set of labels
    For each of these duplicate vertex groups, keep one and mark all the others to be deleted by the PerformHardDeletesQuery
    - Group all of the edges touching a vertex in this vertex group by branch, edge_type, peer_element_id, and direction
    - For each edge group, we will link one edge to the vertex we are keeping for this vertex group and mark all of the others to be deleted
      - we will set/create one active edge from the vertex to keep to the peer of this group, setting its from time to the earliest active
        from time in this group
      - if ALL edges in this edge group are deleted, then we will set the to time of the active edge to the latest deleted time and
        set/create a deleted edge with a from time of the latest deleted time
    """

    name = "clean_up_duplicated_uuid_vertices"
    type = QueryType.WRITE
    insert_return = False
    insert_limit = False

    def __init__(
        self,
        vertex_label: str,
        outbound_edge_types: list[DatabaseEdgeType],
        inbound_edge_types: list[DatabaseEdgeType],
        **kwargs: Any,
    ) -> None:
        super().__init__(**kwargs)
        self.vertex_label = vertex_label
        self.outbound_edge_types = outbound_edge_types
        self.inbound_edge_types = inbound_edge_types

    def _get_or_create_active_edge_subquery(
        self,
        edge_type: DatabaseEdgeType,
        direction: Literal[RelationshipDirection.INBOUND, RelationshipDirection.OUTBOUND],
    ) -> str:
        if direction is RelationshipDirection.INBOUND:
            l_arrow = "<"
            r_arrow = ""
        else:
            l_arrow = ""
            r_arrow = ">"

        query = """
        CALL (vertex_to_keep, edge_type, branch, peer, earliest_active_time, latest_deleted_time, all_edges_deleted, edge_to_copy) {
            // ------------
            // get or create the active %(edge_type)s edge
            // ------------
            WITH edge_type
            WHERE edge_type = "%(edge_type)s"
            MERGE (vertex_to_keep)%(l_arrow)s-[active_edge:%(edge_type)s {branch: branch, status: "active"}]-%(r_arrow)s(peer)
            WITH active_edge
            LIMIT 1
            SET active_edge.to_delete = NULL
            SET active_edge.from = earliest_active_time
            SET active_edge.to = CASE
                WHEN all_edges_deleted = TRUE THEN latest_deleted_time
                ELSE NULL
            END
            SET active_edge.branch_level = edge_to_copy.branch_level
            SET active_edge.hierarchy = edge_to_copy.hierarchy
        }
        """ % {
            "edge_type": edge_type.value,
            "l_arrow": l_arrow,
            "r_arrow": r_arrow,
        }
        return query

    def _add_deleted_edge_subquery(
        self,
        edge_type: DatabaseEdgeType,
        direction: Literal[RelationshipDirection.INBOUND, RelationshipDirection.OUTBOUND],
    ) -> str:
        if direction is RelationshipDirection.INBOUND:
            l_arrow = "<"
            r_arrow = ""
        else:
            l_arrow = ""
            r_arrow = ">"
        subquery = """
        CALL (vertex_to_keep, edge_type, branch, peer, latest_deleted_time, edge_to_copy) {
            // ------------
            // create the deleted %(edge_type)s edge
            // ------------
            WITH edge_type
            WHERE edge_type = "%(edge_type)s"
            MERGE (vertex_to_keep)%(l_arrow)s-[deleted_edge:%(edge_type)s {branch: branch, status: "deleted"}]-%(r_arrow)s(peer)
            WITH deleted_edge
            LIMIT 1
            SET deleted_edge.to_delete = NULL
            SET deleted_edge.from = latest_deleted_time
            SET deleted_edge.to = NULL
            SET deleted_edge.branch_level = edge_to_copy.branch_level
            SET deleted_edge.hierarchy = edge_to_copy.hierarchy
        }
        """ % {"edge_type": edge_type.value, "l_arrow": l_arrow, "r_arrow": r_arrow}
        return subquery

    def _build_directed_edges_subquery(
        self,
        db: InfrahubDatabase,
        direction: Literal[RelationshipDirection.INBOUND, RelationshipDirection.OUTBOUND],
        edge_types: list[DatabaseEdgeType],
    ) -> str:
        if direction is RelationshipDirection.INBOUND:
            l_arrow = "<"
            r_arrow = ""
        else:
            l_arrow = ""
            r_arrow = ">"
        active_subqueries: list[str] = []
        delete_subqueries: list[str] = []
        for edge_type in edge_types:
            active_subqueries.append(
                self._get_or_create_active_edge_subquery(
                    edge_type=edge_type,
                    direction=direction,
                )
            )
            delete_subqueries.append(self._add_deleted_edge_subquery(edge_type=edge_type, direction=direction))
        active_edge_subqueries = "\n".join(active_subqueries)
        deleted_edge_subqueries = "\n".join(delete_subqueries)

        edges_query = """
        //------------
        // Get every %(direction)s branch, edge_type, peer element_id combinations touching vertices with this uuid/labels combination
        //------------
        CALL (n_uuid, vertex_element_ids, element_id_to_keep) {
            CALL (n_uuid, vertex_element_ids) {
                MATCH (n:%(vertex_label)s {uuid: n_uuid})
                WHERE %(id_func_name)s(n) IN vertex_element_ids
                MATCH (n)%(l_arrow)s-[e]-%(r_arrow)s(peer)
                WITH DISTINCT e.branch AS branch, type(e) AS edge_type, %(id_func_name)s(peer) AS peer_element_id
                RETURN branch, edge_type, peer_element_id
            }
            //------------
            // Are all of the edges with these with this branch/edge_type/peer_element_id combination deleted?
            //------------
            CALL (n_uuid, vertex_element_ids, branch, edge_type, peer_element_id) {
                // nodes with this edge_type/branch/peer combo
                MATCH (node_with_edge:%(vertex_label)s {uuid: n_uuid})%(l_arrow)s-[e {branch: branch}]-%(r_arrow)s(peer)
                WHERE %(id_func_name)s(node_with_edge) IN vertex_element_ids
                AND type(e) = edge_type
                AND %(id_func_name)s(peer) = peer_element_id
                // count of nodes with this edge_type/branch/peer combo
                WITH DISTINCT n_uuid, branch, edge_type, peer_element_id, %(id_func_name)s(node_with_edge) AS node_with_edge_element_id
                WITH n_uuid, branch, edge_type, peer_element_id, collect(node_with_edge_element_id) AS node_with_edge_element_ids
                // nodes with this edge_type/branch/peer combo where the edge is DELETED
                OPTIONAL MATCH (node_with_deleted_edge:%(vertex_label)s {uuid: n_uuid})%(l_arrow)s-[e {branch: branch}]-%(r_arrow)s(peer)
                WHERE %(id_func_name)s(node_with_deleted_edge) IN node_with_edge_element_ids
                AND type(e) = edge_type
                AND %(id_func_name)s(peer) = peer_element_id
                AND (e.status = "deleted" OR e.to IS NOT NULL)
                // count of nodes with this DELETED edge_type/branch/peer combo
                WITH DISTINCT node_with_edge_element_ids, %(id_func_name)s(node_with_deleted_edge) AS node_with_deleted_edge_element_id
                WITH node_with_edge_element_ids, collect(node_with_deleted_edge_element_id) AS node_with_deleted_edge_element_ids
                RETURN size(node_with_edge_element_ids) = size(node_with_deleted_edge_element_ids) AS all_edges_deleted
            }
            //------------
            // What is the earliest active time for this branch/edge_type/peer_element_id/UUID/labels combination?
            //------------
            CALL (n_uuid, vertex_element_ids, branch, edge_type, peer_element_id) {
                MATCH (n {uuid: n_uuid})%(l_arrow)s-[e {branch: branch, status: "active"}]-%(r_arrow)s(peer)
                WHERE %(id_func_name)s(n) IN vertex_element_ids
                AND type(e) = edge_type
                AND %(id_func_name)s(peer) = peer_element_id
                RETURN e.from AS earliest_active_time
                ORDER BY e.from ASC
                LIMIT 1
            }
            //------------
            // What is the latest deleted time for this branch/edge_type/peer_element_id/UUID/labels combination?
            //------------
            CALL (n_uuid, vertex_element_ids, branch, edge_type, peer_element_id, all_edges_deleted) {
                OPTIONAL MATCH (n {uuid: n_uuid})%(l_arrow)s-[e {branch: branch}]-%(r_arrow)s(peer)
                WHERE all_edges_deleted = TRUE
                AND %(id_func_name)s(n) IN vertex_element_ids
                AND type(e) = edge_type
                AND %(id_func_name)s(peer) = peer_element_id
                RETURN CASE
                    WHEN e.status = "active" THEN e.to
                    ELSE e.from
                END AS latest_deleted_time
                ORDER BY latest_deleted_time DESC
                LIMIT 1
            }
            // ------------
            // Add the %(direction)s edges to the node we are keeping, if necessary
            // ------------
            CALL (
                n_uuid, vertex_element_ids, element_id_to_keep, branch, edge_type, peer_element_id, all_edges_deleted,
                earliest_active_time, latest_deleted_time
            ) {
                // get the node we are keeping
                MATCH (vertex_to_keep {uuid: n_uuid})
                WHERE %(id_func_name)s(vertex_to_keep) = element_id_to_keep
                // get the peer we are linking to
                MATCH (n {uuid: n_uuid})%(l_arrow)s-[]-%(r_arrow)s(peer)
                WHERE %(id_func_name)s(n) IN vertex_element_ids
                AND %(id_func_name)s(peer) = peer_element_id
                WITH peer, vertex_to_keep
                LIMIT 1
                // ------------
                // mark all other edges for this branch/edge_type/peer combination as to be deleted
                // we will unmark any to keep later
                // ------------
                CALL (n_uuid, branch, peer, vertex_element_ids, edge_type) {
                    OPTIONAL MATCH (n {uuid: n_uuid})%(l_arrow)s-[edge_to_delete {branch: branch}]-%(r_arrow)s(peer)
                    WHERE %(id_func_name)s(n) IN vertex_element_ids
                    AND type(edge_to_delete) = edge_type
                    SET edge_to_delete.to_delete = TRUE
                }
                CALL (n_uuid, branch, vertex_element_ids, edge_type, peer) {
                    // ------------
                    // get the edge to copy
                    // ------------
                    MATCH (n {uuid: n_uuid})%(l_arrow)s-[e {branch: branch, status: "active"}]-%(r_arrow)s(peer)
                    WHERE %(id_func_name)s(n) IN vertex_element_ids
                    AND type(e) = edge_type
                    RETURN e AS edge_to_copy
                    ORDER BY e.from DESC
                    LIMIT 1
                }
                %(active_edge_subqueries)s
                // ------------
                // conditionally create the deleted edges
                // ------------
                WITH vertex_to_keep, peer, edge_to_copy
                WHERE all_edges_deleted = TRUE
                %(deleted_edge_subqueries)s
            }
        }
        """ % {
            "direction": direction.value,
            "l_arrow": l_arrow,
            "r_arrow": r_arrow,
            "id_func_name": db.get_id_function_name(),
            "active_edge_subqueries": active_edge_subqueries,
            "deleted_edge_subqueries": deleted_edge_subqueries,
            "vertex_label": self.vertex_label,
        }
        return edges_query

    async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None:  # noqa: ARG002
        self.params["limit"] = self.limit or 1000
        self.params["offset"] = self.offset or 0
        query_start = """
        //------------
        // Find vertices with the same labels and UUID
        //------------
        MATCH (n:%(vertex_label)s)
        WITH n.uuid AS node_uuid, count(*) as num_nodes_with_uuid
        WHERE num_nodes_with_uuid > 1
        WITH DISTINCT node_uuid
        ORDER BY node_uuid ASC
        MATCH (n:%(vertex_label)s {uuid: node_uuid})
        WITH node_uuid, n, %(id_func_name)s(n) AS element_id
        ORDER BY node_uuid ASC, element_id ASC
        CALL (n) {
            WITH labels(n) AS n_labels
            UNWIND n_labels AS n_label
            WITH n_label
            ORDER BY n_label ASC
            RETURN collect(n_label) AS sorted_labels
        }
        WITH n.uuid AS n_uuid, sorted_labels, collect(element_id) AS vertex_element_ids
        WHERE size(vertex_element_ids) > 1
        WITH n_uuid, vertex_element_ids
        //------------
        // Are there more nodes to process after this query?
        //------------
        WITH collect([n_uuid, vertex_element_ids]) AS duplicate_details
        WITH duplicate_details, size(duplicate_details) > ($offset + $limit) AS more_nodes_to_process
        UNWIND duplicate_details AS duplicate_detail
        WITH duplicate_detail[0] AS n_uuid, duplicate_detail[1] AS vertex_element_ids, more_nodes_to_process
        //------------
        // Limit the nodes to process
        //------------
        SKIP $offset
        LIMIT $limit
        //------------
        // Which node are we going to keep for this UUID/labels combination?
        //------------
        CALL (vertex_element_ids) {
            UNWIND vertex_element_ids AS element_id
            WITH element_id
            ORDER BY element_id ASC
            RETURN element_id AS element_id_to_keep
            LIMIT 1
        }
        """ % {"id_func_name": db.get_id_function_name(), "vertex_label": self.vertex_label}
        self.add_to_query(query_start)

        if self.outbound_edge_types:
            outbound_edges_query = self._build_directed_edges_subquery(
                db=db,
                direction=RelationshipDirection.OUTBOUND,
                edge_types=self.outbound_edge_types,
            )
            self.add_to_query(outbound_edges_query)
        if self.inbound_edge_types:
            inbound_edges_query = self._build_directed_edges_subquery(
                db=db,
                direction=RelationshipDirection.INBOUND,
                edge_types=self.inbound_edge_types,
            )
            self.add_to_query(inbound_edges_query)

        query_end = """
        // ------------
        // Mark the nodes to delete
        // ------------
        MATCH (node_to_delete:%(vertex_label)s {uuid: n_uuid})
        WHERE %(id_func_name)s(node_to_delete) IN vertex_element_ids
        AND %(id_func_name)s(node_to_delete) <> element_id_to_keep
        SET node_to_delete.to_delete = TRUE
        RETURN more_nodes_to_process
        """ % {"id_func_name": db.get_id_function_name(), "vertex_label": self.vertex_label}
        self.add_to_query(query_end)
        self.return_labels = ["more_nodes_to_process"]

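The consolidation rules in the docstring above are dense, so here is a minimal Python restatement of how the times for one edge group are settled (illustrative only, not part of the diff; the edge dicts are hypothetical stand-ins for graph edges):

# Illustrative only (not part of the diff): the consolidation rule from the
# docstring, applied to one hypothetical edge group (same branch, type, peer).
edges = [
    {"status": "active", "from": "t1", "to": "t2"},  # closed, so it counts as deleted
    {"status": "deleted", "from": "t3", "to": None},
]
earliest_active = min(e["from"] for e in edges if e["status"] == "active")
all_deleted = all(e["status"] == "deleted" or e["to"] is not None for e in edges)
latest_deleted = (
    max(e["to"] if e["status"] == "active" else e["from"] for e in edges)
    if all_deleted
    else None
)
# The kept vertex ends up with one active edge (from="t1", to="t3") and,
# because every edge in the group is deleted, one deleted edge (from="t3").
assert (earliest_active, all_deleted, latest_deleted) == ("t1", True, "t3")
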
class DeleteDuplicatedEdgesQuery(Query):
    """
    For all Node vertices, find duplicated or overlapping edges of the same status, type, direction, and branch to update and delete
    - one edge will be kept for each pair of nodes and a given status, type, direction, and branch. it will be
      updated to have the earliest "from" and latest "to" times in this group
    - all the other duplicate/overlapping edges will be deleted
    """

    name = "delete_duplicated_edges"
    type = QueryType.WRITE
    insert_return = False

    async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None:  # noqa: ARG002
        query = """
        // ------------
        // Find vertex pairs that have duplicate edges
        // ------------
        MATCH (node_with_dup_edges:Node)-[edge]-(peer)
        WITH
            node_with_dup_edges,
            type(edge) AS edge_type,
            edge.status AS edge_status,
            edge.branch AS edge_branch,
            peer,
            %(id_func_name)s(startNode(edge)) = %(id_func_name)s(node_with_dup_edges) AS is_outbound
        WITH node_with_dup_edges, edge_type, edge_status, edge_branch, peer, is_outbound, count(*) AS num_dup_edges
        WHERE num_dup_edges > 1
        WITH DISTINCT node_with_dup_edges, edge_type, edge_branch, peer, is_outbound
        CALL (node_with_dup_edges, edge_type, edge_branch, peer, is_outbound) {
            // ------------
            // Get the earliest active and deleted edges for this branch
            // ------------
            OPTIONAL MATCH (node_with_dup_edges)-[active_edge {branch: edge_branch, status: "active"}]-(peer)
            WHERE type(active_edge) = edge_type
            AND (%(id_func_name)s(startNode(active_edge)) = %(id_func_name)s(node_with_dup_edges) OR is_outbound = FALSE)
            WITH node_with_dup_edges, edge_type, edge_branch, peer, is_outbound, active_edge
            ORDER BY active_edge.from ASC
            WITH node_with_dup_edges, edge_type, edge_branch, peer, is_outbound, head(collect(active_edge.from)) AS active_from
            OPTIONAL MATCH (node_with_dup_edges)-[deleted_edge {branch: edge_branch, status: "deleted"}]-(peer)
            WHERE %(id_func_name)s(startNode(deleted_edge)) = %(id_func_name)s(node_with_dup_edges) OR is_outbound = FALSE
            WITH node_with_dup_edges, edge_type, edge_branch, peer, is_outbound, active_from, deleted_edge
            ORDER BY deleted_edge.from DESC
            WITH node_with_dup_edges, edge_type, edge_branch, peer, is_outbound, active_from, head(collect(deleted_edge.from)) AS deleted_from
            // ------------
            // ensure one active edge with correct from and to times
            // set the others to be deleted
            // ------------
            CALL (node_with_dup_edges, edge_type, edge_branch, peer, is_outbound, active_from, deleted_from) {
                OPTIONAL MATCH (node_with_dup_edges)-[active_e {branch: edge_branch, status: "active"}]-(peer)
                WHERE type(active_e) = edge_type
                AND (%(id_func_name)s(startNode(active_e)) = %(id_func_name)s(node_with_dup_edges) OR is_outbound = FALSE)
                WITH active_from, deleted_from, collect(active_e) AS active_edges
                WITH active_from, deleted_from, head(active_edges) AS edge_to_keep, tail(active_edges) AS edges_to_delete
                SET edge_to_keep.from = active_from
                SET edge_to_keep.to = deleted_from
                WITH edges_to_delete
                UNWIND edges_to_delete AS edge_to_delete
                SET edge_to_delete.to_delete = TRUE
            }
            // ------------
            // ensure one deleted edge with correct from time, if necessary
            // set the others to be deleted
            // ------------
            CALL (node_with_dup_edges, edge_type, edge_branch, peer, is_outbound, deleted_from) {
                MATCH (node_with_dup_edges)-[deleted_e {branch: edge_branch, status: "deleted"}]-(peer)
                WHERE type(deleted_e) = edge_type
                AND (%(id_func_name)s(startNode(deleted_e)) = %(id_func_name)s(node_with_dup_edges) OR is_outbound = FALSE)
                WITH deleted_from, collect(deleted_e) AS deleted_edges
                WITH deleted_from, head(deleted_edges) AS edge_to_keep, tail(deleted_edges) AS edges_to_delete
                SET edge_to_keep.from = deleted_from
                WITH edges_to_delete
                UNWIND edges_to_delete AS edge_to_delete
                SET edge_to_delete.to_delete = TRUE
            }
        }
        """ % {"id_func_name": db.get_id_function_name()}
        self.add_to_query(query)

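As a quick illustration of the window that survives a duplicate-edge group (not part of the diff; the dates are made up):

# Illustrative only (not part of the diff): the time window kept for the single
# surviving active edge of a duplicate-edge group.
active_froms = ["2023-01-02", "2023-01-01"]   # duplicate active edges' "from" times
deleted_froms = ["2023-02-01", "2023-03-01"]  # duplicate deleted edges' "from" times
surviving_from = min(active_froms)  # query: ORDER BY active_edge.from ASC, head(collect(...))
surviving_to = max(deleted_froms)   # query: ORDER BY deleted_edge.from DESC, head(collect(...))
assert (surviving_from, surviving_to) == ("2023-01-01", "2023-03-01")
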
class DeleteIllegalRelationships(Query):
    """
    Find all Relationship vertices with the same UUID (in a valid database, there are none)
    If any of these Relationships have an IS_RELATED edge to a deleted Node, then delete them
        this includes if an IS_RELATED edge was added on a branch after the Node was deleted on main or -global-
    If any of these Relationships are now only connected to a single Node, then delete them
    """

    name = "delete_illegal_relationships"
    type = QueryType.WRITE
    insert_return = False

    async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None:  # noqa: ARG002
        query = """
        // ------------
        // Get the default and global branch names
        // ------------
        MATCH (default_b:Branch)
        WHERE default_b.is_default = TRUE
        WITH default_b.name AS default_branch
        LIMIT 1
        MATCH (global_b:Branch)
        WHERE global_b.is_global = TRUE
        WITH default_branch, global_b.name AS global_branch
        LIMIT 1
        // ------------
        // Find relationships with duplicate UUIDs
        // ------------
        MATCH (r: Relationship)
        WITH default_branch, global_branch, r.uuid AS r_uuid, count(*) AS num_dups
        WHERE num_dups > 1
        WITH DISTINCT default_branch, global_branch, r_uuid
        // ------------
        // Find any IS_RELATED edges on the duplicate Relationships that link to deleted Nodes,
        // accounting for if the edge was added on a branch after the Node was deleted on main
        // ------------
        CALL (default_branch, global_branch, r_uuid) {
            MATCH (:Relationship {uuid: r_uuid})-[is_related:IS_RELATED]-(n:Node)
            CALL (is_related) {
                MATCH (b:Branch {name: is_related.branch})
                RETURN b.branched_from AS edge_branched_from_time
            }
            // ------------
            // If this Node was deleted
            // ------------
            MATCH (n)-[is_part_of:IS_PART_OF]->(:Root)
            WHERE (is_part_of.status = "deleted" OR is_part_of.to IS NOT NULL)
            // ------------
            // before the active IS_RELATED edge's from time, then delete the edge
            // ------------
            WITH is_related, edge_branched_from_time, is_part_of, CASE
                WHEN is_part_of.status = "deleted" THEN is_part_of.from
                ELSE is_part_of.to
            END AS node_deleted_time
            WHERE (is_part_of.branch IN [is_related.branch, global_branch] AND is_related.from > node_deleted_time)
            OR (is_part_of.branch = default_branch AND node_deleted_time < edge_branched_from_time)
            DELETE is_related
        }
        MATCH (rel:Relationship {uuid: r_uuid})
        CALL (rel) {
            OPTIONAL MATCH (rel)-[:IS_RELATED]-(n:Node)
            WITH DISTINCT n
            RETURN count(*) AS num_peers
        }
        WITH rel
        WHERE num_peers < 2
        DETACH DELETE rel
        """
        self.add_to_query(query)

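The deletion-time CASE and the WHERE predicate above are the crux of this query. Here is a minimal Python restatement (illustrative only, not part of the diff; the default branch-name values are assumptions based on the docstring's mention of main and -global-):

# Illustrative only (not part of the diff).
def node_deleted_time(is_part_of_status: str, from_time: str, to_time: str | None) -> str | None:
    # CASE WHEN is_part_of.status = "deleted" THEN is_part_of.from ELSE is_part_of.to END
    return from_time if is_part_of_status == "deleted" else to_time

def is_related_edge_illegal(
    is_part_of_branch: str,
    deleted_time: str,
    is_related_branch: str,
    is_related_from: str,
    edge_branched_from_time: str,
    default_branch: str = "main",     # assumed default-branch name
    global_branch: str = "-global-",  # assumed global-branch name
) -> bool:
    # deleted on the edge's own branch (or globally) before the edge was created,
    # or deleted on the default branch before the edge's branch even diverged
    return (
        is_part_of_branch in (is_related_branch, global_branch)
        and is_related_from > deleted_time
    ) or (is_part_of_branch == default_branch and deleted_time < edge_branched_from_time)
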
class DeleteDuplicateRelationships(Query):
    """
    There can also be leftover duplicate active Relationships that do not have the same UUID.
    They are linked to the same Nodes, have the same Relationship.name, and are on the same branch.
    In this case, we want to DETACH DELETE the later Relationship. We won't lose any information b/c the exact
    same Relationship (maybe with an earlier from time) still exists.
    """

    name = "delete_duplicate_relationships"
    type = QueryType.WRITE
    insert_return = False

    async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None:  # noqa: ARG002
        query = """
        MATCH (n:Node)
        WITH n.uuid AS node_uuid, count(*) as num_nodes_with_uuid
        WHERE num_nodes_with_uuid > 1
        WITH DISTINCT node_uuid
        ORDER BY node_uuid ASC
        MATCH (a:Node {uuid: node_uuid})-[e1:IS_RELATED {status: "active"}]-(rel:Relationship)-[e2:IS_RELATED {branch: e1.branch, status: "active"}]-(b:Node)
        WHERE a.uuid <> b.uuid
        AND e1.to IS NULL
        AND e2.to IS NULL
        WITH a, rel.name AS rel_name, rel, b, e1.branch AS branch, CASE
            WHEN startNode(e1) = a AND startNode(e2) = rel THEN "out"
            WHEN startNode(e1) = rel AND startNode(e2) = b THEN "in"
            ELSE "bidir"
        END AS direction,
        CASE
            WHEN e1.from < e2.from THEN e1.from ELSE e2.from
        END AS earliest_from
        ORDER BY %(id_func_name)s(a), rel_name, %(id_func_name)s(b), direction, branch, earliest_from ASC
        WITH a, rel_name, b, direction, branch, collect(rel) AS relationships_list
        WHERE size(relationships_list) > 1
        WITH a, rel_name, b, direction, branch, tail(relationships_list) AS rels_to_delete
        UNWIND rels_to_delete AS rel_to_delete
        DETACH DELETE rel_to_delete
        """ % {"id_func_name": db.get_id_function_name()}
        self.add_to_query(query)

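Keeping the earliest Relationship comes down to Cypher's collect() + tail() applied to a list ordered by earliest_from; in Python terms (illustrative only, not part of the diff; the names are placeholders):

# Illustrative only (not part of the diff): the first Relationship in each
# (node pair, name, direction, branch) group survives; tail() yields the rest.
relationships_list = ["rel-earliest", "rel-later-1", "rel-later-2"]  # ordered by earliest_from
rels_to_delete = relationships_list[1:]  # Cypher: tail(relationships_list)
assert rels_to_delete == ["rel-later-1", "rel-later-2"]
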
class PerformHardDeletes(Query):
    name = "do_hard_deletes"
    type = QueryType.WRITE
    insert_return = False

    async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None:  # noqa: ARG002
        query = """
        CALL {
            MATCH (n)
            WHERE n.to_delete = TRUE
            DETACH DELETE n
        }
        CALL {
            MATCH ()-[e]-()
            WHERE e.to_delete = TRUE
            DELETE e
        }
        """
        self.add_to_query(query)

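PerformHardDeletes is the sweep half of a mark-then-sweep pattern: the queries above only set the transient to_delete property, and only this query destroys data. A hypothetical two-phase driver (not part of the diff), using the init/execute API the same way Migration029 does below:

# Hypothetical driver (not part of the diff), showing the two-phase split.
async def mark_then_sweep(db: "InfrahubDatabase") -> None:
    marker = await CleanUpDuplicatedUuidVertices.init(
        db=db, vertex_label="Node", outbound_edge_types=[], inbound_edge_types=[]
    )
    await marker.execute(db=db)   # phase 1: only SETs to_delete = TRUE
    sweeper = await PerformHardDeletes.init(db=db)
    await sweeper.execute(db=db)  # phase 2: DETACH DELETE / DELETE marked entities
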
class Migration029(ArbitraryMigration):
    """
    Clean up a variety of bad data created during bugged merges for node kind/inheritance updates

    1. Identify improperly duplicated nodes (ie nodes with the same UUID and the same database labels)
        a. Consolidate edges onto a single duplicated node, making sure that the edges remain active if ANY active path exists
        b. Delete the duplicated edges
        c. Delete the duplicated nodes
    2. Delete duplicated Relationships linked to the de-duplicated node
    3. Delete duplicated edges across the database

    Some of these changes must be batched because there can be a lot of them and the queries can be rather complex
    Some of these queries also require marking nodes and edges as to be deleted (using the `to_delete` property) and then
    deleting them in a separate query
    """

    name: str = "029_duplicates_cleanup"
    minimum_version: int = 28
    limit: int = 100

    async def validate_migration(self, db: InfrahubDatabase) -> MigrationResult:  # noqa: ARG002
        result = MigrationResult()

        return result

    async def execute(self, db: InfrahubDatabase) -> MigrationResult:
        migration_result = MigrationResult()
        limit = self.limit
        offset = 0
        more_nodes_to_process = True
        try:
            while more_nodes_to_process:
                log.info(f"Running node duplicates cleanup query {limit=},{offset=}")
                node_cleanup_query = await CleanUpDuplicatedUuidVertices.init(
                    db=db,
                    vertex_label="Node",
                    limit=limit,
                    offset=offset,
                    outbound_edge_types=[
                        DatabaseEdgeType.IS_PART_OF,
                        DatabaseEdgeType.HAS_ATTRIBUTE,
                        DatabaseEdgeType.IS_RELATED,
                        DatabaseEdgeType.IS_RESERVED,
                    ],
                    inbound_edge_types=[
                        DatabaseEdgeType.IS_RELATED,
                        DatabaseEdgeType.HAS_OWNER,
                        DatabaseEdgeType.HAS_SOURCE,
                    ],
                )
                await node_cleanup_query.execute(db=db)
                has_results = False
                for result in node_cleanup_query.get_results():
                    has_results = True
                    more_nodes_to_process = result.get_as_type("more_nodes_to_process", bool)
                offset += limit
                if not has_results or not more_nodes_to_process:
                    break

            hard_delete_query = await PerformHardDeletes.init(db=db)
            await hard_delete_query.execute(db=db)

            duplicate_edge_query = await DeleteDuplicatedEdgesQuery.init(db=db)
            await duplicate_edge_query.execute(db=db)

            hard_delete_query = await PerformHardDeletes.init(db=db)
            await hard_delete_query.execute(db=db)

            illegal_relationships_cleanup_query = await DeleteIllegalRelationships.init(db=db)
            await illegal_relationships_cleanup_query.execute(db=db)

            offset = 0
            more_nodes_to_process = True
            while more_nodes_to_process:
                log.info(f"Running relationship duplicates cleanup query {limit=},{offset=}")
                relationship_cleanup_query = await CleanUpDuplicatedUuidVertices.init(
                    db=db,
                    vertex_label="Relationship",
                    limit=limit,
                    offset=offset,
                    outbound_edge_types=[
                        DatabaseEdgeType.IS_RELATED,
                        DatabaseEdgeType.IS_VISIBLE,
                        DatabaseEdgeType.IS_PROTECTED,
                        DatabaseEdgeType.HAS_OWNER,
                        DatabaseEdgeType.HAS_SOURCE,
                    ],
                    inbound_edge_types=[
                        DatabaseEdgeType.IS_RELATED,
                    ],
                )
                await relationship_cleanup_query.execute(db=db)
                has_results = False
                for result in relationship_cleanup_query.get_results():
                    has_results = True
                    more_nodes_to_process = result.get_as_type("more_nodes_to_process", bool)
                offset += limit
                if not has_results or not more_nodes_to_process:
                    break

            hard_delete_query = await PerformHardDeletes.init(db=db)
            await hard_delete_query.execute(db=db)

            duplicate_relationships_cleanup_query = await DeleteDuplicateRelationships.init(db=db)
            await duplicate_relationships_cleanup_query.execute(db=db)

        except Exception as exc:
            migration_result.errors.append(str(exc))
            return migration_result

        return migration_result
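For context, a sketch of how a migration runner might drive this class through its two hooks (the driver itself is hypothetical; validate_migration, execute, and MigrationResult.errors are taken from the code above):

# Hypothetical runner (not part of the diff).
async def run_migration_029(db: "InfrahubDatabase") -> bool:
    migration = Migration029()
    result = await migration.validate_migration(db=db)
    if not result.errors:
        result = await migration.execute(db=db)
    for error in result.errors:
        log.error(error)
    return not result.errors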