infrahub-server 1.2.10__py3-none-any.whl → 1.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. infrahub/config.py +9 -0
  2. infrahub/core/constants/database.py +1 -0
  3. infrahub/core/constants/infrahubkind.py +1 -0
  4. infrahub/core/constraint/node/runner.py +1 -1
  5. infrahub/core/diff/query/save.py +75 -45
  6. infrahub/core/diff/query_parser.py +5 -1
  7. infrahub/core/diff/tasks.py +3 -3
  8. infrahub/core/graph/__init__.py +1 -1
  9. infrahub/core/migrations/graph/__init__.py +6 -0
  10. infrahub/core/migrations/graph/m029_duplicates_cleanup.py +680 -0
  11. infrahub/core/migrations/graph/m030_illegal_edges.py +83 -0
  12. infrahub/core/migrations/query/attribute_add.py +13 -9
  13. infrahub/core/node/resource_manager/ip_address_pool.py +6 -2
  14. infrahub/core/node/resource_manager/ip_prefix_pool.py +6 -2
  15. infrahub/core/protocols.py +4 -0
  16. infrahub/core/query/diff.py +7 -0
  17. infrahub/core/schema/definitions/core/__init__.py +8 -1
  18. infrahub/core/schema/definitions/core/resource_pool.py +20 -0
  19. infrahub/core/schema/schema_branch.py +5 -3
  20. infrahub/core/validators/tasks.py +1 -1
  21. infrahub/database/__init__.py +5 -4
  22. infrahub/database/validation.py +101 -0
  23. infrahub/graphql/app.py +1 -1
  24. infrahub/graphql/loaders/node.py +1 -1
  25. infrahub/graphql/loaders/peers.py +1 -1
  26. infrahub/graphql/mutations/main.py +1 -1
  27. infrahub/graphql/mutations/proposed_change.py +1 -1
  28. infrahub/graphql/queries/relationship.py +1 -1
  29. infrahub/graphql/queries/task.py +10 -0
  30. infrahub/graphql/resolvers/many_relationship.py +4 -4
  31. infrahub/graphql/resolvers/resolver.py +4 -4
  32. infrahub/graphql/resolvers/single_relationship.py +2 -2
  33. infrahub/graphql/subscription/graphql_query.py +2 -2
  34. infrahub/graphql/types/branch.py +1 -1
  35. infrahub/graphql/types/task_log.py +3 -2
  36. infrahub/message_bus/operations/refresh/registry.py +4 -4
  37. infrahub/message_bus/operations/requests/proposed_change.py +4 -4
  38. infrahub/patch/queries/delete_duplicated_edges.py +40 -29
  39. infrahub/task_manager/task.py +44 -4
  40. infrahub/telemetry/database.py +1 -1
  41. infrahub/telemetry/tasks.py +1 -1
  42. infrahub/webhook/tasks.py +2 -1
  43. {infrahub_server-1.2.10.dist-info → infrahub_server-1.2.12.dist-info}/METADATA +3 -3
  44. {infrahub_server-1.2.10.dist-info → infrahub_server-1.2.12.dist-info}/RECORD +52 -49
  45. {infrahub_server-1.2.10.dist-info → infrahub_server-1.2.12.dist-info}/WHEEL +1 -1
  46. infrahub_testcontainers/container.py +239 -64
  47. infrahub_testcontainers/docker-compose-cluster.test.yml +321 -0
  48. infrahub_testcontainers/docker-compose.test.yml +1 -0
  49. infrahub_testcontainers/helpers.py +15 -1
  50. infrahub_testcontainers/plugin.py +9 -0
  51. infrahub/patch/queries/consolidate_duplicated_nodes.py +0 -109
  52. {infrahub_server-1.2.10.dist-info → infrahub_server-1.2.12.dist-info}/LICENSE.txt +0 -0
  53. {infrahub_server-1.2.10.dist-info → infrahub_server-1.2.12.dist-info}/entry_points.txt +0 -0
infrahub/core/migrations/graph/m029_duplicates_cleanup.py (new file)
@@ -0,0 +1,680 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING, Any, Literal
+
+ from infrahub.core.constants import RelationshipDirection
+ from infrahub.core.constants.database import DatabaseEdgeType
+ from infrahub.core.migrations.shared import MigrationResult
+ from infrahub.core.query import Query, QueryType
+ from infrahub.log import get_logger
+
+ from ..shared import ArbitraryMigration
+
+ if TYPE_CHECKING:
+     from infrahub.database import InfrahubDatabase
+
+ log = get_logger()
+
+
+ class CleanUpDuplicatedUuidVertices(Query):
+     """
+     Find vertices that include the given label and have the same UUID and same set of labels
+     For each of these duplicate vertex groups, keep one and mark all the others to be deleted by the PerformHardDeletes query
+     - Group all of the edges touching a vertex in this vertex group by branch, edge_type, peer_element_id, and direction
+     - For each edge group, we will link one edge to the vertex we are keeping for this vertex group and mark all of the others to be deleted
+         - we will set/create one active edge from the vertex to keep to the peer of this group, setting its from time to the earliest active
+           from time in this group
+         - if ALL edges in this edge group are deleted, then we will set the to time of the active edge to the latest deleted time and
+           set/create a deleted edge with a from time of the latest deleted time
+     """
+
+     name = "clean_up_duplicated_uuid_vertices"
+     type = QueryType.WRITE
+     insert_return = False
+     insert_limit = False
+
+     def __init__(
+         self,
+         vertex_label: str,
+         outbound_edge_types: list[DatabaseEdgeType],
+         inbound_edge_types: list[DatabaseEdgeType],
+         **kwargs: Any,
+     ) -> None:
+         super().__init__(**kwargs)
+         self.vertex_label = vertex_label
+         self.outbound_edge_types = outbound_edge_types
+         self.inbound_edge_types = inbound_edge_types
+
+     def _get_or_create_active_edge_subquery(
+         self,
+         edge_type: DatabaseEdgeType,
+         direction: Literal[RelationshipDirection.INBOUND, RelationshipDirection.OUTBOUND],
+     ) -> str:
+         if direction is RelationshipDirection.INBOUND:
+             l_arrow = "<"
+             r_arrow = ""
+         else:
+             l_arrow = ""
+             r_arrow = ">"
+
+         query = """
+         CALL {
+             // ------------
+             // get or create the active %(edge_type)s edge
+             // ------------
+             WITH vertex_to_keep, edge_type, branch, peer, earliest_active_time, latest_deleted_time, all_edges_deleted, edge_to_copy
+             WITH vertex_to_keep, edge_type, branch, peer, earliest_active_time, latest_deleted_time, all_edges_deleted, edge_to_copy
+             WHERE edge_type = "%(edge_type)s"
+             MERGE (vertex_to_keep)%(l_arrow)s-[active_edge:%(edge_type)s {branch: branch, status: "active"}]-%(r_arrow)s(peer)
+             WITH active_edge, edge_to_copy, earliest_active_time, latest_deleted_time, all_edges_deleted
+             LIMIT 1
+             SET active_edge.to_delete = NULL
+             SET active_edge.from = earliest_active_time
+             SET active_edge.to = CASE
+                 WHEN all_edges_deleted = TRUE THEN latest_deleted_time
+                 ELSE NULL
+             END
+             SET active_edge.branch_level = edge_to_copy.branch_level
+             SET active_edge.hierarchy = edge_to_copy.hierarchy
+         }
+         """ % {
+             "edge_type": edge_type.value,
+             "l_arrow": l_arrow,
+             "r_arrow": r_arrow,
+         }
+         return query
+
+     def _add_deleted_edge_subquery(
+         self,
+         edge_type: DatabaseEdgeType,
+         direction: Literal[RelationshipDirection.INBOUND, RelationshipDirection.OUTBOUND],
+     ) -> str:
+         if direction is RelationshipDirection.INBOUND:
+             l_arrow = "<"
+             r_arrow = ""
+         else:
+             l_arrow = ""
+             r_arrow = ">"
+         subquery = """
+         CALL {
+             // ------------
+             // create the deleted %(edge_type)s edge
+             // ------------
+             WITH vertex_to_keep, edge_type, branch, peer, latest_deleted_time, edge_to_copy
+             WITH vertex_to_keep, edge_type, branch, peer, latest_deleted_time, edge_to_copy
+             WHERE edge_type = "%(edge_type)s"
+             MERGE (vertex_to_keep)%(l_arrow)s-[deleted_edge:%(edge_type)s {branch: branch, status: "deleted"}]-%(r_arrow)s(peer)
+             WITH deleted_edge, latest_deleted_time, edge_to_copy
+             LIMIT 1
+             SET deleted_edge.to_delete = NULL
+             SET deleted_edge.from = latest_deleted_time
+             SET deleted_edge.to = NULL
+             SET deleted_edge.branch_level = edge_to_copy.branch_level
+             SET deleted_edge.hierarchy = edge_to_copy.hierarchy
+         }
+         """ % {"edge_type": edge_type.value, "l_arrow": l_arrow, "r_arrow": r_arrow}
+         return subquery
+
+     def _build_directed_edges_subquery(
+         self,
+         db: InfrahubDatabase,
+         direction: Literal[RelationshipDirection.INBOUND, RelationshipDirection.OUTBOUND],
+         edge_types: list[DatabaseEdgeType],
+     ) -> str:
+         if direction is RelationshipDirection.INBOUND:
+             l_arrow = "<"
+             r_arrow = ""
+         else:
+             l_arrow = ""
+             r_arrow = ">"
+         active_subqueries: list[str] = []
+         delete_subqueries: list[str] = []
+         for edge_type in edge_types:
+             active_subqueries.append(
+                 self._get_or_create_active_edge_subquery(
+                     edge_type=edge_type,
+                     direction=direction,
+                 )
+             )
+             delete_subqueries.append(self._add_deleted_edge_subquery(edge_type=edge_type, direction=direction))
+         active_edge_subqueries = "\n".join(active_subqueries)
+         deleted_edge_subqueries = "\n".join(delete_subqueries)
+
+         edges_query = """
+         //------------
+         // Get every %(direction)s branch/edge_type/peer element_id combination touching vertices with this uuid/labels combination
+         //------------
+         CALL {
+             WITH n_uuid, vertex_element_ids, element_id_to_keep
+             CALL {
+                 WITH n_uuid, vertex_element_ids
+                 MATCH (n:%(vertex_label)s {uuid: n_uuid})
+                 WHERE %(id_func_name)s(n) IN vertex_element_ids
+                 MATCH (n)%(l_arrow)s-[e]-%(r_arrow)s(peer)
+                 WITH DISTINCT e.branch AS branch, type(e) AS edge_type, %(id_func_name)s(peer) AS peer_element_id
+                 RETURN branch, edge_type, peer_element_id
+             }
+             //------------
+             // Are all of the edges with this branch/edge_type/peer_element_id combination deleted?
+             //------------
+             CALL {
+                 WITH n_uuid, vertex_element_ids, branch, edge_type, peer_element_id
+                 // nodes with this edge_type/branch/peer combo
+                 MATCH (node_with_edge:%(vertex_label)s {uuid: n_uuid})%(l_arrow)s-[e {branch: branch}]-%(r_arrow)s(peer)
+                 WHERE %(id_func_name)s(node_with_edge) IN vertex_element_ids
+                 AND type(e) = edge_type
+                 AND %(id_func_name)s(peer) = peer_element_id
+                 // count of nodes with this edge_type/branch/peer combo
+                 WITH DISTINCT n_uuid, branch, edge_type, peer_element_id, %(id_func_name)s(node_with_edge) AS node_with_edge_element_id
+                 WITH n_uuid, branch, edge_type, peer_element_id, collect(node_with_edge_element_id) AS node_with_edge_element_ids
+                 // nodes with this edge_type/branch/peer combo where the edge is DELETED
+                 OPTIONAL MATCH (node_with_deleted_edge:%(vertex_label)s {uuid: n_uuid})%(l_arrow)s-[e {branch: branch}]-%(r_arrow)s(peer)
+                 WHERE %(id_func_name)s(node_with_deleted_edge) IN node_with_edge_element_ids
+                 AND type(e) = edge_type
+                 AND %(id_func_name)s(peer) = peer_element_id
+                 AND (e.status = "deleted" OR e.to IS NOT NULL)
+                 // count of nodes with this DELETED edge_type/branch/peer combo
+                 WITH DISTINCT node_with_edge_element_ids, %(id_func_name)s(node_with_deleted_edge) AS node_with_deleted_edge_element_id
+                 WITH node_with_edge_element_ids, collect(node_with_deleted_edge_element_id) AS node_with_deleted_edge_element_ids
+                 RETURN size(node_with_edge_element_ids) = size(node_with_deleted_edge_element_ids) AS all_edges_deleted
+             }
+             //------------
+             // What is the earliest active time for this branch/edge_type/peer_element_id/UUID/labels combination?
+             //------------
+             CALL {
+                 WITH n_uuid, vertex_element_ids, branch, edge_type, peer_element_id
+                 MATCH (n {uuid: n_uuid})%(l_arrow)s-[e {branch: branch, status: "active"}]-%(r_arrow)s(peer)
+                 WHERE %(id_func_name)s(n) IN vertex_element_ids
+                 AND type(e) = edge_type
+                 AND %(id_func_name)s(peer) = peer_element_id
+                 RETURN e.from AS earliest_active_time
+                 ORDER BY e.from ASC
+                 LIMIT 1
+             }
+             //------------
+             // What is the latest deleted time for this branch/edge_type/peer_element_id/UUID/labels combination?
+             //------------
+             CALL {
+                 WITH n_uuid, vertex_element_ids, branch, edge_type, peer_element_id, all_edges_deleted
+                 OPTIONAL MATCH (n {uuid: n_uuid})%(l_arrow)s-[e {branch: branch}]-%(r_arrow)s(peer)
+                 WHERE all_edges_deleted = TRUE
+                 AND %(id_func_name)s(n) IN vertex_element_ids
+                 AND type(e) = edge_type
+                 AND %(id_func_name)s(peer) = peer_element_id
+                 RETURN CASE
+                     WHEN e.status = "active" THEN e.to
+                     ELSE e.from
+                 END AS latest_deleted_time
+                 ORDER BY latest_deleted_time DESC
+                 LIMIT 1
+             }
+             // ------------
+             // Add the %(direction)s edges to the node we are keeping, if necessary
+             // ------------
+             CALL {
+                 WITH n_uuid, vertex_element_ids, element_id_to_keep, branch, edge_type, peer_element_id, all_edges_deleted,
+                     earliest_active_time, latest_deleted_time
+                 // get the node we are keeping
+                 MATCH (vertex_to_keep {uuid: n_uuid})
+                 WHERE %(id_func_name)s(vertex_to_keep) = element_id_to_keep
+                 // get the peer we are linking to
+                 MATCH (n {uuid: n_uuid})%(l_arrow)s-[]-%(r_arrow)s(peer)
+                 WHERE %(id_func_name)s(n) IN vertex_element_ids
+                 AND %(id_func_name)s(peer) = peer_element_id
+                 WITH n_uuid, vertex_element_ids, element_id_to_keep, vertex_to_keep, branch, edge_type, peer, all_edges_deleted,
+                     earliest_active_time, latest_deleted_time
+                 LIMIT 1
+                 // ------------
+                 // mark all other edges for this branch/edge_type/peer combination as to be deleted
+                 // we will unmark any to keep later
+                 // ------------
+                 CALL {
+                     WITH n_uuid, branch, peer, vertex_element_ids, edge_type
+                     OPTIONAL MATCH (n {uuid: n_uuid})%(l_arrow)s-[edge_to_delete {branch: branch}]-%(r_arrow)s(peer)
+                     WHERE %(id_func_name)s(n) IN vertex_element_ids
+                     AND type(edge_to_delete) = edge_type
+                     SET edge_to_delete.to_delete = TRUE
+                 }
+                 CALL {
+                     // ------------
+                     // get the edge to copy
+                     // ------------
+                     WITH n_uuid, branch, vertex_element_ids, edge_type, peer
+                     MATCH (n {uuid: n_uuid})%(l_arrow)s-[e {branch: branch, status: "active"}]-%(r_arrow)s(peer)
+                     WHERE %(id_func_name)s(n) IN vertex_element_ids
+                     AND type(e) = edge_type
+                     RETURN e AS edge_to_copy
+                     ORDER BY e.from DESC
+                     LIMIT 1
+                 }
+                 %(active_edge_subqueries)s
+                 // ------------
+                 // conditionally create the deleted edges
+                 // ------------
+                 WITH n_uuid, vertex_element_ids, vertex_to_keep, branch, edge_type, peer, all_edges_deleted,
+                     latest_deleted_time, edge_to_copy
+                 WHERE all_edges_deleted = TRUE
+                 %(deleted_edge_subqueries)s
+             }
+         }
+         """ % {
+             "direction": direction.value,
+             "l_arrow": l_arrow,
+             "r_arrow": r_arrow,
+             "id_func_name": db.get_id_function_name(),
+             "active_edge_subqueries": active_edge_subqueries,
+             "deleted_edge_subqueries": deleted_edge_subqueries,
+             "vertex_label": self.vertex_label,
+         }
+         return edges_query
+
+     async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None:  # noqa: ARG002
+         self.params["limit"] = self.limit or 1000
+         self.params["offset"] = self.offset or 0
+         query_start = """
+         //------------
+         // Find vertices with the same labels and UUID
+         //------------
+         MATCH (n:%(vertex_label)s)
+         WITH n.uuid AS node_uuid, count(*) as num_nodes_with_uuid
+         WHERE num_nodes_with_uuid > 1
+         WITH DISTINCT node_uuid
+         ORDER BY node_uuid ASC
+         MATCH (n:%(vertex_label)s {uuid: node_uuid})
+         WITH node_uuid, n, %(id_func_name)s(n) AS element_id
+         ORDER BY node_uuid ASC, element_id ASC
+         CALL {
+             WITH n
+             WITH labels(n) AS n_labels
+             UNWIND n_labels AS n_label
+             WITH n_label
+             ORDER BY n_label ASC
+             RETURN collect(n_label) AS sorted_labels
+         }
+         WITH n.uuid AS n_uuid, sorted_labels, collect(element_id) AS vertex_element_ids
+         WHERE size(vertex_element_ids) > 1
+         WITH n_uuid, vertex_element_ids
+         //------------
+         // Are there more nodes to process after this query?
+         //------------
+         WITH collect([n_uuid, vertex_element_ids]) AS duplicate_details
+         WITH duplicate_details, size(duplicate_details) > ($offset + $limit) AS more_nodes_to_process
+         UNWIND duplicate_details AS duplicate_detail
+         WITH duplicate_detail[0] AS n_uuid, duplicate_detail[1] AS vertex_element_ids, more_nodes_to_process
+         //------------
+         // Limit the nodes to process
+         //------------
+         SKIP $offset
+         LIMIT $limit
+         //------------
+         // Which node are we going to keep for this UUID/labels combination?
+         //------------
+         CALL {
+             WITH vertex_element_ids
+             UNWIND vertex_element_ids AS element_id
+             WITH element_id
+             ORDER BY element_id ASC
+             RETURN element_id AS element_id_to_keep
+             LIMIT 1
+         }
+         """ % {"id_func_name": db.get_id_function_name(), "vertex_label": self.vertex_label}
+         self.add_to_query(query_start)
+
+         if self.outbound_edge_types:
+             outbound_edges_query = self._build_directed_edges_subquery(
+                 db=db,
+                 direction=RelationshipDirection.OUTBOUND,
+                 edge_types=self.outbound_edge_types,
+             )
+             self.add_to_query(outbound_edges_query)
+         if self.inbound_edge_types:
+             inbound_edges_query = self._build_directed_edges_subquery(
+                 db=db,
+                 direction=RelationshipDirection.INBOUND,
+                 edge_types=self.inbound_edge_types,
+             )
+             self.add_to_query(inbound_edges_query)
+
+         query_end = """
+         // ------------
+         // Mark the nodes to delete
+         // ------------
+         MATCH (node_to_delete:%(vertex_label)s {uuid: n_uuid})
+         WHERE %(id_func_name)s(node_to_delete) IN vertex_element_ids
+         AND %(id_func_name)s(node_to_delete) <> element_id_to_keep
+         SET node_to_delete.to_delete = TRUE
+         RETURN more_nodes_to_process
+         """ % {"id_func_name": db.get_id_function_name(), "vertex_label": self.vertex_label}
+         self.add_to_query(query_end)
+         self.return_labels = ["more_nodes_to_process"]
+
+
+ class DeleteDuplicatedEdgesQuery(Query):
+     """
+     For all Node vertices, find duplicated or overlapping edges of the same status, type, direction, and branch to update and delete
+     - one edge will be kept for each pair of nodes and a given status, type, direction, and branch. It will be
+       updated to have the earliest "from" and latest "to" times in this group
+     - all the other duplicate/overlapping edges will be deleted
+     """
+
+     name = "delete_duplicated_edges"
+     type = QueryType.WRITE
+     insert_return = False
+
+     async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None:  # noqa: ARG002
+         query = """
+         // ------------
+         // Find vertex pairs that have duplicate edges
+         // ------------
+         MATCH (node_with_dup_edges:Node)-[edge]-(peer)
+         WITH
+             node_with_dup_edges,
+             type(edge) AS edge_type,
+             edge.status AS edge_status,
+             edge.branch AS edge_branch,
+             peer,
+             %(id_func_name)s(startNode(edge)) = %(id_func_name)s(node_with_dup_edges) AS is_outbound
+         WITH node_with_dup_edges, edge_type, edge_status, edge_branch, peer, is_outbound, count(*) AS num_dup_edges
+         WHERE num_dup_edges > 1
+         WITH DISTINCT node_with_dup_edges, edge_type, edge_branch, peer, is_outbound
+         CALL {
+             // ------------
+             // Get the earliest active and latest deleted from times for this branch
+             // ------------
+             WITH node_with_dup_edges, edge_type, edge_branch, peer, is_outbound
+             OPTIONAL MATCH (node_with_dup_edges)-[active_edge {branch: edge_branch, status: "active"}]-(peer)
+             WHERE type(active_edge) = edge_type
+             AND (%(id_func_name)s(startNode(active_edge)) = %(id_func_name)s(node_with_dup_edges) OR is_outbound = FALSE)
+             WITH node_with_dup_edges, edge_type, edge_branch, peer, is_outbound, active_edge
+             ORDER BY active_edge.from ASC
+             WITH node_with_dup_edges, edge_type, edge_branch, peer, is_outbound, head(collect(active_edge.from)) AS active_from
+             OPTIONAL MATCH (node_with_dup_edges)-[deleted_edge {branch: edge_branch, status: "deleted"}]-(peer)
+             WHERE %(id_func_name)s(startNode(deleted_edge)) = %(id_func_name)s(node_with_dup_edges) OR is_outbound = FALSE
+             WITH node_with_dup_edges, edge_type, edge_branch, peer, is_outbound, active_from, deleted_edge
+             ORDER BY deleted_edge.from DESC
+             WITH node_with_dup_edges, edge_type, edge_branch, peer, is_outbound, active_from, head(collect(deleted_edge.from)) AS deleted_from
+             // ------------
+             // ensure one active edge with correct from and to times
+             // set the others to be deleted
+             // ------------
+             CALL {
+                 WITH node_with_dup_edges, edge_type, edge_branch, peer, is_outbound, active_from, deleted_from
+                 OPTIONAL MATCH (node_with_dup_edges)-[active_e {branch: edge_branch, status: "active"}]-(peer)
+                 WHERE type(active_e) = edge_type
+                 AND (%(id_func_name)s(startNode(active_e)) = %(id_func_name)s(node_with_dup_edges) OR is_outbound = FALSE)
+                 WITH active_from, deleted_from, collect(active_e) AS active_edges
+                 WITH active_from, deleted_from, head(active_edges) AS edge_to_keep, tail(active_edges) AS edges_to_delete
+                 SET edge_to_keep.from = active_from
+                 SET edge_to_keep.to = deleted_from
+                 WITH edges_to_delete
+                 UNWIND edges_to_delete AS edge_to_delete
+                 SET edge_to_delete.to_delete = TRUE
+             }
+             // ------------
+             // ensure one deleted edge with correct from time, if necessary
+             // set the others to be deleted
+             // ------------
+             CALL {
+                 WITH node_with_dup_edges, edge_type, edge_branch, peer, is_outbound, deleted_from
+                 MATCH (node_with_dup_edges)-[deleted_e {branch: edge_branch, status: "deleted"}]-(peer)
+                 WHERE type(deleted_e) = edge_type
+                 AND (%(id_func_name)s(startNode(deleted_e)) = %(id_func_name)s(node_with_dup_edges) OR is_outbound = FALSE)
+                 WITH deleted_from, collect(deleted_e) AS deleted_edges
+                 WITH deleted_from, head(deleted_edges) AS edge_to_keep, tail(deleted_edges) AS edges_to_delete
+                 SET edge_to_keep.from = deleted_from
+                 WITH edges_to_delete
+                 UNWIND edges_to_delete AS edge_to_delete
+                 SET edge_to_delete.to_delete = TRUE
+             }
+         }
+         """ % {"id_func_name": db.get_id_function_name()}
+         self.add_to_query(query)
+
+
+ class DeleteIllegalRelationships(Query):
+     """
+     Find all Relationship vertices with the same UUID (in a valid database, there are none)
+     If any of these Relationships have an IS_RELATED edge to a deleted Node, then delete them
+     this includes if an IS_RELATED edge was added on a branch after the Node was deleted on main or -global-
+     If any of these Relationships are now only connected to a single Node, then delete them
+     """
+
+     name = "delete_illegal_relationships"
+     type = QueryType.WRITE
+     insert_return = False
+
+     async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None:  # noqa: ARG002
+         query = """
+         // ------------
+         // Get the default and global branch names
+         // ------------
+         MATCH (default_b:Branch)
+         WHERE default_b.is_default = TRUE
+         WITH default_b.name AS default_branch
+         LIMIT 1
+         MATCH (global_b:Branch)
+         WHERE global_b.is_global = TRUE
+         WITH default_branch, global_b.name AS global_branch
+         LIMIT 1
+         // ------------
+         // Find relationships with duplicate UUIDs
+         // ------------
+         MATCH (r: Relationship)
+         WITH default_branch, global_branch, r.uuid AS r_uuid, count(*) AS num_dups
+         WHERE num_dups > 1
+         WITH DISTINCT default_branch, global_branch, r_uuid
+         // ------------
+         // Find any IS_RELATED edges on the duplicate Relationships that link to deleted Nodes,
+         // accounting for if the edge was added on a branch after the Node was deleted on main
+         // ------------
+         CALL {
+             WITH default_branch, global_branch, r_uuid
+             MATCH (:Relationship {uuid: r_uuid})-[is_related:IS_RELATED]-(n:Node)
+             CALL {
+                 WITH is_related
+                 MATCH (b:Branch {name: is_related.branch})
+                 RETURN b.branched_from AS edge_branched_from_time
+             }
+             // ------------
+             // If this Node was deleted
+             // ------------
+             MATCH (n)-[is_part_of:IS_PART_OF]->(:Root)
+             WHERE (is_part_of.status = "deleted" OR is_part_of.to IS NOT NULL)
+             // ------------
+             // before the active IS_RELATED edge's from time, then delete the edge
+             // ------------
+             WITH default_branch, global_branch, is_related, edge_branched_from_time, is_part_of, CASE
+                 WHEN is_part_of.status = "deleted" THEN is_part_of.from
+                 ELSE is_part_of.to
+             END AS node_deleted_time
+             WHERE (is_part_of.branch IN [is_related.branch, global_branch] AND is_related.from > node_deleted_time)
+             OR (is_part_of.branch = default_branch AND node_deleted_time < edge_branched_from_time)
+             DELETE is_related
+         }
+         MATCH (rel:Relationship {uuid: r_uuid})
+         CALL {
+             WITH rel
+             OPTIONAL MATCH (rel)-[:IS_RELATED]-(n:Node)
+             WITH DISTINCT n
+             RETURN count(*) AS num_peers
+         }
+         WITH rel, num_peers
+         WHERE num_peers < 2
+         DETACH DELETE rel
+         """
+         self.add_to_query(query)
+
+
+ class DeleteDuplicateRelationships(Query):
+     """
+     There can also be leftover duplicate active Relationships that do not have the same UUID.
+     They are linked to the same Nodes, have the same Relationship.name, and are on the same branch.
+     In this case, we want to DETACH DELETE the later Relationship. We won't lose any information because the exact
+     same Relationship (maybe with an earlier from time) still exists.
+     """
+
+     name = "delete_duplicate_relationships"
+     type = QueryType.WRITE
+     insert_return = False
+
+     async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None:  # noqa: ARG002
+         query = """
+         MATCH (n:Node)
+         WITH n.uuid AS node_uuid, count(*) as num_nodes_with_uuid
+         WHERE num_nodes_with_uuid > 1
+         WITH DISTINCT node_uuid
+         ORDER BY node_uuid ASC
+         MATCH (a:Node {uuid: node_uuid})-[e1:IS_RELATED {status: "active"}]-(rel:Relationship)-[e2:IS_RELATED {branch: e1.branch, status: "active"}]-(b:Node)
+         WHERE a.uuid <> b.uuid
+         AND e1.to IS NULL
+         AND e2.to IS NULL
+         WITH a, rel.name AS rel_name, rel, b, e1.branch AS branch, CASE
+             WHEN startNode(e1) = a AND startNode(e2) = rel THEN "out"
+             WHEN startNode(e1) = rel AND startNode(e2) = b THEN "in"
+             ELSE "bidir"
+         END AS direction,
+         CASE
+             WHEN e1.from < e2.from THEN e1.from ELSE e2.from
+         END AS earliest_from
+         ORDER BY %(id_func_name)s(a), rel_name, %(id_func_name)s(b), direction, branch, earliest_from ASC
+         WITH a, rel_name, b, direction, branch, collect(rel) AS relationships_list
+         WHERE size(relationships_list) > 1
+         WITH a, rel_name, b, direction, branch, tail(relationships_list) AS rels_to_delete
+         UNWIND rels_to_delete AS rel_to_delete
+         DETACH DELETE rel_to_delete
+         """ % {"id_func_name": db.get_id_function_name()}
+         self.add_to_query(query)
+
+
+ class PerformHardDeletes(Query):
+     name = "do_hard_deletes"
+     type = QueryType.WRITE
+     insert_return = False
+
+     async def query_init(self, db: InfrahubDatabase, **kwargs: dict[str, Any]) -> None:  # noqa: ARG002
+         query = """
+         CALL {
+             MATCH (n)
+             WHERE n.to_delete = TRUE
+             DETACH DELETE n
+         }
+         CALL {
+             MATCH ()-[e]-()
+             WHERE e.to_delete = TRUE
+             DELETE e
+         }
+         """
+         self.add_to_query(query)
+
+
+ class Migration029(ArbitraryMigration):
+     """
+     Clean up a variety of bad data created during bugged merges for node kind/inheritance updates
+
+     1. Identify improperly duplicated nodes (i.e. nodes with the same UUID and the same database labels)
+         a. Consolidate edges onto a single duplicated node, making sure that the edges remain active if ANY active path exists
+         b. Delete the duplicated edges
+         c. Delete the duplicated nodes
+     2. Delete duplicated Relationships linked to the de-duplicated node
+     3. Delete duplicated edges across the database
+
+     Some of these changes must be batched because there can be a lot of them and the queries can be rather complex
+     Some of these queries also require marking nodes and edges as to be deleted (using the `to_delete` property) and then
+     deleting them in a separate query
+     """
+
+     name: str = "029_duplicates_cleanup"
+     minimum_version: int = 28
+     limit: int = 100
+
+     async def validate_migration(self, db: InfrahubDatabase) -> MigrationResult:  # noqa: ARG002
+         result = MigrationResult()
+
+         return result
+
+     async def execute(self, db: InfrahubDatabase) -> MigrationResult:
+         migration_result = MigrationResult()
+         limit = self.limit
+         offset = 0
+         more_nodes_to_process = True
+         try:
+             while more_nodes_to_process:
+                 log.info(f"Running node duplicates cleanup query {limit=},{offset=}")
+                 node_cleanup_query = await CleanUpDuplicatedUuidVertices.init(
+                     db=db,
+                     vertex_label="Node",
+                     limit=limit,
+                     offset=offset,
+                     outbound_edge_types=[
+                         DatabaseEdgeType.IS_PART_OF,
+                         DatabaseEdgeType.HAS_ATTRIBUTE,
+                         DatabaseEdgeType.IS_RELATED,
+                         DatabaseEdgeType.IS_RESERVED,
+                     ],
+                     inbound_edge_types=[
+                         DatabaseEdgeType.IS_RELATED,
+                         DatabaseEdgeType.HAS_OWNER,
+                         DatabaseEdgeType.HAS_SOURCE,
+                     ],
+                 )
+                 await node_cleanup_query.execute(db=db)
+                 has_results = False
+                 for result in node_cleanup_query.get_results():
+                     has_results = True
+                     more_nodes_to_process = result.get_as_type("more_nodes_to_process", bool)
+                 offset += limit
+                 if not has_results or not more_nodes_to_process:
+                     break
+
+             hard_delete_query = await PerformHardDeletes.init(db=db)
+             await hard_delete_query.execute(db=db)
+
+             duplicate_edge_query = await DeleteDuplicatedEdgesQuery.init(db=db)
+             await duplicate_edge_query.execute(db=db)
+
+             hard_delete_query = await PerformHardDeletes.init(db=db)
+             await hard_delete_query.execute(db=db)
+
+             illegal_relationships_cleanup_query = await DeleteIllegalRelationships.init(db=db)
+             await illegal_relationships_cleanup_query.execute(db=db)
+
+             offset = 0
+             more_nodes_to_process = True
+             while more_nodes_to_process:
+                 log.info(f"Running relationship duplicates cleanup query {limit=},{offset=}")
+                 relationship_cleanup_query = await CleanUpDuplicatedUuidVertices.init(
+                     db=db,
+                     vertex_label="Relationship",
+                     limit=limit,
+                     offset=offset,
+                     outbound_edge_types=[
+                         DatabaseEdgeType.IS_RELATED,
+                         DatabaseEdgeType.IS_VISIBLE,
+                         DatabaseEdgeType.IS_PROTECTED,
+                         DatabaseEdgeType.HAS_OWNER,
+                         DatabaseEdgeType.HAS_SOURCE,
+                     ],
+                     inbound_edge_types=[
+                         DatabaseEdgeType.IS_RELATED,
+                     ],
+                 )
+                 await relationship_cleanup_query.execute(db=db)
+                 has_results = False
+                 for result in relationship_cleanup_query.get_results():
+                     has_results = True
+                     more_nodes_to_process = result.get_as_type("more_nodes_to_process", bool)
+                 offset += limit
+                 if not has_results or not more_nodes_to_process:
+                     break
+
+             hard_delete_query = await PerformHardDeletes.init(db=db)
+             await hard_delete_query.execute(db=db)
+
+             duplicate_relationships_cleanup_query = await DeleteDuplicateRelationships.init(db=db)
+             await duplicate_relationships_cleanup_query.execute(db=db)
+
+         except Exception as exc:
+             migration_result.errors.append(str(exc))
+             return migration_result
+
+         return migration_result
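
The new migration leans on a few recurring patterns. The first is the grouping rule in CleanUpDuplicatedUuidVertices: duplicates are vertices sharing a UUID and label set, and the survivor is the one with the lowest element id. Below is a minimal sketch of that rule restated over hypothetical in-memory records (the Vertex dataclass and its field names are illustrative stand-ins, not the package's data model; the real query works directly on graph vertices).

from collections import defaultdict
from dataclasses import dataclass


@dataclass(frozen=True)
class Vertex:  # hypothetical stand-in for a graph vertex
    element_id: str
    uuid: str
    labels: frozenset[str]


def split_duplicates(vertices: list[Vertex]) -> list[tuple[Vertex, list[Vertex]]]:
    """Return (vertex_to_keep, vertices_to_delete) per duplicated group."""
    groups: dict[tuple[str, frozenset[str]], list[Vertex]] = defaultdict(list)
    for vertex in vertices:
        # duplicates share both the UUID and the full set of labels
        groups[(vertex.uuid, vertex.labels)].append(vertex)
    result = []
    for dupes in groups.values():
        if len(dupes) < 2:
            continue  # this UUID/labels combination is not duplicated
        ordered = sorted(dupes, key=lambda v: v.element_id)
        # keep the vertex with the lowest element id, as the Cypher does
        result.append((ordered[0], ordered[1:]))
    return result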
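
Second, the edge-consolidation rule from the same docstring: the surviving active edge takes the earliest active from time, and only when every edge in the group is deleted does it get closed at the latest deleted time, with a matching deleted edge created at that time. A sketch of that reduction, again with hypothetical field names:

from dataclasses import dataclass


@dataclass
class Edge:  # hypothetical stand-in; ISO-8601 timestamps compare lexicographically
    status: str  # "active" or "deleted"
    from_time: str
    to_time: str | None = None


def consolidate(edges: list[Edge]) -> tuple[Edge, Edge | None]:
    """Collapse one duplicate edge group (same branch/type/peer/direction)."""
    # assumes at least one active edge exists, mirroring the MERGE in the query
    earliest_active = min(e.from_time for e in edges if e.status == "active")
    # an edge counts as deleted when its status is "deleted" or it has a to time
    deleted_times = [
        e.to_time if e.status == "active" else e.from_time
        for e in edges
        if e.status == "deleted" or e.to_time is not None
    ]
    if len(deleted_times) < len(edges):
        # at least one path is still active: keep a single open active edge
        return Edge("active", earliest_active), None
    latest_deleted = max(deleted_times)
    # everything was deleted: close the active edge and record the deletion
    return Edge("active", earliest_active, latest_deleted), Edge("deleted", latest_deleted)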
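
DeleteIllegalRelationships hinges on a single time comparison: an IS_RELATED edge is illegal when its peer Node was already deleted, either on the edge's own branch (or the global branch) before the edge's from time, or on the default branch before the edge's branch diverged. The predicate, restated in Python with hypothetical field names:

from dataclasses import dataclass


@dataclass
class EdgeInfo:  # hypothetical stand-in for an IS_RELATED edge
    branch: str
    from_time: str
    branched_from_time: str  # branched_from of the edge's branch


def is_illegal_is_related(
    edge: EdgeInfo,
    node_deleted_branch: str,
    node_deleted_time: str,
    default_branch: str = "main",
    global_branch: str = "-global-",
) -> bool:
    """True when the edge points at a Node that was deleted before the edge existed."""
    # deleted on the edge's own branch (or globally) before the edge was created
    deleted_on_edge_branch = node_deleted_branch in (edge.branch, global_branch)
    # deleted on the default branch before the edge's branch even diverged
    deleted_on_default = node_deleted_branch == default_branch
    return (deleted_on_edge_branch and edge.from_time > node_deleted_time) or (
        deleted_on_default and node_deleted_time < edge.branched_from_time
    )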
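
Several of the queries do not delete anything directly: they flag vertices and edges with a to_delete property and leave removal to PerformHardDeletes in a second pass, which keeps each statement simpler and lets the marking queries run in batches. A minimal sketch of that mark-and-sweep shape, written against the plain neo4j Python driver rather than Infrahub's Query classes (the URI, credentials, and the marking predicate are placeholders; the sweep statements mirror PerformHardDeletes):

from neo4j import GraphDatabase

MARK = """
MATCH (n:Node)
WHERE n.some_duplicate_condition = TRUE  // hypothetical predicate; the real
SET n.to_delete = TRUE                   // queries mark the duplicates they find
"""

SWEEP_NODES = """
MATCH (n)
WHERE n.to_delete = TRUE
DETACH DELETE n
"""

SWEEP_EDGES = """
MATCH ()-[e]-()
WHERE e.to_delete = TRUE
DELETE e
"""

with GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password")) as driver:
    with driver.session() as session:
        session.run(MARK)         # pass 1: flag, leaving the data queryable
        session.run(SWEEP_NODES)  # pass 2: hard-delete flagged vertices
        session.run(SWEEP_EDGES)  # and any flagged leftover edges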
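
Finally, Migration029.execute pages through the duplicates rather than cleaning them in one statement, using the query's more_nodes_to_process flag to decide whether another pass is needed. The control flow, stripped to a skeleton (run_page is a hypothetical callable standing in for one CleanUpDuplicatedUuidVertices execution):

from typing import Callable


def run_batched(run_page: Callable[[int, int], bool | None], limit: int = 100) -> None:
    """Run one cleanup page at a time until nothing is left to process."""
    offset = 0
    while True:
        # run_page(limit, offset) returns the query's more_nodes_to_process flag,
        # or None when the page matched no duplicate groups at all
        more_nodes_to_process = run_page(limit, offset)
        offset += limit
        if not more_nodes_to_process:
            break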