cartography 0.104.0rc3__py3-none-any.whl → 0.106.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +104 -3
- cartography/client/aws/__init__.py +19 -0
- cartography/client/aws/ecr.py +51 -0
- cartography/client/core/tx.py +62 -0
- cartography/config.py +32 -0
- cartography/data/indexes.cypher +0 -37
- cartography/data/jobs/cleanup/aws_import_lambda_cleanup.json +1 -1
- cartography/driftdetect/cli.py +3 -2
- cartography/graph/cleanupbuilder.py +198 -41
- cartography/graph/job.py +42 -0
- cartography/graph/querybuilder.py +136 -2
- cartography/graph/statement.py +1 -1
- cartography/intel/airbyte/__init__.py +105 -0
- cartography/intel/airbyte/connections.py +120 -0
- cartography/intel/airbyte/destinations.py +81 -0
- cartography/intel/airbyte/organizations.py +59 -0
- cartography/intel/airbyte/sources.py +78 -0
- cartography/intel/airbyte/tags.py +64 -0
- cartography/intel/airbyte/users.py +106 -0
- cartography/intel/airbyte/util.py +122 -0
- cartography/intel/airbyte/workspaces.py +63 -0
- cartography/intel/aws/acm.py +124 -0
- cartography/intel/aws/cloudtrail.py +3 -38
- cartography/intel/aws/codebuild.py +132 -0
- cartography/intel/aws/ecr.py +8 -2
- cartography/intel/aws/ecs.py +228 -380
- cartography/intel/aws/efs.py +179 -11
- cartography/intel/aws/iam.py +1 -1
- cartography/intel/aws/identitycenter.py +14 -3
- cartography/intel/aws/inspector.py +96 -53
- cartography/intel/aws/lambda_function.py +1 -1
- cartography/intel/aws/rds.py +2 -1
- cartography/intel/aws/resources.py +4 -0
- cartography/intel/aws/s3.py +195 -4
- cartography/intel/aws/sqs.py +36 -90
- cartography/intel/entra/__init__.py +22 -0
- cartography/intel/entra/applications.py +366 -0
- cartography/intel/entra/groups.py +151 -0
- cartography/intel/entra/ou.py +21 -5
- cartography/intel/entra/users.py +84 -42
- cartography/intel/kubernetes/__init__.py +30 -14
- cartography/intel/kubernetes/clusters.py +86 -0
- cartography/intel/kubernetes/namespaces.py +59 -57
- cartography/intel/kubernetes/pods.py +140 -77
- cartography/intel/kubernetes/secrets.py +95 -45
- cartography/intel/kubernetes/services.py +131 -67
- cartography/intel/kubernetes/util.py +125 -14
- cartography/intel/scaleway/__init__.py +127 -0
- cartography/intel/scaleway/iam/__init__.py +0 -0
- cartography/intel/scaleway/iam/apikeys.py +71 -0
- cartography/intel/scaleway/iam/applications.py +71 -0
- cartography/intel/scaleway/iam/groups.py +71 -0
- cartography/intel/scaleway/iam/users.py +71 -0
- cartography/intel/scaleway/instances/__init__.py +0 -0
- cartography/intel/scaleway/instances/flexibleips.py +86 -0
- cartography/intel/scaleway/instances/instances.py +92 -0
- cartography/intel/scaleway/projects.py +79 -0
- cartography/intel/scaleway/storage/__init__.py +0 -0
- cartography/intel/scaleway/storage/snapshots.py +86 -0
- cartography/intel/scaleway/storage/volumes.py +84 -0
- cartography/intel/scaleway/utils.py +37 -0
- cartography/intel/trivy/__init__.py +161 -0
- cartography/intel/trivy/scanner.py +363 -0
- cartography/models/airbyte/__init__.py +0 -0
- cartography/models/airbyte/connection.py +138 -0
- cartography/models/airbyte/destination.py +75 -0
- cartography/models/airbyte/organization.py +19 -0
- cartography/models/airbyte/source.py +75 -0
- cartography/models/airbyte/stream.py +74 -0
- cartography/models/airbyte/tag.py +69 -0
- cartography/models/airbyte/user.py +111 -0
- cartography/models/airbyte/workspace.py +46 -0
- cartography/models/aws/acm/__init__.py +0 -0
- cartography/models/aws/acm/certificate.py +75 -0
- cartography/models/aws/cloudtrail/trail.py +24 -0
- cartography/models/aws/codebuild/__init__.py +0 -0
- cartography/models/aws/codebuild/project.py +49 -0
- cartography/models/aws/ecs/__init__.py +0 -0
- cartography/models/aws/ecs/clusters.py +64 -0
- cartography/models/aws/ecs/container_definitions.py +93 -0
- cartography/models/aws/ecs/container_instances.py +84 -0
- cartography/models/aws/ecs/containers.py +99 -0
- cartography/models/aws/ecs/services.py +117 -0
- cartography/models/aws/ecs/task_definitions.py +135 -0
- cartography/models/aws/ecs/tasks.py +110 -0
- cartography/models/aws/efs/access_point.py +77 -0
- cartography/models/aws/efs/file_system.py +60 -0
- cartography/models/aws/efs/mount_target.py +29 -2
- cartography/models/aws/s3/notification.py +24 -0
- cartography/models/aws/secretsmanager/secret_version.py +0 -2
- cartography/models/aws/sqs/__init__.py +0 -0
- cartography/models/aws/sqs/queue.py +89 -0
- cartography/models/core/common.py +1 -0
- cartography/models/core/nodes.py +15 -2
- cartography/models/core/relationships.py +44 -0
- cartography/models/entra/app_role_assignment.py +115 -0
- cartography/models/entra/application.py +47 -0
- cartography/models/entra/group.py +91 -0
- cartography/models/entra/user.py +17 -51
- cartography/models/kubernetes/__init__.py +0 -0
- cartography/models/kubernetes/clusters.py +26 -0
- cartography/models/kubernetes/containers.py +108 -0
- cartography/models/kubernetes/namespaces.py +51 -0
- cartography/models/kubernetes/pods.py +80 -0
- cartography/models/kubernetes/secrets.py +79 -0
- cartography/models/kubernetes/services.py +108 -0
- cartography/models/scaleway/__init__.py +0 -0
- cartography/models/scaleway/iam/__init__.py +0 -0
- cartography/models/scaleway/iam/apikey.py +96 -0
- cartography/models/scaleway/iam/application.py +52 -0
- cartography/models/scaleway/iam/group.py +95 -0
- cartography/models/scaleway/iam/user.py +60 -0
- cartography/models/scaleway/instance/__init__.py +0 -0
- cartography/models/scaleway/instance/flexibleip.py +52 -0
- cartography/models/scaleway/instance/instance.py +118 -0
- cartography/models/scaleway/organization.py +19 -0
- cartography/models/scaleway/project.py +48 -0
- cartography/models/scaleway/storage/__init__.py +0 -0
- cartography/models/scaleway/storage/snapshot.py +78 -0
- cartography/models/scaleway/storage/volume.py +51 -0
- cartography/models/trivy/__init__.py +0 -0
- cartography/models/trivy/findings.py +66 -0
- cartography/models/trivy/fix.py +66 -0
- cartography/models/trivy/package.py +71 -0
- cartography/sync.py +10 -4
- cartography/util.py +15 -10
- {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/METADATA +6 -2
- {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/RECORD +133 -49
- cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
- {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/WHEEL +0 -0
- {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.104.0rc3.dist-info → cartography-0.106.0.dist-info}/top_level.txt +0 -0
|
@@ -3,6 +3,7 @@ from string import Template
|
|
|
3
3
|
from typing import Dict
|
|
4
4
|
from typing import List
|
|
5
5
|
|
|
6
|
+
from cartography.graph.querybuilder import _asdict_with_validate_relprops
|
|
6
7
|
from cartography.graph.querybuilder import _build_match_clause
|
|
7
8
|
from cartography.graph.querybuilder import rel_present_on_node_schema
|
|
8
9
|
from cartography.models.core.common import PropertyRef
|
|
@@ -15,33 +16,40 @@ from cartography.models.core.relationships import TargetNodeMatcher
|
|
|
15
16
|
def build_cleanup_queries(node_schema: CartographyNodeSchema) -> List[str]:
|
|
16
17
|
"""
|
|
17
18
|
Generates queries to clean up stale nodes and relationships from the given CartographyNodeSchema.
|
|
19
|
+
Properly handles cases where a node schema has a scoped cleanup or not.
|
|
18
20
|
Note that auto-cleanups for a node with no relationships is not currently supported.
|
|
19
|
-
|
|
20
|
-
Algorithm:
|
|
21
|
-
1. If node_schema has no relationships at all, return empty.
|
|
22
|
-
|
|
23
|
-
Otherwise,
|
|
24
|
-
|
|
25
|
-
1. If node_schema doesn't have a sub_resource relationship, generate queries only to clean up its other
|
|
26
|
-
relationships. No nodes will be cleaned up.
|
|
27
|
-
|
|
28
|
-
Otherwise,
|
|
29
|
-
|
|
30
|
-
1. First delete all stale nodes attached to the node_schema's sub resource
|
|
31
|
-
2. Delete all stale node to sub resource relationships
|
|
32
|
-
- We don't expect this to be very common (never for AWS resources, at least), but in case it is possible for an
|
|
33
|
-
asset to change sub resources, we want to handle it properly.
|
|
34
|
-
3. For all relationships defined on the node schema, delete all stale ones.
|
|
35
21
|
:param node_schema: The given CartographyNodeSchema
|
|
36
22
|
:return: A list of Neo4j queries to clean up nodes and relationships.
|
|
37
23
|
"""
|
|
24
|
+
# If the node has no relationships, do not delete the node. Leave this behind for the user to manage.
|
|
25
|
+
# Oftentimes these are SyncMetadata nodes.
|
|
38
26
|
if (
|
|
39
27
|
not node_schema.sub_resource_relationship
|
|
40
28
|
and not node_schema.other_relationships
|
|
41
29
|
):
|
|
42
30
|
return []
|
|
43
31
|
|
|
44
|
-
|
|
32
|
+
# Case 1 [Standard]: the node has a sub resource and scoped cleanup is true => clean up stale nodes
|
|
33
|
+
# of this type, scoped to the sub resource. Continue on to clean up the other_relationships too.
|
|
34
|
+
if node_schema.sub_resource_relationship and node_schema.scoped_cleanup:
|
|
35
|
+
queries = _build_cleanup_node_and_rel_queries(
|
|
36
|
+
node_schema,
|
|
37
|
+
node_schema.sub_resource_relationship,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
# Case 2: The node has a sub resource but scoped cleanup is false => this does not make sense
|
|
41
|
+
# because if have a sub resource, we are implying that we are doing scoped cleanup.
|
|
42
|
+
elif node_schema.sub_resource_relationship and not node_schema.scoped_cleanup:
|
|
43
|
+
raise ValueError(
|
|
44
|
+
f"This is not expected: {node_schema.label} has a sub_resource_relationship but scoped_cleanup=False."
|
|
45
|
+
"Please check the class definition for this node schema. It doesn't make sense for a node to have a "
|
|
46
|
+
"sub resource relationship and an unscoped cleanup. Doing this will cause all stale nodes of this type "
|
|
47
|
+
"to be deleted regardless of the sub resource they are attached to."
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
# Case 3: The node has no sub resource but scoped cleanup is true => do not delete any nodes, but clean up stale relationships.
|
|
51
|
+
# Return early.
|
|
52
|
+
elif not node_schema.sub_resource_relationship and node_schema.scoped_cleanup:
|
|
45
53
|
queries = []
|
|
46
54
|
other_rels = (
|
|
47
55
|
node_schema.other_relationships.rels
|
|
@@ -53,17 +61,20 @@ def build_cleanup_queries(node_schema: CartographyNodeSchema) -> List[str]:
|
|
|
53
61
|
queries.append(query)
|
|
54
62
|
return queries
|
|
55
63
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
node_schema
|
|
59
|
-
|
|
64
|
+
# Case 4: The node has no sub resource and scoped cleanup is false => clean up the stale nodes. Continue on to clean up the other_relationships too.
|
|
65
|
+
else:
|
|
66
|
+
queries = [_build_cleanup_node_query_unscoped(node_schema)]
|
|
67
|
+
|
|
60
68
|
if node_schema.other_relationships:
|
|
61
69
|
for rel in node_schema.other_relationships.rels:
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
70
|
+
if node_schema.scoped_cleanup:
|
|
71
|
+
# [0] is the delete node query, [1] is the delete relationship query. We only want the latter.
|
|
72
|
+
_, rel_query = _build_cleanup_node_and_rel_queries(node_schema, rel)
|
|
73
|
+
queries.append(rel_query)
|
|
74
|
+
else:
|
|
75
|
+
queries.append(_build_cleanup_rel_queries_unscoped(node_schema, rel))
|
|
65
76
|
|
|
66
|
-
return
|
|
77
|
+
return queries
|
|
67
78
|
|
|
68
79
|
|
|
69
80
|
def _build_cleanup_rel_query_no_sub_resource(
|
|
@@ -94,6 +105,46 @@ def _build_cleanup_rel_query_no_sub_resource(
|
|
|
94
105
|
)
|
|
95
106
|
|
|
96
107
|
|
|
108
|
+
def _build_match_statement_for_cleanup(node_schema: CartographyNodeSchema) -> str:
    """
    Build the leading MATCH statement of a cleanup query for the given node schema.

    :param node_schema: The CartographyNodeSchema whose nodes (bound to `n`) should be matched.
    :return: `MATCH (n:<label>)` when the schema has neither a sub resource relationship nor scoped cleanup.
        Otherwise, a MATCH statement that also traverses the sub resource relationship (bound to `s`) to the
        sub resource node, so that only nodes attached to that sub resource are matched.
    """
    sub_resource_rel = node_schema.sub_resource_relationship

    # Unscoped cleanup: every node carrying this label is a candidate.
    if not sub_resource_rel and not node_schema.scoped_cleanup:
        return Template("MATCH (n:$node_label)").safe_substitute(
            node_label=node_schema.label,
        )

    # NOTE(review): when there is no sub resource relationship but scoped_cleanup is set, the three pieces
    # below stay empty and the result is `MATCH (n:<label>)(:{})`. Presumably no caller reaches this
    # combination -- confirm against the callers.
    link = ""
    target_label = ""
    target_match = ""
    if sub_resource_rel:
        # Draw the sub resource rel with the correct direction.
        if sub_resource_rel.direction == LinkDirection.INWARD:
            arrow = "<-[s:$SubResourceRelLabel]-"
        else:
            arrow = "-[s:$SubResourceRelLabel]->"
        link = Template(arrow).safe_substitute(
            SubResourceRelLabel=sub_resource_rel.rel_label,
        )
        target_label = sub_resource_rel.target_node_label
        target_match = _build_match_clause(sub_resource_rel.target_node_matcher)

    scoped_template = Template(
        "MATCH (n:$node_label)$sub_resource_link(:$sub_resource_label{$match_sub_res_clause})"
    )
    return scoped_template.safe_substitute(
        node_label=node_schema.label,
        sub_resource_link=link,
        sub_resource_label=target_label,
        match_sub_res_clause=target_match,
    )
|
|
146
|
+
|
|
147
|
+
|
|
97
148
|
def _build_cleanup_node_and_rel_queries(
|
|
98
149
|
node_schema: CartographyNodeSchema,
|
|
99
150
|
selected_relationship: CartographyRelSchema,
|
|
@@ -120,15 +171,6 @@ def _build_cleanup_node_and_rel_queries(
|
|
|
120
171
|
"verify the node class definition for the relationships that it has.",
|
|
121
172
|
)
|
|
122
173
|
|
|
123
|
-
# Draw sub resource rel with correct direction
|
|
124
|
-
if node_schema.sub_resource_relationship.direction == LinkDirection.INWARD:
|
|
125
|
-
sub_resource_link_template = Template("<-[s:$SubResourceRelLabel]-")
|
|
126
|
-
else:
|
|
127
|
-
sub_resource_link_template = Template("-[s:$SubResourceRelLabel]->")
|
|
128
|
-
sub_resource_link = sub_resource_link_template.safe_substitute(
|
|
129
|
-
SubResourceRelLabel=node_schema.sub_resource_relationship.rel_label,
|
|
130
|
-
)
|
|
131
|
-
|
|
132
174
|
# The cleanup node query must always be before the cleanup rel query
|
|
133
175
|
delete_action_clauses = [
|
|
134
176
|
"""
|
|
@@ -161,19 +203,14 @@ def _build_cleanup_node_and_rel_queries(
|
|
|
161
203
|
# Ensure the node is attached to the sub resource and delete the node
|
|
162
204
|
query_template = Template(
|
|
163
205
|
"""
|
|
164
|
-
|
|
206
|
+
$match_statement
|
|
165
207
|
$selected_rel_clause
|
|
166
208
|
$delete_action_clause
|
|
167
209
|
""",
|
|
168
210
|
)
|
|
169
211
|
return [
|
|
170
212
|
query_template.safe_substitute(
|
|
171
|
-
|
|
172
|
-
sub_resource_link=sub_resource_link,
|
|
173
|
-
sub_resource_label=node_schema.sub_resource_relationship.target_node_label,
|
|
174
|
-
match_sub_res_clause=_build_match_clause(
|
|
175
|
-
node_schema.sub_resource_relationship.target_node_matcher,
|
|
176
|
-
),
|
|
213
|
+
match_statement=_build_match_statement_for_cleanup(node_schema),
|
|
177
214
|
selected_rel_clause=(
|
|
178
215
|
""
|
|
179
216
|
if selected_relationship == node_schema.sub_resource_relationship
|
|
@@ -185,6 +222,80 @@ def _build_cleanup_node_and_rel_queries(
|
|
|
185
222
|
]
|
|
186
223
|
|
|
187
224
|
|
|
225
|
+
def _build_cleanup_node_query_unscoped(
    node_schema: CartographyNodeSchema,
) -> str:
    """
    Generate the query that deletes stale nodes for a node_schema with scoped_cleanup=False.

    :param node_schema: The CartographyNodeSchema to clean up. Its scoped_cleanup must be falsy.
    :return: A Neo4j query that DETACH DELETEs nodes matched by _build_match_statement_for_cleanup() whose
        lastupdated differs from $UPDATE_TAG, at most $LIMIT_SIZE nodes per execution.
    :raises ValueError: If node_schema.scoped_cleanup is set.
    """
    if node_schema.scoped_cleanup:
        raise ValueError(
            f"_build_cleanup_node_query_unscoped() failed: '{node_schema.label}' does not have "
            "scoped_cleanup=False, so we cannot generate a query to clean it up. Please verify that the class "
            "definition is what you expect.",
        )

    # $UPDATE_TAG and $LIMIT_SIZE are not substituted below: safe_substitute() leaves unknown placeholders
    # untouched, so they remain in the returned text as query parameters.
    delete_action_clause = """
        WHERE n.lastupdated <> $UPDATE_TAG
        WITH n LIMIT $LIMIT_SIZE
        DETACH DELETE n;
    """

    # Match the nodes of this type, then delete the stale ones.
    query_template = Template(
        """
        $match_statement
        $delete_action_clause
        """,
    )
    return query_template.safe_substitute(
        match_statement=_build_match_statement_for_cleanup(node_schema),
        delete_action_clause=delete_action_clause,
    )
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def _build_cleanup_rel_queries_unscoped(
    node_schema: CartographyNodeSchema,
    selected_relationship: CartographyRelSchema,
) -> str:
    """
    Generate the relationship cleanup query for a node_schema with scoped_cleanup=False.

    :param node_schema: The CartographyNodeSchema that owns the relationship. Its scoped_cleanup must be falsy.
    :param selected_relationship: The CartographyRelSchema whose stale relationships (bound to `r`) are deleted.
    :return: A Neo4j query that DELETEs relationships whose lastupdated differs from $UPDATE_TAG, at most
        $LIMIT_SIZE relationships per execution.
    :raises ValueError: If node_schema.scoped_cleanup is set, or if rel_present_on_node_schema() reports that
        selected_relationship is not present on node_schema.
    """
    if node_schema.scoped_cleanup:
        raise ValueError(
            f"_build_cleanup_rel_queries_unscoped() failed: '{node_schema.label}' does not have "
            "scoped_cleanup=False, so we cannot generate a query to clean it up. Please verify that the class "
            "definition is what you expect.",
        )
    if not rel_present_on_node_schema(node_schema, selected_relationship):
        raise ValueError(
            f"_build_cleanup_rel_queries_unscoped(): Attempted to build cleanup query for node "
            f"'{node_schema.label}' and relationship {selected_relationship.rel_label} but that relationship is "
            "not present on the node. Please verify the node class definition for the relationships that it has.",
        )

    # $UPDATE_TAG and $LIMIT_SIZE are not substituted below: safe_substitute() leaves unknown placeholders
    # untouched, so they remain in the returned text as query parameters.
    delete_action_clause = """WHERE r.lastupdated <> $UPDATE_TAG
        WITH r LIMIT $LIMIT_SIZE
        DELETE r;
    """

    # Match the nodes of this type, follow the selected relationship, then delete the stale relationships.
    query_template = Template(
        """
        $match_statement
        $selected_rel_clause
        $delete_action_clause
        """,
    )
    return query_template.safe_substitute(
        match_statement=_build_match_statement_for_cleanup(node_schema),
        selected_rel_clause=_build_selected_rel_clause(selected_relationship),
        delete_action_clause=delete_action_clause,
    )
|
|
297
|
+
|
|
298
|
+
|
|
188
299
|
def _build_selected_rel_clause(selected_relationship: CartographyRelSchema) -> str:
|
|
189
300
|
"""
|
|
190
301
|
Draw selected relationship with correct direction. Returns a string that looks like either
|
|
@@ -224,3 +335,49 @@ def _validate_target_node_matcher_for_cleanup_job(tgm: TargetNodeMatcher):
|
|
|
224
335
|
f"{key} has set_in_kwargs=False, please check by reviewing the full stack trace to know which object"
|
|
225
336
|
f"this message was raised from. Debug information: PropertyRef name = {prop_ref.name}.",
|
|
226
337
|
)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def build_cleanup_query_for_matchlink(rel_schema: CartographyRelSchema) -> str:
    """
    Generates a cleanup query for a matchlink relationship.

    :param rel_schema: The CartographyRelSchema object to generate a query. This CartographyRelSchema object
        - Must have a source_node_matcher and source_node_label defined
        - Must have a CartographyRelProperties object where _sub_resource_label and _sub_resource_id are defined
    :return: A Neo4j query used to clean up stale matchlink relationships. $UPDATE_TAG and $LIMIT_SIZE are left
        in the returned text as query parameters.
    :raises ValueError: If rel_schema has no source_node_matcher.
    """
    if not rel_schema.source_node_matcher:
        # This function raises rather than returning an empty result, so the message must say so.
        raise ValueError(
            f"No source node matcher found for {rel_schema.rel_label}. "
            "MatchLink relationships require a source_node_matcher to be defined."
        )

    query_template = Template(
        """
        MATCH (from:$source_node_label)$rel_direction[r:$rel_label]$rel_direction_end(to:$target_node_label)
        WHERE r.lastupdated <> $UPDATE_TAG
            AND r._sub_resource_label = $sub_resource_label
            AND r._sub_resource_id = $sub_resource_id
        WITH r LIMIT $LIMIT_SIZE
        DELETE r;
        """
    )

    # Determine which way to point the arrow. INWARD is toward the source, otherwise we go toward the target.
    if rel_schema.direction == LinkDirection.INWARD:
        rel_direction, rel_direction_end = "<-", "-"
    else:
        rel_direction, rel_direction_end = "-", "->"

    # Small hack: avoid type-checking errors by converting the rel_schema to a dict.
    # NOTE(review): the two lookups below presumably raise KeyError when the rel properties do not define
    # _sub_resource_label / _sub_resource_id -- confirm against _asdict_with_validate_relprops().
    rel_props_as_dict = _asdict_with_validate_relprops(rel_schema)

    return query_template.safe_substitute(
        source_node_label=rel_schema.source_node_label,
        target_node_label=rel_schema.target_node_label,
        rel_label=rel_schema.rel_label,
        rel_direction=rel_direction,
        rel_direction_end=rel_direction_end,
        sub_resource_label=rel_props_as_dict["_sub_resource_label"],
        sub_resource_id=rel_props_as_dict["_sub_resource_id"],
    )
|
cartography/graph/job.py
CHANGED
|
@@ -13,9 +13,11 @@ from typing import Union
|
|
|
13
13
|
import neo4j
|
|
14
14
|
|
|
15
15
|
from cartography.graph.cleanupbuilder import build_cleanup_queries
|
|
16
|
+
from cartography.graph.cleanupbuilder import build_cleanup_query_for_matchlink
|
|
16
17
|
from cartography.graph.statement import get_job_shortname
|
|
17
18
|
from cartography.graph.statement import GraphStatement
|
|
18
19
|
from cartography.models.core.nodes import CartographyNodeSchema
|
|
20
|
+
from cartography.models.core.relationships import CartographyRelSchema
|
|
19
21
|
|
|
20
22
|
logger = logging.getLogger(__name__)
|
|
21
23
|
|
|
@@ -176,6 +178,46 @@ class GraphJob:
|
|
|
176
178
|
node_schema.label,
|
|
177
179
|
)
|
|
178
180
|
|
|
181
|
+
@classmethod
def from_matchlink(
    cls,
    rel_schema: CartographyRelSchema,
    sub_resource_label: str,
    sub_resource_id: str,
    update_tag: int,
) -> "GraphJob":
    """
    Create a cleanup job from a CartographyRelSchema object (specifically, a MatchLink).
    Use this only to clean up stale MatchLink relationships between nodes.

    :param rel_schema: The MatchLink CartographyRelSchema. The cleanup query is generated from it by
        build_cleanup_query_for_matchlink().
    :param sub_resource_label: Bound to the `_sub_resource_label` query parameter.
    :param sub_resource_id: Bound to the `_sub_resource_id` query parameter.
    :param update_tag: Bound to the `UPDATE_TAG` query parameter.
    :return: A GraphJob holding one iterative GraphStatement (iterationsize=100) for the cleanup query.
    """
    query = build_cleanup_query_for_matchlink(rel_schema)
    logger.debug(f"Cleanup query: {query}")

    rel_label = rel_schema.rel_label
    # NOTE(review): presumably the generated query refers to these values as $_sub_resource_label and
    # $_sub_resource_id -- confirm against the PropertyRefs declared on the rel properties.
    cleanup_statement = GraphStatement(
        query,
        parameters={
            "UPDATE_TAG": update_tag,
            "_sub_resource_label": sub_resource_label,
            "_sub_resource_id": sub_resource_id,
        },
        iterative=True,
        iterationsize=100,
        parent_job_name=rel_label,
    )

    job_name = f"Cleanup {rel_label} between {rel_schema.source_node_label} and {rel_schema.target_node_label}"
    return cls(job_name, [cleanup_statement], rel_label)
|
|
220
|
+
|
|
179
221
|
@classmethod
|
|
180
222
|
def from_json_file(cls, file_path: Union[str, Path]) -> "GraphJob":
|
|
181
223
|
"""
|
|
@@ -14,6 +14,7 @@ from cartography.models.core.nodes import ExtraNodeLabels
|
|
|
14
14
|
from cartography.models.core.relationships import CartographyRelSchema
|
|
15
15
|
from cartography.models.core.relationships import LinkDirection
|
|
16
16
|
from cartography.models.core.relationships import OtherRelationships
|
|
17
|
+
from cartography.models.core.relationships import SourceNodeMatcher
|
|
17
18
|
from cartography.models.core.relationships import TargetNodeMatcher
|
|
18
19
|
|
|
19
20
|
logger = logging.getLogger(__name__)
|
|
@@ -109,10 +110,10 @@ def _build_rel_properties_statement(
|
|
|
109
110
|
return set_clause
|
|
110
111
|
|
|
111
112
|
|
|
112
|
-
def _build_match_clause(matcher: TargetNodeMatcher) -> str:
|
|
113
|
+
def _build_match_clause(matcher: TargetNodeMatcher | SourceNodeMatcher) -> str:
|
|
113
114
|
"""
|
|
114
115
|
Generate a Neo4j match statement on one or more keys and values for a given node.
|
|
115
|
-
:param matcher: A TargetNodeMatcher object
|
|
116
|
+
:param matcher: A TargetNodeMatcher or SourceNodeMatcher object
|
|
116
117
|
:return: a Neo4j match clause
|
|
117
118
|
"""
|
|
118
119
|
match = Template("$Key: $PropRef")
|
|
@@ -548,3 +549,136 @@ def build_create_index_queries(node_schema: CartographyNodeSchema) -> List[str]:
|
|
|
548
549
|
],
|
|
549
550
|
)
|
|
550
551
|
return result
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
def build_create_index_queries_for_matchlink(
    rel_schema: CartographyRelSchema,
) -> list[str]:
    """
    Generate queries to create indexes for the given CartographyRelSchema and the node types it connects.

    :param rel_schema: The CartographyRelSchema object
    :return: A list with one `CREATE INDEX IF NOT EXISTS FOR (n:$NodeLabel) ON (n.$NodeAttribute)` query per
        key of the source node matcher, then one per key of the target node matcher, followed by one composite
        relationship index on (lastupdated, _sub_resource_label, _sub_resource_id). Returns an empty list (and
        logs a warning) when rel_schema has no source_node_matcher.
    """
    if not rel_schema.source_node_matcher:
        logger.warning(
            f"No source node matcher found for {rel_schema.rel_label}; returning empty list. "
            "Please note that build_create_index_queries_for_matchlink() is only used for load_matchlinks() "
            "where we match on and connect existing nodes in the graph."
        )
        return []

    index_template = Template(
        "CREATE INDEX IF NOT EXISTS FOR (n:$NodeLabel) ON (n.$NodeAttribute);",
    )

    # One index per matched attribute: source node keys first, then target node keys.
    result = [
        index_template.safe_substitute(
            NodeLabel=rel_schema.source_node_label,
            NodeAttribute=source_key,
        )
        for source_key in asdict(rel_schema.source_node_matcher)
    ]
    result.extend(
        index_template.safe_substitute(
            NodeLabel=rel_schema.target_node_label,
            NodeAttribute=target_key,
        )
        for target_key in asdict(rel_schema.target_node_matcher)
    )

    # Create a composite index for the relationship between the source and target nodes.
    # https://neo4j.com/docs/cypher-manual/4.3/indexes-for-search-performance/#administration-indexes-create-a-composite-index-for-relationships
    rel_index_template = Template(
        "CREATE INDEX IF NOT EXISTS FOR ()$rel_direction[r:$RelLabel]$rel_direction_end() "
        "ON (r.lastupdated, r._sub_resource_label, r._sub_resource_id);",
    )
    if rel_schema.direction == LinkDirection.INWARD:
        rel_direction, rel_direction_end = "<-", "-"
    else:
        rel_direction, rel_direction_end = "-", "->"
    result.append(
        rel_index_template.safe_substitute(
            RelLabel=rel_schema.rel_label,
            rel_direction=rel_direction,
            rel_direction_end=rel_direction_end,
        )
    )
    return result
|
|
614
|
+
|
|
615
|
+
|
|
616
|
+
def build_matchlink_query(rel_schema: CartographyRelSchema) -> str:
    """
    Generate a Neo4j query to link two existing nodes when given a CartographyRelSchema object.
    This is only used for load_matchlinks().

    :param rel_schema: The CartographyRelSchema object to generate a query. This CartographyRelSchema object
        - Must have a source_node_matcher and source_node_label defined
        - Must have a CartographyRelProperties object where _sub_resource_label and _sub_resource_id are defined
    :return: A Neo4j query that can be used to link two existing nodes. It UNWINDs $DictList, MATCHes the
        source (`from`) and target (`to`) nodes, MERGEs the relationship `r` and sets its properties.
    :raises ValueError: If source_node_matcher or source_node_label is missing, or if the rel properties do
        not define _sub_resource_label or _sub_resource_id.
    """
    if not rel_schema.source_node_matcher or not rel_schema.source_node_label:
        raise ValueError(
            f"No source node matcher or source node label found for {rel_schema.rel_label}. "
            "MatchLink relationships require a source_node_matcher and source_node_label to be defined."
        )

    rel_props_as_dict = _asdict_with_validate_relprops(rel_schema)

    # These are needed for the cleanup query
    if "_sub_resource_label" not in rel_props_as_dict:
        raise ValueError(
            f"Expected _sub_resource_label to be defined on {rel_schema.properties.__class__.__name__}. "
            "Please include `_sub_resource_label: PropertyRef = PropertyRef('_sub_resource_label', set_in_kwargs=True)`"
        )
    if "_sub_resource_id" not in rel_props_as_dict:
        raise ValueError(
            f"Expected _sub_resource_id to be defined on {rel_schema.properties.__class__.__name__}. "
            "Please include `_sub_resource_id: PropertyRef = PropertyRef('_sub_resource_id', set_in_kwargs=True)`"
        )

    matchlink_query_template = Template(
        """
        UNWIND $DictList as item
            $source_match
            $target_match
            MERGE $rel
            ON CREATE SET r.firstseen = timestamp()
            SET
                $set_rel_properties_statement;
        """
    )

    source_match = Template(
        "MATCH (from:$source_node_label{$match_clause})"
    ).safe_substitute(
        source_node_label=rel_schema.source_node_label,
        match_clause=_build_match_clause(rel_schema.source_node_matcher),
    )

    target_match = Template(
        "MATCH (to:$target_node_label{$match_clause})"
    ).safe_substitute(
        target_node_label=rel_schema.target_node_label,
        match_clause=_build_match_clause(rel_schema.target_node_matcher),
    )

    # INWARD points the arrow at the source (`from`) node; any other direction points it at the target.
    if rel_schema.direction == LinkDirection.INWARD:
        rel = f"(from)<-[r:{rel_schema.rel_label}]-(to)"
    else:
        rel = f"(from)-[r:{rel_schema.rel_label}]->(to)"

    return matchlink_query_template.safe_substitute(
        source_match=source_match,
        target_match=target_match,
        rel=rel,
        set_rel_properties_statement=_build_rel_properties_statement(
            "r",
            rel_props_as_dict,
        ),
    )
|
cartography/graph/statement.py
CHANGED
|
@@ -56,7 +56,7 @@ class GraphStatement:
|
|
|
56
56
|
|
|
57
57
|
self.parent_job_name = parent_job_name if parent_job_name else None
|
|
58
58
|
self.parent_job_sequence_num = (
|
|
59
|
-
parent_job_sequence_num if parent_job_sequence_num else
|
|
59
|
+
parent_job_sequence_num if parent_job_sequence_num else 1
|
|
60
60
|
)
|
|
61
61
|
|
|
62
62
|
def merge_parameters(self, parameters: Dict) -> None:
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
import neo4j
|
|
4
|
+
|
|
5
|
+
import cartography.intel.airbyte.connections
|
|
6
|
+
import cartography.intel.airbyte.destinations
|
|
7
|
+
import cartography.intel.airbyte.organizations
|
|
8
|
+
import cartography.intel.airbyte.sources
|
|
9
|
+
import cartography.intel.airbyte.tags
|
|
10
|
+
import cartography.intel.airbyte.users
|
|
11
|
+
import cartography.intel.airbyte.workspaces
|
|
12
|
+
from cartography.config import Config
|
|
13
|
+
from cartography.intel.airbyte.util import AirbyteClient
|
|
14
|
+
from cartography.util import timeit
|
|
15
|
+
|
|
16
|
+
# Module-level logger named after this module; used below to report when the
# Airbyte import is skipped because it is not configured.
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@timeit
def start_airbyte_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
    """
    Run the Airbyte sync when the module is configured; otherwise log and return.

    Organizations are synced first. Then, for every organization returned,
    workspaces are synced, followed by users, sources, destinations, tags and
    connections, in that order.

    :param neo4j_session: Neo4J session for database interface
    :param config: A cartography.config object
    :return: None
    """
    # All three settings are required; a single missing one disables the module.
    fully_configured = (
        config.airbyte_api_url
        and config.airbyte_client_id
        and config.airbyte_client_secret
    )
    if not fully_configured:
        logger.info(
            "Airbyte import is not configured - skipping this module. "
            "See docs to configure.",
        )
        return

    # Create api session
    api_client = AirbyteClient(
        base_url=config.airbyte_api_url,
        client_id=config.airbyte_client_id,
        client_secret=config.airbyte_client_secret,
    )

    common_job_parameters = {"UPDATE_TAG": config.update_tag}

    organizations = cartography.intel.airbyte.organizations.sync(
        neo4j_session,
        api_client,
        common_job_parameters,
    )

    # Modules whose sync() takes (session, client, org id, workspace ids, params).
    # The tuple order is the execution order and must be kept.
    workspace_scoped_modules = (
        cartography.intel.airbyte.sources,
        cartography.intel.airbyte.destinations,
        cartography.intel.airbyte.tags,
        cartography.intel.airbyte.connections,
    )

    for organization in organizations:
        org_id = organization["organizationId"]
        org_common_job_parameters = {
            "UPDATE_TAG": config.update_tag,
            "ORG_ID": org_id,
        }

        workspaces = cartography.intel.airbyte.workspaces.sync(
            neo4j_session,
            api_client,
            org_id,
            org_common_job_parameters,
        )
        workspace_ids = [workspace["workspaceId"] for workspace in workspaces]

        # Users are synced per organization and do not receive the workspace ids.
        cartography.intel.airbyte.users.sync(
            neo4j_session,
            api_client,
            org_id,
            org_common_job_parameters,
        )

        for module in workspace_scoped_modules:
            module.sync(
                neo4j_session,
                api_client,
                org_id,
                workspace_ids,
                org_common_job_parameters,
            )
|