cartography 0.105.0__py3-none-any.whl → 0.106.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (108)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +78 -2
  3. cartography/client/core/tx.py +62 -0
  4. cartography/config.py +24 -0
  5. cartography/data/indexes.cypher +0 -34
  6. cartography/driftdetect/cli.py +3 -2
  7. cartography/graph/cleanupbuilder.py +47 -0
  8. cartography/graph/job.py +42 -0
  9. cartography/graph/querybuilder.py +136 -2
  10. cartography/graph/statement.py +1 -1
  11. cartography/intel/airbyte/__init__.py +105 -0
  12. cartography/intel/airbyte/connections.py +120 -0
  13. cartography/intel/airbyte/destinations.py +81 -0
  14. cartography/intel/airbyte/organizations.py +59 -0
  15. cartography/intel/airbyte/sources.py +78 -0
  16. cartography/intel/airbyte/tags.py +64 -0
  17. cartography/intel/airbyte/users.py +106 -0
  18. cartography/intel/airbyte/util.py +122 -0
  19. cartography/intel/airbyte/workspaces.py +63 -0
  20. cartography/intel/aws/codebuild.py +132 -0
  21. cartography/intel/aws/ecs.py +228 -380
  22. cartography/intel/aws/efs.py +261 -0
  23. cartography/intel/aws/identitycenter.py +14 -3
  24. cartography/intel/aws/inspector.py +96 -53
  25. cartography/intel/aws/rds.py +2 -1
  26. cartography/intel/aws/resources.py +4 -0
  27. cartography/intel/entra/__init__.py +11 -0
  28. cartography/intel/entra/applications.py +366 -0
  29. cartography/intel/entra/users.py +84 -42
  30. cartography/intel/kubernetes/__init__.py +30 -14
  31. cartography/intel/kubernetes/clusters.py +86 -0
  32. cartography/intel/kubernetes/namespaces.py +59 -57
  33. cartography/intel/kubernetes/pods.py +140 -77
  34. cartography/intel/kubernetes/secrets.py +95 -45
  35. cartography/intel/kubernetes/services.py +131 -67
  36. cartography/intel/kubernetes/util.py +125 -14
  37. cartography/intel/scaleway/__init__.py +127 -0
  38. cartography/intel/scaleway/iam/__init__.py +0 -0
  39. cartography/intel/scaleway/iam/apikeys.py +71 -0
  40. cartography/intel/scaleway/iam/applications.py +71 -0
  41. cartography/intel/scaleway/iam/groups.py +71 -0
  42. cartography/intel/scaleway/iam/users.py +71 -0
  43. cartography/intel/scaleway/instances/__init__.py +0 -0
  44. cartography/intel/scaleway/instances/flexibleips.py +86 -0
  45. cartography/intel/scaleway/instances/instances.py +92 -0
  46. cartography/intel/scaleway/projects.py +79 -0
  47. cartography/intel/scaleway/storage/__init__.py +0 -0
  48. cartography/intel/scaleway/storage/snapshots.py +86 -0
  49. cartography/intel/scaleway/storage/volumes.py +84 -0
  50. cartography/intel/scaleway/utils.py +37 -0
  51. cartography/models/airbyte/__init__.py +0 -0
  52. cartography/models/airbyte/connection.py +138 -0
  53. cartography/models/airbyte/destination.py +75 -0
  54. cartography/models/airbyte/organization.py +19 -0
  55. cartography/models/airbyte/source.py +75 -0
  56. cartography/models/airbyte/stream.py +74 -0
  57. cartography/models/airbyte/tag.py +69 -0
  58. cartography/models/airbyte/user.py +111 -0
  59. cartography/models/airbyte/workspace.py +46 -0
  60. cartography/models/aws/codebuild/__init__.py +0 -0
  61. cartography/models/aws/codebuild/project.py +49 -0
  62. cartography/models/aws/ecs/__init__.py +0 -0
  63. cartography/models/aws/ecs/clusters.py +64 -0
  64. cartography/models/aws/ecs/container_definitions.py +93 -0
  65. cartography/models/aws/ecs/container_instances.py +84 -0
  66. cartography/models/aws/ecs/containers.py +99 -0
  67. cartography/models/aws/ecs/services.py +117 -0
  68. cartography/models/aws/ecs/task_definitions.py +135 -0
  69. cartography/models/aws/ecs/tasks.py +110 -0
  70. cartography/models/aws/efs/__init__.py +0 -0
  71. cartography/models/aws/efs/access_point.py +77 -0
  72. cartography/models/aws/efs/file_system.py +60 -0
  73. cartography/models/aws/efs/mount_target.py +79 -0
  74. cartography/models/core/common.py +1 -0
  75. cartography/models/core/relationships.py +44 -0
  76. cartography/models/entra/app_role_assignment.py +115 -0
  77. cartography/models/entra/application.py +47 -0
  78. cartography/models/entra/user.py +17 -51
  79. cartography/models/kubernetes/__init__.py +0 -0
  80. cartography/models/kubernetes/clusters.py +26 -0
  81. cartography/models/kubernetes/containers.py +108 -0
  82. cartography/models/kubernetes/namespaces.py +51 -0
  83. cartography/models/kubernetes/pods.py +80 -0
  84. cartography/models/kubernetes/secrets.py +79 -0
  85. cartography/models/kubernetes/services.py +108 -0
  86. cartography/models/scaleway/__init__.py +0 -0
  87. cartography/models/scaleway/iam/__init__.py +0 -0
  88. cartography/models/scaleway/iam/apikey.py +96 -0
  89. cartography/models/scaleway/iam/application.py +52 -0
  90. cartography/models/scaleway/iam/group.py +95 -0
  91. cartography/models/scaleway/iam/user.py +60 -0
  92. cartography/models/scaleway/instance/__init__.py +0 -0
  93. cartography/models/scaleway/instance/flexibleip.py +52 -0
  94. cartography/models/scaleway/instance/instance.py +118 -0
  95. cartography/models/scaleway/organization.py +19 -0
  96. cartography/models/scaleway/project.py +48 -0
  97. cartography/models/scaleway/storage/__init__.py +0 -0
  98. cartography/models/scaleway/storage/snapshot.py +78 -0
  99. cartography/models/scaleway/storage/volume.py +51 -0
  100. cartography/sync.py +8 -4
  101. cartography/util.py +15 -10
  102. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/METADATA +5 -2
  103. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/RECORD +107 -35
  104. cartography/data/jobs/cleanup/kubernetes_import_cleanup.json +0 -70
  105. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/WHEEL +0 -0
  106. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/entry_points.txt +0 -0
  107. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/licenses/LICENSE +0 -0
  108. {cartography-0.105.0.dist-info → cartography-0.106.0.dist-info}/top_level.txt +0 -0
cartography/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.105.0'
21
- __version_tuple__ = version_tuple = (0, 105, 0)
20
+ __version__ = version = '0.106.0'
21
+ __version_tuple__ = version_tuple = (0, 106, 0)
cartography/cli.py CHANGED
@@ -71,8 +71,8 @@ class CLI:
71
71
  default="bolt://localhost:7687",
72
72
  help=(
73
73
  "A valid Neo4j URI to sync against. See "
74
- "https://neo4j.com/docs/api/python-driver/current/driver.html#uri for complete documentation on the "
75
- "structure of a Neo4j URI."
74
+ "https://neo4j.com/docs/browser-manual/current/operations/dbms-connection/#uri-scheme for complete "
75
+ "documentation on the structure of a Neo4j URI."
76
76
  ),
77
77
  )
78
78
  parser.add_argument(
@@ -637,6 +637,33 @@ class CLI:
637
637
  "Required if you are using the Anthropic intel module. Ignored otherwise."
638
638
  ),
639
639
  )
640
+ parser.add_argument(
641
+ "--airbyte-client-id",
642
+ type=str,
643
+ default=None,
644
+ help=(
645
+ "The Airbyte client ID to use for authentication. "
646
+ "Required if you are using the Airbyte intel module. Ignored otherwise."
647
+ ),
648
+ )
649
+ parser.add_argument(
650
+ "--airbyte-client-secret-env-var",
651
+ type=str,
652
+ default=None,
653
+ help=(
654
+ "The name of an environment variable containing the Airbyte client secret for authentication. "
655
+ "Required if you are using the Airbyte intel module. Ignored otherwise."
656
+ ),
657
+ )
658
+ parser.add_argument(
659
+ "--airbyte-api-url",
660
+ type=str,
661
+ default="https://api.airbyte.com/v1",
662
+ help=(
663
+ "The base URL for the Airbyte API (default is the public Airbyte Cloud API). "
664
+ "Required if you are using the Airbyte intel module. Ignored otherwise."
665
+ ),
666
+ )
640
667
  parser.add_argument(
641
668
  "--trivy-s3-bucket",
642
669
  type=str,
@@ -655,6 +682,33 @@ class CLI:
655
682
  "Required if you are using the Trivy module. Ignored otherwise."
656
683
  ),
657
684
  )
685
+ parser.add_argument(
686
+ "--scaleway-org",
687
+ type=str,
688
+ default=None,
689
+ help=(
690
+ "The Scaleway organization ID to sync. "
691
+ "Required if you are using the Scaleway intel module. Ignored otherwise."
692
+ ),
693
+ )
694
+ parser.add_argument(
695
+ "--scaleway-access-key",
696
+ type=str,
697
+ default=None,
698
+ help=(
699
+ "The Scaleway access key to use for authentication. "
700
+ "Required if you are using the Scaleway intel module. Ignored otherwise."
701
+ ),
702
+ )
703
+ parser.add_argument(
704
+ "--scaleway-secret-key-env-var",
705
+ type=str,
706
+ default=None,
707
+ help=(
708
+ "The name of an environment variable containing the Scaleway secret key for authentication. "
709
+ "Required if you are using the Scaleway intel module. Ignored otherwise."
710
+ ),
711
+ )
658
712
 
659
713
  return parser
660
714
 
@@ -973,6 +1027,17 @@ class CLI:
973
1027
  else:
974
1028
  config.anthropic_apikey = None
975
1029
 
1030
+ # Airbyte config
1031
+ if config.airbyte_client_id and config.airbyte_client_secret_env_var:
1032
+ logger.debug(
1033
+ f"Reading Airbyte client secret from environment variable {config.airbyte_client_secret_env_var}",
1034
+ )
1035
+ config.airbyte_client_secret = os.environ.get(
1036
+ config.airbyte_client_secret_env_var,
1037
+ )
1038
+ else:
1039
+ config.airbyte_client_secret = None
1040
+
976
1041
  # Trivy config
977
1042
  if config.trivy_s3_bucket:
978
1043
  logger.debug(f"Trivy S3 bucket: {config.trivy_s3_bucket}")
@@ -980,6 +1045,17 @@ class CLI:
980
1045
  if config.trivy_s3_prefix:
981
1046
  logger.debug(f"Trivy S3 prefix: {config.trivy_s3_prefix}")
982
1047
 
1048
+ # Scaleway config
1049
+ if config.scaleway_secret_key_env_var:
1050
+ logger.debug(
1051
+ f"Reading Scaleway secret key from environment variable {config.scaleway_secret_key_env_var}",
1052
+ )
1053
+ config.scaleway_secret_key = os.environ.get(
1054
+ config.scaleway_secret_key_env_var,
1055
+ )
1056
+ else:
1057
+ config.scaleway_secret_key = None
1058
+
983
1059
  # Run cartography
984
1060
  try:
985
1061
  return cartography.sync.run_with_config(self.sync, config)
cartography/client/core/tx.py CHANGED
@@ -1,3 +1,4 @@
1
+ import logging
1
2
  from typing import Any
2
3
  from typing import Dict
3
4
  from typing import List
@@ -8,10 +9,15 @@ from typing import Union
8
9
  import neo4j
9
10
 
10
11
  from cartography.graph.querybuilder import build_create_index_queries
12
+ from cartography.graph.querybuilder import build_create_index_queries_for_matchlink
11
13
  from cartography.graph.querybuilder import build_ingestion_query
14
+ from cartography.graph.querybuilder import build_matchlink_query
12
15
  from cartography.models.core.nodes import CartographyNodeSchema
16
+ from cartography.models.core.relationships import CartographyRelSchema
13
17
  from cartography.util import batch
14
18
 
19
+ logger = logging.getLogger(__name__)
20
+
15
21
 
16
22
  def read_list_of_values_tx(
17
23
  tx: neo4j.Transaction,
@@ -255,6 +261,25 @@ def ensure_indexes(
255
261
  neo4j_session.run(query)
256
262
 
257
263
 
264
+ def ensure_indexes_for_matchlinks(
265
+ neo4j_session: neo4j.Session,
266
+ rel_schema: CartographyRelSchema,
267
+ ) -> None:
268
+ """
269
+ Creates indexes for node fields if they don't exist for the given CartographyRelSchema object.
270
+ This is only used for load_rels() where we match on and connect existing nodes.
271
+ This is not used for CartographyNodeSchema objects.
272
+ """
273
+ queries = build_create_index_queries_for_matchlink(rel_schema)
274
+ logger.debug(f"CREATE INDEX queries for {rel_schema.rel_label}: {queries}")
275
+ for query in queries:
276
+ if not query.startswith("CREATE INDEX IF NOT EXISTS"):
277
+ raise ValueError(
278
+ 'Query provided to `ensure_indexes_for_matchlinks()` does not start with "CREATE INDEX IF NOT EXISTS".',
279
+ )
280
+ neo4j_session.run(query)
281
+
282
+
258
283
  def load(
259
284
  neo4j_session: neo4j.Session,
260
285
  node_schema: CartographyNodeSchema,
@@ -276,3 +301,40 @@ def load(
276
301
  ensure_indexes(neo4j_session, node_schema)
277
302
  ingestion_query = build_ingestion_query(node_schema)
278
303
  load_graph_data(neo4j_session, ingestion_query, dict_list, **kwargs)
304
+
305
+
306
+ def load_matchlinks(
307
+ neo4j_session: neo4j.Session,
308
+ rel_schema: CartographyRelSchema,
309
+ dict_list: list[dict[str, Any]],
310
+ **kwargs,
311
+ ) -> None:
312
+ """
313
+ Main entrypoint for intel modules to write relationships to the graph between two existing nodes.
314
+ :param neo4j_session: The Neo4j session
315
+ :param rel_schema: The CartographyRelSchema object to generate a query.
316
+ :param dict_list: The data to load to the graph represented as a list of dicts. The dicts must contain the source and
317
+ target node ids.
318
+ :param kwargs: Allows additional keyword args to be supplied to the Neo4j query.
319
+ :return: None
320
+ """
321
+ if len(dict_list) == 0:
322
+ # If there is no data to load, save some time.
323
+ return
324
+
325
+ # Validate that required kwargs are provided for cleanup queries
326
+ if "_sub_resource_label" not in kwargs:
327
+ raise ValueError(
328
+ f"Required kwarg '_sub_resource_label' not provided for {rel_schema.rel_label}. "
329
+ "This is needed for cleanup queries."
330
+ )
331
+ if "_sub_resource_id" not in kwargs:
332
+ raise ValueError(
333
+ f"Required kwarg '_sub_resource_id' not provided for {rel_schema.rel_label}. "
334
+ "This is needed for cleanup queries."
335
+ )
336
+
337
+ ensure_indexes_for_matchlinks(neo4j_session, rel_schema)
338
+ matchlink_query = build_matchlink_query(rel_schema)
339
+ logger.debug(f"Matchlink query: {matchlink_query}")
340
+ load_graph_data(neo4j_session, matchlink_query, dict_list, **kwargs)
cartography/config.py CHANGED
@@ -137,10 +137,22 @@ class Config:
137
137
  :param openai_org_id: OpenAI organization id. Optional.
138
138
  :type anthropic_apikey: string
139
139
  :param anthropic_apikey: Anthropic API key. Optional.
140
+ :type airbyte_client_id: str
141
+ :param airbyte_client_id: Airbyte client ID for API authentication. Optional.
142
+ :type airbyte_client_secret: str
143
+ :param airbyte_client_secret: Airbyte client secret for API authentication. Optional.
144
+ :type airbyte_api_url: str
145
+ :param airbyte_api_url: Airbyte API base URL, e.g. https://api.airbyte.com/v1. Optional.
140
146
  :type trivy_s3_bucket: str
141
147
  :param trivy_s3_bucket: The S3 bucket name containing Trivy scan results. Optional.
142
148
  :type trivy_s3_prefix: str
143
149
  :param trivy_s3_prefix: The S3 prefix path containing Trivy scan results. Optional.
150
+ :type scaleway_access_key: str
151
+ :param scaleway_access_key: Scaleway access key. Optional.
152
+ :type scaleway_secret_key: str
153
+ :param scaleway_secret_key: Scaleway secret key. Optional.
154
+ :type scaleway_org: str
155
+ :param scaleway_org: Scaleway organization id. Optional.
144
156
  """
145
157
 
146
158
  def __init__(
@@ -213,8 +225,14 @@ class Config:
213
225
  openai_apikey=None,
214
226
  openai_org_id=None,
215
227
  anthropic_apikey=None,
228
+ airbyte_client_id=None,
229
+ airbyte_client_secret=None,
230
+ airbyte_api_url=None,
216
231
  trivy_s3_bucket=None,
217
232
  trivy_s3_prefix=None,
233
+ scaleway_access_key=None,
234
+ scaleway_secret_key=None,
235
+ scaleway_org=None,
218
236
  ):
219
237
  self.neo4j_uri = neo4j_uri
220
238
  self.neo4j_user = neo4j_user
@@ -284,5 +302,11 @@ class Config:
284
302
  self.openai_apikey = openai_apikey
285
303
  self.openai_org_id = openai_org_id
286
304
  self.anthropic_apikey = anthropic_apikey
305
+ self.airbyte_client_id = airbyte_client_id
306
+ self.airbyte_client_secret = airbyte_client_secret
307
+ self.airbyte_api_url = airbyte_api_url
287
308
  self.trivy_s3_bucket = trivy_s3_bucket
288
309
  self.trivy_s3_prefix = trivy_s3_prefix
310
+ self.scaleway_access_key = scaleway_access_key
311
+ self.scaleway_secret_key = scaleway_secret_key
312
+ self.scaleway_org = scaleway_org
cartography/data/indexes.cypher CHANGED
@@ -99,21 +99,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:ECRRepositoryImage) ON (n.tag);
99
99
  CREATE INDEX IF NOT EXISTS FOR (n:ECRRepositoryImage) ON (n.lastupdated);
100
100
  CREATE INDEX IF NOT EXISTS FOR (n:ECRScanFinding) ON (n.id);
101
101
  CREATE INDEX IF NOT EXISTS FOR (n:ECRScanFinding) ON (n.lastupdated);
102
- CREATE INDEX IF NOT EXISTS FOR (n:ECSCluster) ON (n.id);
103
- CREATE INDEX IF NOT EXISTS FOR (n:ECSCluster) ON (n.lastupdated);
104
- CREATE INDEX IF NOT EXISTS FOR (n:ECSContainerInstance) ON (n.id);
105
- CREATE INDEX IF NOT EXISTS FOR (n:ECSContainerInstance) ON (n.lastupdated);
106
- CREATE INDEX IF NOT EXISTS FOR (n:ECSService) ON (n.id);
107
- CREATE INDEX IF NOT EXISTS FOR (n:ECSService) ON (n.lastupdated);
108
- CREATE INDEX IF NOT EXISTS FOR (n:ECSTaskDefinition) ON (n.id);
109
- CREATE INDEX IF NOT EXISTS FOR (n:ECSTaskDefinition) ON (n.arn);
110
- CREATE INDEX IF NOT EXISTS FOR (n:ECSTaskDefinition) ON (n.lastupdated);
111
- CREATE INDEX IF NOT EXISTS FOR (n:ECSTask) ON (n.id);
112
- CREATE INDEX IF NOT EXISTS FOR (n:ECSTask) ON (n.lastupdated);
113
- CREATE INDEX IF NOT EXISTS FOR (n:ECSContainerDefinition) ON (n.id);
114
- CREATE INDEX IF NOT EXISTS FOR (n:ECSContainerDefinition) ON (n.lastupdated);
115
- CREATE INDEX IF NOT EXISTS FOR (n:ECSContainer) ON (n.id);
116
- CREATE INDEX IF NOT EXISTS FOR (n:ECSContainer) ON (n.lastupdated);
117
102
  CREATE INDEX IF NOT EXISTS FOR (n:ElasticacheCluster) ON (n.id);
118
103
  CREATE INDEX IF NOT EXISTS FOR (n:ElasticacheCluster) ON (n.arn);
119
104
  CREATE INDEX IF NOT EXISTS FOR (n:ElasticacheCluster) ON (n.lastupdated);
@@ -375,22 +360,3 @@ CREATE INDEX IF NOT EXISTS FOR (n:AzureDisk) ON (n.id);
375
360
  CREATE INDEX IF NOT EXISTS FOR (n:AzureDisk) ON (n.lastupdated);
376
361
  CREATE INDEX IF NOT EXISTS FOR (n:AzureSnapshot) ON (n.id);
377
362
  CREATE INDEX IF NOT EXISTS FOR (n:AzureSnapshot) ON (n.lastupdated);
378
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesCluster) ON (n.id);
379
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesCluster) ON (n.name);
380
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesCluster) ON (n.lastupdated);
381
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesNamespace) ON (n.id);
382
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesNamespace) ON (n.name);
383
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesNamespace) ON (n.lastupdated);
384
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesPod) ON (n.id);
385
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesPod) ON (n.name);
386
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesPod) ON (n.lastupdated);
387
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesContainer) ON (n.id);
388
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesContainer) ON (n.name);
389
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesContainer) ON (n.image);
390
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesContainer) ON (n.lastupdated);
391
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesSecret) ON (n.id);
392
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesSecret) ON (n.name);
393
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesSecret) ON (n.lastupdated);
394
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesService) ON (n.id);
395
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesService) ON (n.name);
396
- CREATE INDEX IF NOT EXISTS FOR (n:KubernetesService) ON (n.lastupdated);
cartography/driftdetect/cli.py CHANGED
@@ -63,8 +63,9 @@ class CLI:
63
63
  default="bolt://localhost:7687",
64
64
  help=(
65
65
  "A valid Neo4j URI to sync against. See "
66
- "https://neo4j.com/docs/api/python-driver/current/driver.html#uri for complete documentation on the "
67
- "structure of a Neo4j URI."
66
+ "https://neo4j.com/docs/browser-manual/current/operations/dbms-connection/#uri-scheme for "
67
+ "documentation on the structure of a Neo4j URI, and "
68
+ "https://neo4j.com/docs/api/python-driver/current/ for complete documentation on the Python driver."
68
69
  ),
69
70
  )
70
71
  parser_get_state.add_argument(
cartography/graph/cleanupbuilder.py CHANGED
@@ -3,6 +3,7 @@ from string import Template
3
3
  from typing import Dict
4
4
  from typing import List
5
5
 
6
+ from cartography.graph.querybuilder import _asdict_with_validate_relprops
6
7
  from cartography.graph.querybuilder import _build_match_clause
7
8
  from cartography.graph.querybuilder import rel_present_on_node_schema
8
9
  from cartography.models.core.common import PropertyRef
@@ -334,3 +335,49 @@ def _validate_target_node_matcher_for_cleanup_job(tgm: TargetNodeMatcher):
334
335
  f"{key} has set_in_kwargs=False, please check by reviewing the full stack trace to know which object"
335
336
  f"this message was raised from. Debug information: PropertyRef name = {prop_ref.name}.",
336
337
  )
338
+
339
+
340
+ def build_cleanup_query_for_matchlink(rel_schema: CartographyRelSchema) -> str:
341
+ """
342
+ Generates a cleanup query for a matchlink relationship.
343
+ :param rel_schema: The CartographyRelSchema object to generate a query. This CartographyRelSchema object
344
+ - Must have a source_node_matcher and source_node_label defined
345
+ - Must have a CartographyRelProperties object where _sub_resource_label and _sub_resource_id are defined
346
+ :return: A Neo4j query used to clean up stale matchlink relationships.
347
+ """
348
+ if not rel_schema.source_node_matcher:
349
+ raise ValueError(
350
+ f"No source node matcher found for {rel_schema.rel_label}; returning empty list."
351
+ )
352
+
353
+ query_template = Template(
354
+ """
355
+ MATCH (from:$source_node_label)$rel_direction[r:$rel_label]$rel_direction_end(to:$target_node_label)
356
+ WHERE r.lastupdated <> $UPDATE_TAG
357
+ AND r._sub_resource_label = $sub_resource_label
358
+ AND r._sub_resource_id = $sub_resource_id
359
+ WITH r LIMIT $LIMIT_SIZE
360
+ DELETE r;
361
+ """
362
+ )
363
+
364
+ # Determine which way to point the arrow. INWARD is toward the source, otherwise we go toward the target.
365
+ if rel_schema.direction == LinkDirection.INWARD:
366
+ rel_direction = "<-"
367
+ rel_direction_end = "-"
368
+ else:
369
+ rel_direction = "-"
370
+ rel_direction_end = "->"
371
+
372
+ # Small hack: avoid type-checking errors by converting the rel_schema to a dict.
373
+ rel_props_as_dict = _asdict_with_validate_relprops(rel_schema)
374
+
375
+ return query_template.safe_substitute(
376
+ source_node_label=rel_schema.source_node_label,
377
+ target_node_label=rel_schema.target_node_label,
378
+ rel_label=rel_schema.rel_label,
379
+ rel_direction=rel_direction,
380
+ rel_direction_end=rel_direction_end,
381
+ sub_resource_label=rel_props_as_dict["_sub_resource_label"],
382
+ sub_resource_id=rel_props_as_dict["_sub_resource_id"],
383
+ )
cartography/graph/job.py CHANGED
@@ -13,9 +13,11 @@ from typing import Union
13
13
  import neo4j
14
14
 
15
15
  from cartography.graph.cleanupbuilder import build_cleanup_queries
16
+ from cartography.graph.cleanupbuilder import build_cleanup_query_for_matchlink
16
17
  from cartography.graph.statement import get_job_shortname
17
18
  from cartography.graph.statement import GraphStatement
18
19
  from cartography.models.core.nodes import CartographyNodeSchema
20
+ from cartography.models.core.relationships import CartographyRelSchema
19
21
 
20
22
  logger = logging.getLogger(__name__)
21
23
 
@@ -176,6 +178,46 @@ class GraphJob:
176
178
  node_schema.label,
177
179
  )
178
180
 
181
+ @classmethod
182
+ def from_matchlink(
183
+ cls,
184
+ rel_schema: CartographyRelSchema,
185
+ sub_resource_label: str,
186
+ sub_resource_id: str,
187
+ update_tag: int,
188
+ ) -> "GraphJob":
189
+ """
190
+ Create a cleanup job from a CartographyRelSchema object (specifically, a MatchLink).
191
+ This is used for cleaning up stale links between nodes created by load_rels(). Do not use for other purposes.
192
+
193
+ Other notes:
194
+ - For a given rel_schema, the fields used in the rel_schema.properties._sub_resource_label.name and
195
+ rel_schema.properties._sub_resource_id.name must be provided as keys and values in the params dict.
196
+ - The rel_schema must have a source_node_matcher and target_node_matcher.
197
+ """
198
+ cleanup_link_query = build_cleanup_query_for_matchlink(rel_schema)
199
+ logger.debug(f"Cleanup query: {cleanup_link_query}")
200
+
201
+ parameters = {
202
+ "UPDATE_TAG": update_tag,
203
+ "_sub_resource_label": sub_resource_label,
204
+ "_sub_resource_id": sub_resource_id,
205
+ }
206
+
207
+ statement = GraphStatement(
208
+ cleanup_link_query,
209
+ parameters=parameters,
210
+ iterative=True,
211
+ iterationsize=100,
212
+ parent_job_name=rel_schema.rel_label,
213
+ )
214
+
215
+ return cls(
216
+ f"Cleanup {rel_schema.rel_label} between {rel_schema.source_node_label} and {rel_schema.target_node_label}",
217
+ [statement],
218
+ rel_schema.rel_label,
219
+ )
220
+
179
221
  @classmethod
180
222
  def from_json_file(cls, file_path: Union[str, Path]) -> "GraphJob":
181
223
  """
cartography/graph/querybuilder.py CHANGED
@@ -14,6 +14,7 @@ from cartography.models.core.nodes import ExtraNodeLabels
14
14
  from cartography.models.core.relationships import CartographyRelSchema
15
15
  from cartography.models.core.relationships import LinkDirection
16
16
  from cartography.models.core.relationships import OtherRelationships
17
+ from cartography.models.core.relationships import SourceNodeMatcher
17
18
  from cartography.models.core.relationships import TargetNodeMatcher
18
19
 
19
20
  logger = logging.getLogger(__name__)
@@ -109,10 +110,10 @@ def _build_rel_properties_statement(
109
110
  return set_clause
110
111
 
111
112
 
112
- def _build_match_clause(matcher: TargetNodeMatcher) -> str:
113
+ def _build_match_clause(matcher: TargetNodeMatcher | SourceNodeMatcher) -> str:
113
114
  """
114
115
  Generate a Neo4j match statement on one or more keys and values for a given node.
115
- :param matcher: A TargetNodeMatcher object
116
+ :param matcher: A TargetNodeMatcher or SourceNodeMatcher object
116
117
  :return: a Neo4j match clause
117
118
  """
118
119
  match = Template("$Key: $PropRef")
@@ -548,3 +549,136 @@ def build_create_index_queries(node_schema: CartographyNodeSchema) -> List[str]:
548
549
  ],
549
550
  )
550
551
  return result
552
+
553
+
554
+ def build_create_index_queries_for_matchlink(
555
+ rel_schema: CartographyRelSchema,
556
+ ) -> list[str]:
557
+ """
558
+ Generate queries to create indexes for the given CartographyRelSchema and all node types attached to it via its
559
+ relationships.
560
+ :param rel_schema: The CartographyRelSchema object
561
+ :return: A list of queries of the form `CREATE INDEX IF NOT EXISTS FOR (n:$TargetNodeLabel) ON (n.$TargetAttribute)`
562
+ """
563
+ if not rel_schema.source_node_matcher:
564
+ logger.warning(
565
+ f"No source node matcher found for {rel_schema.rel_label}; returning empty list."
566
+ "Please note that build_create_index_queries_for_matchlink() is only used for load_matchlinks() where we match on "
567
+ "and connect existing nodes in the graph."
568
+ )
569
+ return []
570
+
571
+ index_template = Template(
572
+ "CREATE INDEX IF NOT EXISTS FOR (n:$NodeLabel) ON (n.$NodeAttribute);",
573
+ )
574
+
575
+ result = []
576
+ for source_key in asdict(rel_schema.source_node_matcher).keys():
577
+ result.append(
578
+ index_template.safe_substitute(
579
+ NodeLabel=rel_schema.source_node_label,
580
+ NodeAttribute=source_key,
581
+ ),
582
+ )
583
+ for target_key in asdict(rel_schema.target_node_matcher).keys():
584
+ result.append(
585
+ index_template.safe_substitute(
586
+ NodeLabel=rel_schema.target_node_label,
587
+ NodeAttribute=target_key,
588
+ ),
589
+ )
590
+
591
+ # Create a composite index for the relationship between the source and target nodes.
592
+ # https://neo4j.com/docs/cypher-manual/4.3/indexes-for-search-performance/#administration-indexes-create-a-composite-index-for-relationships
593
+ rel_index_template = Template(
594
+ "CREATE INDEX IF NOT EXISTS FOR ()$rel_direction[r:$RelLabel]$rel_direction_end() "
595
+ "ON (r.lastupdated, r._sub_resource_label, r._sub_resource_id);",
596
+ )
597
+ if rel_schema.direction == LinkDirection.INWARD:
598
+ result.append(
599
+ rel_index_template.safe_substitute(
600
+ RelLabel=rel_schema.rel_label,
601
+ rel_direction="<-",
602
+ rel_direction_end="-",
603
+ )
604
+ )
605
+ else:
606
+ result.append(
607
+ rel_index_template.safe_substitute(
608
+ RelLabel=rel_schema.rel_label,
609
+ rel_direction="-",
610
+ rel_direction_end="->",
611
+ )
612
+ )
613
+ return result
614
+
615
+
616
+ def build_matchlink_query(rel_schema: CartographyRelSchema) -> str:
617
+ """
618
+ Generate a Neo4j query to link two existing nodes when given a CartographyRelSchema object.
619
+ This is only used for load_matchlinks().
620
+ :param rel_schema: The CartographyRelSchema object to generate a query. This CartographyRelSchema object
621
+ - Must have a source_node_matcher and source_node_label defined
622
+ - Must have a CartographyRelProperties object where _sub_resource_label and _sub_resource_id are defined
623
+ :return: A Neo4j query that can be used to link two existing nodes.
624
+ """
625
+ if not rel_schema.source_node_matcher or not rel_schema.source_node_label:
626
+ raise ValueError(
627
+ f"No source node matcher or source node label found for {rel_schema.rel_label}. "
628
+ "MatchLink relationships require a source_node_matcher and source_node_label to be defined."
629
+ )
630
+
631
+ rel_props_as_dict = _asdict_with_validate_relprops(rel_schema)
632
+
633
+ # These are needed for the cleanup query
634
+ if "_sub_resource_label" not in rel_props_as_dict:
635
+ raise ValueError(
636
+ f"Expected _sub_resource_label to be defined on {rel_schema.properties.__class__.__name__}"
637
+ "Please include `_sub_resource_label: PropertyRef = PropertyRef('_sub_resource_label', set_in_kwargs=True)`"
638
+ )
639
+ if "_sub_resource_id" not in rel_props_as_dict:
640
+ raise ValueError(
641
+ f"Expected _sub_resource_id to be defined on {rel_schema.properties.__class__.__name__}"
642
+ "Please include `_sub_resource_id: PropertyRef = PropertyRef('_sub_resource_id', set_in_kwargs=True)`"
643
+ )
644
+
645
+ matchlink_query_template = Template(
646
+ """
647
+ UNWIND $DictList as item
648
+ $source_match
649
+ $target_match
650
+ MERGE $rel
651
+ ON CREATE SET r.firstseen = timestamp()
652
+ SET
653
+ $set_rel_properties_statement;
654
+ """
655
+ )
656
+
657
+ source_match = Template(
658
+ "MATCH (from:$source_node_label{$match_clause})"
659
+ ).safe_substitute(
660
+ source_node_label=rel_schema.source_node_label,
661
+ match_clause=_build_match_clause(rel_schema.source_node_matcher),
662
+ )
663
+
664
+ target_match = Template(
665
+ "MATCH (to:$target_node_label{$match_clause})"
666
+ ).safe_substitute(
667
+ target_node_label=rel_schema.target_node_label,
668
+ match_clause=_build_match_clause(rel_schema.target_node_matcher),
669
+ )
670
+
671
+ if rel_schema.direction == LinkDirection.INWARD:
672
+ rel = f"(from)<-[r:{rel_schema.rel_label}]-(to)"
673
+ else:
674
+ rel = f"(from)-[r:{rel_schema.rel_label}]->(to)"
675
+
676
+ return matchlink_query_template.safe_substitute(
677
+ source_match=source_match,
678
+ target_match=target_match,
679
+ rel=rel,
680
+ set_rel_properties_statement=_build_rel_properties_statement(
681
+ "r",
682
+ rel_props_as_dict,
683
+ ),
684
+ )
cartography/graph/statement.py CHANGED
@@ -56,7 +56,7 @@ class GraphStatement:
56
56
 
57
57
  self.parent_job_name = parent_job_name if parent_job_name else None
58
58
  self.parent_job_sequence_num = (
59
- parent_job_sequence_num if parent_job_sequence_num else None
59
+ parent_job_sequence_num if parent_job_sequence_num else 1
60
60
  )
61
61
 
62
62
  def merge_parameters(self, parameters: Dict) -> None: