cartography 0.117.0__py3-none-any.whl → 0.119.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography has been flagged as potentially problematic; review the file changes listed below before upgrading.

Files changed (107):
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +31 -0
  3. cartography/client/core/tx.py +19 -3
  4. cartography/config.py +14 -0
  5. cartography/data/indexes.cypher +0 -6
  6. cartography/graph/job.py +13 -7
  7. cartography/graph/statement.py +4 -0
  8. cartography/intel/aws/__init__.py +22 -9
  9. cartography/intel/aws/apigateway.py +18 -5
  10. cartography/intel/aws/ec2/elastic_ip_addresses.py +3 -1
  11. cartography/intel/aws/ec2/internet_gateways.py +4 -2
  12. cartography/intel/aws/ec2/load_balancer_v2s.py +11 -5
  13. cartography/intel/aws/ec2/network_interfaces.py +4 -0
  14. cartography/intel/aws/ec2/reserved_instances.py +3 -1
  15. cartography/intel/aws/ec2/tgw.py +11 -5
  16. cartography/intel/aws/ec2/volumes.py +1 -1
  17. cartography/intel/aws/ecr.py +209 -26
  18. cartography/intel/aws/ecr_image_layers.py +143 -42
  19. cartography/intel/aws/elasticsearch.py +13 -4
  20. cartography/intel/aws/identitycenter.py +93 -54
  21. cartography/intel/aws/inspector.py +90 -46
  22. cartography/intel/aws/permission_relationships.py +3 -3
  23. cartography/intel/aws/resourcegroupstaggingapi.py +1 -1
  24. cartography/intel/aws/s3.py +26 -13
  25. cartography/intel/aws/ssm.py +3 -5
  26. cartography/intel/azure/compute.py +9 -4
  27. cartography/intel/azure/cosmosdb.py +31 -15
  28. cartography/intel/azure/sql.py +25 -12
  29. cartography/intel/azure/storage.py +19 -9
  30. cartography/intel/azure/subscription.py +3 -1
  31. cartography/intel/crowdstrike/spotlight.py +5 -2
  32. cartography/intel/entra/app_role_assignments.py +9 -2
  33. cartography/intel/gcp/__init__.py +26 -9
  34. cartography/intel/gcp/clients.py +8 -4
  35. cartography/intel/gcp/compute.py +42 -21
  36. cartography/intel/gcp/crm/folders.py +9 -3
  37. cartography/intel/gcp/crm/orgs.py +8 -3
  38. cartography/intel/gcp/crm/projects.py +14 -3
  39. cartography/intel/github/repos.py +23 -5
  40. cartography/intel/gsuite/__init__.py +12 -8
  41. cartography/intel/gsuite/groups.py +291 -0
  42. cartography/intel/gsuite/users.py +142 -0
  43. cartography/intel/jamf/computers.py +7 -1
  44. cartography/intel/oci/iam.py +23 -9
  45. cartography/intel/oci/organizations.py +3 -1
  46. cartography/intel/oci/utils.py +28 -5
  47. cartography/intel/okta/awssaml.py +9 -8
  48. cartography/intel/okta/users.py +1 -1
  49. cartography/intel/ontology/__init__.py +44 -0
  50. cartography/intel/ontology/devices.py +54 -0
  51. cartography/intel/ontology/users.py +54 -0
  52. cartography/intel/ontology/utils.py +121 -0
  53. cartography/intel/pagerduty/escalation_policies.py +13 -6
  54. cartography/intel/pagerduty/schedules.py +9 -4
  55. cartography/intel/pagerduty/services.py +7 -3
  56. cartography/intel/pagerduty/teams.py +5 -2
  57. cartography/intel/pagerduty/users.py +3 -1
  58. cartography/intel/pagerduty/vendors.py +3 -1
  59. cartography/intel/trivy/__init__.py +109 -58
  60. cartography/models/airbyte/user.py +4 -0
  61. cartography/models/anthropic/user.py +4 -0
  62. cartography/models/aws/ec2/networkinterfaces.py +2 -0
  63. cartography/models/aws/ecr/image.py +55 -0
  64. cartography/models/aws/ecr/repository_image.py +1 -1
  65. cartography/models/aws/iam/group_membership.py +3 -2
  66. cartography/models/aws/identitycenter/awsssouser.py +3 -1
  67. cartography/models/bigfix/bigfix_computer.py +1 -1
  68. cartography/models/cloudflare/member.py +4 -0
  69. cartography/models/crowdstrike/hosts.py +1 -1
  70. cartography/models/duo/endpoint.py +1 -1
  71. cartography/models/duo/phone.py +2 -2
  72. cartography/models/duo/user.py +4 -0
  73. cartography/models/entra/user.py +2 -1
  74. cartography/models/github/users.py +4 -0
  75. cartography/models/gsuite/__init__.py +0 -0
  76. cartography/models/gsuite/group.py +218 -0
  77. cartography/models/gsuite/tenant.py +29 -0
  78. cartography/models/gsuite/user.py +107 -0
  79. cartography/models/kandji/device.py +1 -2
  80. cartography/models/keycloak/user.py +4 -0
  81. cartography/models/lastpass/user.py +4 -0
  82. cartography/models/ontology/__init__.py +0 -0
  83. cartography/models/ontology/device.py +125 -0
  84. cartography/models/ontology/mapping/__init__.py +16 -0
  85. cartography/models/ontology/mapping/data/__init__.py +1 -0
  86. cartography/models/ontology/mapping/data/devices.py +160 -0
  87. cartography/models/ontology/mapping/data/users.py +239 -0
  88. cartography/models/ontology/mapping/specs.py +65 -0
  89. cartography/models/ontology/user.py +52 -0
  90. cartography/models/openai/user.py +4 -0
  91. cartography/models/scaleway/iam/user.py +4 -0
  92. cartography/models/snipeit/asset.py +1 -0
  93. cartography/models/snipeit/user.py +4 -0
  94. cartography/models/tailscale/device.py +1 -1
  95. cartography/models/tailscale/user.py +6 -1
  96. cartography/rules/data/frameworks/mitre_attack/requirements/t1098_account_manipulation/__init__.py +176 -89
  97. cartography/sync.py +4 -1
  98. cartography/util.py +49 -18
  99. {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/METADATA +3 -3
  100. {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/RECORD +104 -89
  101. cartography/data/jobs/cleanup/gsuite_ingest_groups_cleanup.json +0 -23
  102. cartography/data/jobs/cleanup/gsuite_ingest_users_cleanup.json +0 -11
  103. cartography/intel/gsuite/api.py +0 -355
  104. {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/WHEEL +0 -0
  105. {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/entry_points.txt +0 -0
  106. {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/licenses/LICENSE +0 -0
  107. {cartography-0.117.0.dist-info → cartography-0.119.0.dist-info}/top_level.txt +0 -0
cartography/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.117.0'
32
- __version_tuple__ = version_tuple = (0, 117, 0)
31
+ __version__ = version = '0.119.0'
32
+ __version_tuple__ = version_tuple = (0, 119, 0)
33
33
 
34
34
  __commit_id__ = commit_id = None
cartography/cli.py CHANGED
@@ -279,6 +279,17 @@ class CLI:
279
279
  "Example: 'HIGH' will sync only HIGH and CRITICAL findings, filtering out LOW and MEDIUM severity findings."
280
280
  ),
281
281
  )
282
+ parser.add_argument(
283
+ "--experimental-aws-inspector-batch",
284
+ type=int,
285
+ default=1000,
286
+ help=(
287
+ "EXPERIMENTAL: This feature is experimental and may be removed in the future. "
288
+ "Batch size for AWS Inspector findings sync. Controls how many findings are fetched, processed and cleaned up at a time. "
289
+ "Default is 1000. Increase this value if you have a large number of findings and want to reduce API calls, "
290
+ "or decrease it if you're experiencing memory issues."
291
+ ),
292
+ )
282
293
  parser.add_argument(
283
294
  "--analysis-job-directory",
284
295
  type=str,
@@ -719,6 +730,26 @@ class CLI:
719
730
  "Required if you are using the Trivy module. Ignored otherwise."
720
731
  ),
721
732
  )
733
+ parser.add_argument(
734
+ "--ontology-users-source",
735
+ type=str,
736
+ default=None,
737
+ help=(
738
+ "Comma-separated list of sources of truth for user data in the ontology. "
739
+ "'User' nodes will only be created for users that exist in one of the sources. "
740
+ "Required if you are using the ontology module. Ignored otherwise."
741
+ ),
742
+ )
743
+ parser.add_argument(
744
+ "--ontology-devices-source",
745
+ type=str,
746
+ default=None,
747
+ help=(
748
+ "Comma-separated list of sources of truth for client computer data in the ontology. "
749
+ "'Device' nodes will only be created for groups that exist in one of the sources. "
750
+ "Required if you are using the ontology module. Ignored otherwise."
751
+ ),
752
+ )
722
753
  parser.add_argument(
723
754
  "--trivy-results-dir",
724
755
  type=str,
@@ -249,6 +249,7 @@ def load_graph_data(
249
249
  neo4j_session: neo4j.Session,
250
250
  query: str,
251
251
  dict_list: List[Dict[str, Any]],
252
+ batch_size: int = 10000,
252
253
  **kwargs,
253
254
  ) -> None:
254
255
  """
@@ -257,10 +258,13 @@ def load_graph_data(
257
258
  :param query: The Neo4j write query to run. This query is not meant to be handwritten, rather it should be generated
258
259
  with cartography.graph.querybuilder.build_ingestion_query().
259
260
  :param dict_list: The data to load to the graph represented as a list of dicts.
261
+ :param batch_size: The number of items to process per transaction. Defaults to 10000.
260
262
  :param kwargs: Allows additional keyword args to be supplied to the Neo4j query.
261
263
  :return: None
262
264
  """
263
- for data_batch in batch(dict_list, size=10000):
265
+ if batch_size <= 0:
266
+ raise ValueError(f"batch_size must be greater than 0, got {batch_size}")
267
+ for data_batch in batch(dict_list, size=batch_size):
264
268
  neo4j_session.write_transaction(
265
269
  write_list_of_dicts_tx,
266
270
  query,
@@ -316,6 +320,7 @@ def load(
316
320
  neo4j_session: neo4j.Session,
317
321
  node_schema: CartographyNodeSchema,
318
322
  dict_list: List[Dict[str, Any]],
323
+ batch_size: int = 10000,
319
324
  **kwargs,
320
325
  ) -> None:
321
326
  """
@@ -324,21 +329,27 @@ def load(
324
329
  :param neo4j_session: The Neo4j session
325
330
  :param node_schema: The CartographyNodeSchema object to create indexes for and generate a query.
326
331
  :param dict_list: The data to load to the graph represented as a list of dicts.
332
+ :param batch_size: The number of items to process per transaction. Defaults to 10000.
327
333
  :param kwargs: Allows additional keyword args to be supplied to the Neo4j query.
328
334
  :return: None
329
335
  """
336
+ if batch_size <= 0:
337
+ raise ValueError(f"batch_size must be greater than 0, got {batch_size}")
330
338
  if len(dict_list) == 0:
331
339
  # If there is no data to load, save some time.
332
340
  return
333
341
  ensure_indexes(neo4j_session, node_schema)
334
342
  ingestion_query = build_ingestion_query(node_schema)
335
- load_graph_data(neo4j_session, ingestion_query, dict_list, **kwargs)
343
+ load_graph_data(
344
+ neo4j_session, ingestion_query, dict_list, batch_size=batch_size, **kwargs
345
+ )
336
346
 
337
347
 
338
348
  def load_matchlinks(
339
349
  neo4j_session: neo4j.Session,
340
350
  rel_schema: CartographyRelSchema,
341
351
  dict_list: list[dict[str, Any]],
352
+ batch_size: int = 10000,
342
353
  **kwargs,
343
354
  ) -> None:
344
355
  """
@@ -347,9 +358,12 @@ def load_matchlinks(
347
358
  :param rel_schema: The CartographyRelSchema object to generate a query.
348
359
  :param dict_list: The data to load to the graph represented as a list of dicts. The dicts must contain the source and
349
360
  target node ids.
361
+ :param batch_size: The number of items to process per transaction. Defaults to 10000.
350
362
  :param kwargs: Allows additional keyword args to be supplied to the Neo4j query.
351
363
  :return: None
352
364
  """
365
+ if batch_size <= 0:
366
+ raise ValueError(f"batch_size must be greater than 0, got {batch_size}")
353
367
  if len(dict_list) == 0:
354
368
  # If there is no data to load, save some time.
355
369
  return
@@ -369,4 +383,6 @@ def load_matchlinks(
369
383
  ensure_indexes_for_matchlinks(neo4j_session, rel_schema)
370
384
  matchlink_query = build_matchlink_query(rel_schema)
371
385
  logger.debug(f"Matchlink query: {matchlink_query}")
372
- load_graph_data(neo4j_session, matchlink_query, dict_list, **kwargs)
386
+ load_graph_data(
387
+ neo4j_session, matchlink_query, dict_list, batch_size=batch_size, **kwargs
388
+ )
cartography/config.py CHANGED
@@ -58,6 +58,9 @@ class Config:
58
58
  :type aws_guardduty_severity_threshold: str
59
59
  :param aws_guardduty_severity_threshold: GuardDuty severity threshold filter. Only findings at or above this
60
60
  severity level will be synced. Valid values: LOW, MEDIUM, HIGH, CRITICAL. Optional.
61
+ :type experimental_aws_inspector_batch: int
62
+ :param experimental_aws_inspector_batch: EXPERIMENTAL: Batch size for AWS Inspector findings sync. Controls how
63
+ many findings are fetched, processed and cleaned up at a time. Default is 1000. Optional.
61
64
  :type analysis_job_directory: str
62
65
  :param analysis_job_directory: Path to a directory tree containing analysis jobs to run. Optional.
63
66
  :type oci_sync_all_profiles: bool
@@ -158,6 +161,11 @@ class Config:
158
161
  :param trivy_s3_bucket: The S3 bucket name containing Trivy scan results. Optional.
159
162
  :type trivy_s3_prefix: str
160
163
  :param trivy_s3_prefix: The S3 prefix path containing Trivy scan results. Optional.
164
+ :type ontology_users_source: str
165
+ :param ontology_users_source: Comma-separated list of sources of truth for user data in the ontology. Optional.
166
+ :type ontology_devices_source: str
167
+ :param ontology_devices_source: Comma-separated list of sources of truth for client computers data in the ontology.
168
+ Optional.
161
169
  :type trivy_results_dir: str
162
170
  :param trivy_results_dir: Local directory containing Trivy scan results. Optional.
163
171
  :type scaleway_access_key: str
@@ -195,6 +203,7 @@ class Config:
195
203
  aws_regions=None,
196
204
  aws_best_effort_mode=False,
197
205
  aws_cloudtrail_management_events_lookback_hours=None,
206
+ experimental_aws_inspector_batch=1000,
198
207
  azure_sync_all_subscriptions=False,
199
208
  azure_sp_auth=None,
200
209
  azure_tenant_id=None,
@@ -262,6 +271,8 @@ class Config:
262
271
  airbyte_api_url=None,
263
272
  trivy_s3_bucket=None,
264
273
  trivy_s3_prefix=None,
274
+ ontology_users_source=None,
275
+ ontology_devices_source=None,
265
276
  trivy_results_dir=None,
266
277
  scaleway_access_key=None,
267
278
  scaleway_secret_key=None,
@@ -287,6 +298,7 @@ class Config:
287
298
  self.aws_cloudtrail_management_events_lookback_hours = (
288
299
  aws_cloudtrail_management_events_lookback_hours
289
300
  )
301
+ self.experimental_aws_inspector_batch = experimental_aws_inspector_batch
290
302
  self.azure_sync_all_subscriptions = azure_sync_all_subscriptions
291
303
  self.azure_sp_auth = azure_sp_auth
292
304
  self.azure_tenant_id = azure_tenant_id
@@ -354,6 +366,8 @@ class Config:
354
366
  self.airbyte_api_url = airbyte_api_url
355
367
  self.trivy_s3_bucket = trivy_s3_bucket
356
368
  self.trivy_s3_prefix = trivy_s3_prefix
369
+ self.ontology_users_source = ontology_users_source
370
+ self.ontology_devices_source = ontology_devices_source
357
371
  self.trivy_results_dir = trivy_results_dir
358
372
  self.scaleway_access_key = scaleway_access_key
359
373
  self.scaleway_secret_key = scaleway_secret_key
@@ -102,12 +102,6 @@ CREATE INDEX IF NOT EXISTS FOR (n:GCPVpc) ON (n.id);
102
102
  CREATE INDEX IF NOT EXISTS FOR (n:GCPVpc) ON (n.lastupdated);
103
103
  CREATE INDEX IF NOT EXISTS FOR (n:GitHubRepository) ON (n.id);
104
104
  CREATE INDEX IF NOT EXISTS FOR (n:GitHubRepository) ON (n.lastupdated);
105
- CREATE INDEX IF NOT EXISTS FOR (n:GSuiteGroup) ON (n.email);
106
- CREATE INDEX IF NOT EXISTS FOR (n:GSuiteGroup) ON (n.id);
107
- CREATE INDEX IF NOT EXISTS FOR (n:GSuiteGroup) ON (n.lastupdated);
108
- CREATE INDEX IF NOT EXISTS FOR (n:GSuiteUser) ON (n.email);
109
- CREATE INDEX IF NOT EXISTS FOR (n:GSuiteUser) ON (n.id);
110
- CREATE INDEX IF NOT EXISTS FOR (n:GSuiteUser) ON (n.lastupdated);
111
105
  CREATE INDEX IF NOT EXISTS FOR (n:Ip) ON (n.id);
112
106
  CREATE INDEX IF NOT EXISTS FOR (n:Ip) ON (n.ip);
113
107
  CREATE INDEX IF NOT EXISTS FOR (n:Ip) ON (n.lastupdated);
cartography/graph/job.py CHANGED
@@ -125,11 +125,13 @@ class GraphJob:
125
125
  }
126
126
 
127
127
  @classmethod
128
- def from_json(cls, blob: str, short_name: Optional[str] = None) -> "GraphJob":
128
+ def from_json(
129
+ cls, blob: Union[str, dict], short_name: Optional[str] = None
130
+ ) -> "GraphJob":
129
131
  """
130
- Create a job from a JSON blob.
132
+ Create a job from a JSON dict or blob.
131
133
  """
132
- data: Dict = json.loads(blob)
134
+ data = json.loads(blob) if isinstance(blob, str) else blob
133
135
  statements = _get_statements_from_json(data, short_name)
134
136
  name = data["name"]
135
137
  return cls(name, statements, short_name)
@@ -139,11 +141,13 @@ class GraphJob:
139
141
  cls,
140
142
  node_schema: CartographyNodeSchema,
141
143
  parameters: Dict[str, Any],
144
+ iterationsize: int = 100,
142
145
  ) -> "GraphJob":
143
146
  """
144
147
  Create a cleanup job from a CartographyNodeSchema object.
145
148
  For a given node, the fields used in the node_schema.sub_resource_relationship.target_node_node_matcher.keys()
146
149
  must be provided as keys and values in the params dict.
150
+ :param iterationsize: The number of items to process in each iteration. Defaults to 100.
147
151
  """
148
152
  queries: List[str] = build_cleanup_queries(node_schema)
149
153
 
@@ -165,7 +169,7 @@ class GraphJob:
165
169
  query,
166
170
  parameters=parameters,
167
171
  iterative=True,
168
- iterationsize=100,
172
+ iterationsize=iterationsize,
169
173
  parent_job_name=node_schema.label,
170
174
  parent_job_sequence_num=idx,
171
175
  )
@@ -185,6 +189,7 @@ class GraphJob:
185
189
  sub_resource_label: str,
186
190
  sub_resource_id: str,
187
191
  update_tag: int,
192
+ iterationsize: int = 100,
188
193
  ) -> "GraphJob":
189
194
  """
190
195
  Create a cleanup job from a CartographyRelSchema object (specifically, a MatchLink).
@@ -194,6 +199,7 @@ class GraphJob:
194
199
  - For a given rel_schema, the fields used in the rel_schema.properties._sub_resource_label.name and
195
200
  rel_schema.properties._sub_resource_id.name must be provided as keys and values in the params dict.
196
201
  - The rel_schema must have a source_node_matcher and target_node_matcher.
202
+ :param iterationsize: The number of items to process in each iteration. Defaults to 100.
197
203
  """
198
204
  cleanup_link_query = build_cleanup_query_for_matchlink(rel_schema)
199
205
  logger.debug(f"Cleanup query: {cleanup_link_query}")
@@ -208,7 +214,7 @@ class GraphJob:
208
214
  cleanup_link_query,
209
215
  parameters=parameters,
210
216
  iterative=True,
211
- iterationsize=100,
217
+ iterationsize=iterationsize,
212
218
  parent_job_name=rel_schema.rel_label,
213
219
  )
214
220
 
@@ -238,12 +244,12 @@ class GraphJob:
238
244
  def run_from_json(
239
245
  cls,
240
246
  neo4j_session: neo4j.Session,
241
- blob: str,
247
+ blob: Union[str, dict],
242
248
  parameters: Dict,
243
249
  short_name: Optional[str] = None,
244
250
  ) -> None:
245
251
  """
246
- Run a job from a JSON blob. This will deserialize the job and execute all statements sequentially.
252
+ Run a job from a JSON dict or blob. This will deserialize the job and execute all statements sequentially.
247
253
  """
248
254
  if not parameters:
249
255
  parameters = {}
@@ -52,6 +52,10 @@ class GraphStatement:
52
52
  self.parameters = parameters or {}
53
53
  self.iterative = iterative
54
54
  self.iterationsize = iterationsize
55
+ if iterationsize < 0:
56
+ raise ValueError(
57
+ f"iterationsize must be a positive integer, got {iterationsize}",
58
+ )
55
59
  self.parameters["LIMIT_SIZE"] = self.iterationsize
56
60
 
57
61
  self.parent_job_name = parent_job_name if parent_job_name else None
@@ -6,6 +6,7 @@ from typing import Dict
6
6
  from typing import Iterable
7
7
  from typing import List
8
8
 
9
+ import aioboto3
9
10
  import boto3
10
11
  import botocore.exceptions
11
12
  import neo4j
@@ -49,12 +50,13 @@ def _build_aws_sync_kwargs(
49
50
 
50
51
  def _sync_one_account(
51
52
  neo4j_session: neo4j.Session,
52
- boto3_session: boto3.session.Session,
53
+ boto3_session: boto3.Session,
53
54
  current_aws_account_id: str,
54
55
  update_tag: int,
55
56
  common_job_parameters: Dict[str, Any],
56
57
  regions: list[str] | None = None,
57
58
  aws_requested_syncs: Iterable[str] = RESOURCE_FUNCTIONS.keys(),
59
+ aioboto3_session: aioboto3.Session = aioboto3.Session(),
58
60
  ) -> None:
59
61
  # Autodiscover the regions supported by the account unless the user has specified the regions to sync.
60
62
  if not regions:
@@ -72,13 +74,20 @@ def _sync_one_account(
72
74
  for func_name in aws_requested_syncs:
73
75
  if func_name in RESOURCE_FUNCTIONS:
74
76
  # Skip permission relationships and tags for now because they rely on data already being in the graph
75
- if func_name not in [
76
- "permission_relationships",
77
- "resourcegroupstaggingapi",
78
- ]:
79
- RESOURCE_FUNCTIONS[func_name](**sync_args)
80
- else:
77
+ if func_name == "ecr:image_layers":
78
+ # has a different signature than the other functions (aioboto3_session replaces boto3_session)
79
+ RESOURCE_FUNCTIONS[func_name](
80
+ neo4j_session,
81
+ aioboto3_session,
82
+ regions,
83
+ current_aws_account_id,
84
+ update_tag,
85
+ common_job_parameters,
86
+ )
87
+ elif func_name in ["permission_relationships", "resourcegroupstaggingapi"]:
81
88
  continue
89
+ else:
90
+ RESOURCE_FUNCTIONS[func_name](**sync_args)
82
91
  else:
83
92
  raise ValueError(
84
93
  f'AWS sync function "{func_name}" was specified but does not exist. Did you misspell it?',
@@ -115,7 +124,7 @@ def _sync_one_account(
115
124
 
116
125
 
117
126
  def _autodiscover_account_regions(
118
- boto3_session: boto3.session.Session,
127
+ boto3_session: boto3.Session,
119
128
  account_id: str,
120
129
  ) -> List[str]:
121
130
  regions: List[str] = []
@@ -136,7 +145,7 @@ def _autodiscover_account_regions(
136
145
 
137
146
  def _autodiscover_accounts(
138
147
  neo4j_session: neo4j.Session,
139
- boto3_session: boto3.session.Session,
148
+ boto3_session: boto3.Session,
140
149
  account_id: str,
141
150
  sync_tag: int,
142
151
  common_job_parameters: Dict,
@@ -197,8 +206,10 @@ def _sync_multiple_accounts(
197
206
  if num_accounts == 1:
198
207
  # Use the default boto3 session because boto3 gets confused if you give it a profile name with 1 account
199
208
  boto3_session = boto3.Session()
209
+ aioboto3_session = aioboto3.Session()
200
210
  else:
201
211
  boto3_session = boto3.Session(profile_name=profile_name)
212
+ aioboto3_session = aioboto3.Session(profile_name=profile_name)
202
213
 
203
214
  _autodiscover_accounts(
204
215
  neo4j_session,
@@ -217,6 +228,7 @@ def _sync_multiple_accounts(
217
228
  common_job_parameters,
218
229
  regions=regions,
219
230
  aws_requested_syncs=aws_requested_syncs, # Could be replaced later with per-account requested syncs
231
+ aioboto3_session=aioboto3_session,
220
232
  )
221
233
  except Exception as e:
222
234
  if aws_best_effort_mode:
@@ -312,6 +324,7 @@ def start_aws_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
312
324
  "permission_relationships_file": config.permission_relationships_file,
313
325
  "aws_guardduty_severity_threshold": config.aws_guardduty_severity_threshold,
314
326
  "aws_cloudtrail_management_events_lookback_hours": config.aws_cloudtrail_management_events_lookback_hours,
327
+ "experimental_aws_inspector_batch": config.experimental_aws_inspector_batch,
315
328
  }
316
329
  try:
317
330
  boto3_session = boto3.Session()
@@ -178,11 +178,24 @@ def get_rest_api_resources_methods_integrations(
178
178
  method["apiId"] = api["id"]
179
179
  method["httpMethod"] = http_method
180
180
  methods.append(method)
181
- integration = client.get_integration(
182
- restApiId=api["id"],
183
- resourceId=resource_id,
184
- httpMethod=http_method,
185
- )
181
+ try:
182
+ integration = client.get_integration(
183
+ restApiId=api["id"],
184
+ resourceId=resource_id,
185
+ httpMethod=http_method,
186
+ )
187
+ except ClientError as e:
188
+ error_code = e.response.get("Error", {}).get("Code")
189
+ if error_code == "NotFoundException":
190
+ logger.warning(
191
+ "No integration found for API %s resource %s method %s: %s",
192
+ api["id"],
193
+ resource_id,
194
+ http_method,
195
+ e,
196
+ )
197
+ continue
198
+ raise
186
199
  integration["resourceId"] = resource_id
187
200
  integration["apiId"] = api["id"]
188
201
  integration["integrationHttpMethod"] = integration.get("httpMethod")
@@ -6,6 +6,7 @@ import boto3
6
6
  import neo4j
7
7
  from botocore.exceptions import ClientError
8
8
 
9
+ from cartography.client.core.tx import run_write_query
9
10
  from cartography.util import aws_handle_regions
10
11
  from cartography.util import run_cleanup_job
11
12
  from cartography.util import timeit
@@ -83,7 +84,8 @@ def load_elastic_ip_addresses(
83
84
  SET r.lastupdated = $update_tag
84
85
  """
85
86
 
86
- neo4j_session.run(
87
+ run_write_query(
88
+ neo4j_session,
87
89
  ingest_addresses,
88
90
  elastic_ip_addresses=elastic_ip_addresses,
89
91
  Region=region,
@@ -5,6 +5,7 @@ from typing import List
5
5
  import boto3
6
6
  import neo4j
7
7
 
8
+ from cartography.client.core.tx import run_write_query
8
9
  from cartography.util import aws_handle_regions
9
10
  from cartography.util import run_cleanup_job
10
11
  from cartography.util import timeit
@@ -63,13 +64,14 @@ def load_internet_gateways(
63
64
  SET r.lastupdated = $aws_update_tag
64
65
  """
65
66
 
66
- neo4j_session.run(
67
+ run_write_query(
68
+ neo4j_session,
67
69
  query,
68
70
  internet_gateways=internet_gateways,
69
71
  region=region,
70
72
  aws_account_id=current_aws_account_id,
71
73
  aws_update_tag=update_tag,
72
- ).consume()
74
+ )
73
75
 
74
76
 
75
77
  @timeit
@@ -6,6 +6,7 @@ import boto3
6
6
  import botocore
7
7
  import neo4j
8
8
 
9
+ from cartography.client.core.tx import run_write_query
9
10
  from cartography.util import aws_handle_regions
10
11
  from cartography.util import run_cleanup_job
11
12
  from cartography.util import timeit
@@ -104,7 +105,8 @@ def load_load_balancer_v2s(
104
105
  logger.warning("Skipping load balancer entry with missing DNSName: %r", lb)
105
106
  continue
106
107
 
107
- neo4j_session.run(
108
+ run_write_query(
109
+ neo4j_session,
108
110
  ingest_load_balancer_v2,
109
111
  ID=load_balancer_id,
110
112
  CREATED_TIME=str(lb["CreatedTime"]),
@@ -138,7 +140,8 @@ def load_load_balancer_v2s(
138
140
  SET r.lastupdated = $update_tag
139
141
  """
140
142
  for group in lb["SecurityGroups"]:
141
- neo4j_session.run(
143
+ run_write_query(
144
+ neo4j_session,
142
145
  ingest_load_balancer_v2_security_group,
143
146
  ID=load_balancer_id,
144
147
  GROUP_ID=str(group),
@@ -182,7 +185,8 @@ def load_load_balancer_v2_subnets(
182
185
  SET r.lastupdated = $update_tag
183
186
  """
184
187
  for az in az_data:
185
- neo4j_session.run(
188
+ run_write_query(
189
+ neo4j_session,
186
190
  ingest_load_balancer_subnet,
187
191
  ID=load_balancer_id,
188
192
  SubnetId=az["SubnetId"],
@@ -219,7 +223,8 @@ def load_load_balancer_v2_target_groups(
219
223
  continue
220
224
 
221
225
  for instance in target_group["Targets"]:
222
- neo4j_session.run(
226
+ run_write_query(
227
+ neo4j_session,
223
228
  ingest_instances,
224
229
  ID=load_balancer_id,
225
230
  INSTANCE_ID=instance,
@@ -253,7 +258,8 @@ def load_load_balancer_v2_listeners(
253
258
  ON CREATE SET r.firstseen = timestamp()
254
259
  SET r.lastupdated = $update_tag
255
260
  """
256
- neo4j_session.run(
261
+ run_write_query(
262
+ neo4j_session,
257
263
  ingest_listener,
258
264
  LoadBalancerId=load_balancer_id,
259
265
  Listeners=listener_data,
@@ -98,6 +98,10 @@ def transform_network_interface_data(
98
98
  "SourceDestCheck": network_interface["SourceDestCheck"],
99
99
  "Status": network_interface["Status"],
100
100
  "SubnetId": network_interface["SubnetId"],
101
+ "AttachTime": network_interface.get("Attachment", {}).get("AttachTime"),
102
+ "DeviceIndex": network_interface.get("Attachment", {}).get(
103
+ "DeviceIndex"
104
+ ),
101
105
  "ElbV1Id": elb_v1_id,
102
106
  "ElbV2Id": elb_v2_id,
103
107
  },
@@ -6,6 +6,7 @@ import boto3
6
6
  import neo4j
7
7
  from botocore.exceptions import ClientError
8
8
 
9
+ from cartography.client.core.tx import run_write_query
9
10
  from cartography.util import aws_handle_regions
10
11
  from cartography.util import run_cleanup_job
11
12
  from cartography.util import timeit
@@ -64,7 +65,8 @@ def load_reserved_instances(
64
65
  r_instance["Start"] = str(r_instance["Start"])
65
66
  r_instance["End"] = str(r_instance["End"])
66
67
 
67
- neo4j_session.run(
68
+ run_write_query(
69
+ neo4j_session,
68
70
  ingest_reserved_instances,
69
71
  reserved_instances_list=data,
70
72
  AWS_ACCOUNT_ID=current_aws_account_id,
@@ -6,6 +6,7 @@ import boto3
6
6
  import botocore.exceptions
7
7
  import neo4j
8
8
 
9
+ from cartography.client.core.tx import run_write_query
9
10
  from cartography.util import aws_handle_regions
10
11
  from cartography.util import run_cleanup_job
11
12
  from cartography.util import timeit
@@ -120,7 +121,8 @@ def load_transit_gateways(
120
121
  for tgw in data:
121
122
  tgw_id = tgw["TransitGatewayId"]
122
123
 
123
- neo4j_session.run(
124
+ run_write_query(
125
+ neo4j_session,
124
126
  ingest_transit_gateway,
125
127
  TgwId=tgw_id,
126
128
  ARN=tgw["TransitGatewayArn"],
@@ -161,7 +163,8 @@ def _attach_shared_transit_gateway(
161
163
  """
162
164
 
163
165
  if tgw["OwnerId"] != current_aws_account_id:
164
- neo4j_session.run(
166
+ run_write_query(
167
+ neo4j_session,
165
168
  attach_tgw,
166
169
  ARN=tgw["TransitGatewayArn"],
167
170
  TransitGatewayId=tgw["TransitGatewayId"],
@@ -202,7 +205,8 @@ def load_tgw_attachments(
202
205
  for tgwa in data:
203
206
  tgwa_id = tgwa["TransitGatewayAttachmentId"]
204
207
 
205
- neo4j_session.run(
208
+ run_write_query(
209
+ neo4j_session,
206
210
  ingest_transit_gateway,
207
211
  TgwAttachmentId=tgwa_id,
208
212
  TransitGatewayId=tgwa["TransitGatewayId"],
@@ -261,7 +265,8 @@ def _attach_tgw_vpc_attachment_to_vpc_subnets(
261
265
  SET p.lastupdated = $update_tag
262
266
  """
263
267
 
264
- neo4j_session.run(
268
+ run_write_query(
269
+ neo4j_session,
265
270
  attach_vpc_tgw_attachment_to_vpc,
266
271
  VpcId=tgw_vpc_attachment["VpcId"],
267
272
  TgwAttachmentId=tgw_vpc_attachment["TransitGatewayAttachmentId"],
@@ -269,7 +274,8 @@ def _attach_tgw_vpc_attachment_to_vpc_subnets(
269
274
  )
270
275
 
271
276
  for subnet_id in tgw_vpc_attachment["SubnetIds"]:
272
- neo4j_session.run(
277
+ run_write_query(
278
+ neo4j_session,
273
279
  attach_vpc_tgw_attachment_to_subnet,
274
280
  SubnetId=subnet_id,
275
281
  TgwAttachmentId=tgw_vpc_attachment["TransitGatewayAttachmentId"],
@@ -70,7 +70,7 @@ def transform_volumes(
70
70
 
71
71
  for attachment in active_attachments:
72
72
  vol_with_attachment = raw_vol.copy()
73
- vol_with_attachment["InstanceId"] = attachment["InstanceId"]
73
+ vol_with_attachment["InstanceId"] = attachment.get("InstanceId")
74
74
  result.append(vol_with_attachment)
75
75
 
76
76
  return result