cartography 0.102.0rc1__py3-none-any.whl → 0.103.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (251)
  1. cartography/__main__.py +1 -2
  2. cartography/_version.py +2 -2
  3. cartography/cli.py +302 -253
  4. cartography/client/core/tx.py +39 -18
  5. cartography/config.py +4 -0
  6. cartography/driftdetect/__main__.py +1 -2
  7. cartography/driftdetect/add_shortcut.py +10 -2
  8. cartography/driftdetect/cli.py +71 -75
  9. cartography/driftdetect/detect_deviations.py +7 -3
  10. cartography/driftdetect/get_states.py +20 -8
  11. cartography/driftdetect/model.py +5 -5
  12. cartography/driftdetect/serializers.py +8 -6
  13. cartography/driftdetect/storage.py +2 -2
  14. cartography/graph/cleanupbuilder.py +35 -15
  15. cartography/graph/job.py +46 -17
  16. cartography/graph/querybuilder.py +165 -80
  17. cartography/graph/statement.py +35 -26
  18. cartography/intel/analysis.py +4 -1
  19. cartography/intel/aws/__init__.py +114 -55
  20. cartography/intel/aws/apigateway.py +134 -63
  21. cartography/intel/aws/cloudtrail.py +127 -0
  22. cartography/intel/aws/config.py +56 -20
  23. cartography/intel/aws/dynamodb.py +108 -40
  24. cartography/intel/aws/ec2/__init__.py +2 -2
  25. cartography/intel/aws/ec2/auto_scaling_groups.py +181 -78
  26. cartography/intel/aws/ec2/elastic_ip_addresses.py +41 -13
  27. cartography/intel/aws/ec2/images.py +49 -20
  28. cartography/intel/aws/ec2/instances.py +234 -136
  29. cartography/intel/aws/ec2/internet_gateways.py +40 -11
  30. cartography/intel/aws/ec2/key_pairs.py +44 -20
  31. cartography/intel/aws/ec2/launch_templates.py +101 -59
  32. cartography/intel/aws/ec2/load_balancer_v2s.py +104 -39
  33. cartography/intel/aws/ec2/load_balancers.py +82 -42
  34. cartography/intel/aws/ec2/network_acls.py +89 -65
  35. cartography/intel/aws/ec2/network_interfaces.py +146 -87
  36. cartography/intel/aws/ec2/reserved_instances.py +45 -16
  37. cartography/intel/aws/ec2/route_tables.py +327 -0
  38. cartography/intel/aws/ec2/security_groups.py +71 -21
  39. cartography/intel/aws/ec2/snapshots.py +61 -22
  40. cartography/intel/aws/ec2/subnets.py +54 -18
  41. cartography/intel/aws/ec2/tgw.py +100 -34
  42. cartography/intel/aws/ec2/util.py +1 -1
  43. cartography/intel/aws/ec2/volumes.py +69 -41
  44. cartography/intel/aws/ec2/vpc.py +37 -12
  45. cartography/intel/aws/ec2/vpc_peerings.py +83 -24
  46. cartography/intel/aws/ecr.py +88 -32
  47. cartography/intel/aws/ecs.py +83 -47
  48. cartography/intel/aws/eks.py +55 -29
  49. cartography/intel/aws/elasticache.py +42 -18
  50. cartography/intel/aws/elasticsearch.py +57 -20
  51. cartography/intel/aws/emr.py +61 -23
  52. cartography/intel/aws/iam.py +401 -145
  53. cartography/intel/aws/iam_instance_profiles.py +22 -22
  54. cartography/intel/aws/identitycenter.py +71 -37
  55. cartography/intel/aws/inspector.py +159 -89
  56. cartography/intel/aws/kms.py +92 -38
  57. cartography/intel/aws/lambda_function.py +103 -34
  58. cartography/intel/aws/organizations.py +30 -10
  59. cartography/intel/aws/permission_relationships.py +133 -51
  60. cartography/intel/aws/rds.py +249 -85
  61. cartography/intel/aws/redshift.py +107 -46
  62. cartography/intel/aws/resourcegroupstaggingapi.py +120 -66
  63. cartography/intel/aws/resources.py +53 -44
  64. cartography/intel/aws/route53.py +108 -61
  65. cartography/intel/aws/s3.py +168 -83
  66. cartography/intel/aws/s3accountpublicaccessblock.py +157 -0
  67. cartography/intel/aws/secretsmanager.py +24 -12
  68. cartography/intel/aws/securityhub.py +20 -9
  69. cartography/intel/aws/sns.py +166 -0
  70. cartography/intel/aws/sqs.py +60 -28
  71. cartography/intel/aws/ssm.py +70 -30
  72. cartography/intel/aws/util/arns.py +7 -7
  73. cartography/intel/aws/util/common.py +31 -4
  74. cartography/intel/azure/__init__.py +78 -19
  75. cartography/intel/azure/compute.py +101 -27
  76. cartography/intel/azure/cosmosdb.py +496 -170
  77. cartography/intel/azure/sql.py +296 -105
  78. cartography/intel/azure/storage.py +322 -113
  79. cartography/intel/azure/subscription.py +39 -23
  80. cartography/intel/azure/tenant.py +13 -4
  81. cartography/intel/azure/util/credentials.py +95 -55
  82. cartography/intel/bigfix/__init__.py +2 -2
  83. cartography/intel/bigfix/computers.py +93 -65
  84. cartography/intel/create_indexes.py +3 -2
  85. cartography/intel/crowdstrike/__init__.py +11 -9
  86. cartography/intel/crowdstrike/endpoints.py +5 -1
  87. cartography/intel/crowdstrike/spotlight.py +8 -3
  88. cartography/intel/cve/__init__.py +46 -13
  89. cartography/intel/cve/feed.py +48 -12
  90. cartography/intel/digitalocean/__init__.py +22 -13
  91. cartography/intel/digitalocean/compute.py +75 -108
  92. cartography/intel/digitalocean/management.py +44 -80
  93. cartography/intel/digitalocean/platform.py +48 -43
  94. cartography/intel/dns.py +36 -10
  95. cartography/intel/duo/__init__.py +21 -16
  96. cartography/intel/duo/api_host.py +14 -9
  97. cartography/intel/duo/endpoints.py +50 -45
  98. cartography/intel/duo/groups.py +18 -14
  99. cartography/intel/duo/phones.py +37 -34
  100. cartography/intel/duo/tokens.py +26 -23
  101. cartography/intel/duo/users.py +54 -50
  102. cartography/intel/duo/web_authn_credentials.py +30 -25
  103. cartography/intel/entra/__init__.py +25 -7
  104. cartography/intel/entra/ou.py +112 -0
  105. cartography/intel/entra/users.py +69 -63
  106. cartography/intel/gcp/__init__.py +185 -49
  107. cartography/intel/gcp/compute.py +418 -231
  108. cartography/intel/gcp/crm.py +96 -43
  109. cartography/intel/gcp/dns.py +60 -19
  110. cartography/intel/gcp/gke.py +72 -38
  111. cartography/intel/gcp/iam.py +61 -41
  112. cartography/intel/gcp/storage.py +84 -55
  113. cartography/intel/github/__init__.py +13 -11
  114. cartography/intel/github/repos.py +270 -137
  115. cartography/intel/github/teams.py +170 -88
  116. cartography/intel/github/users.py +70 -39
  117. cartography/intel/github/util.py +36 -34
  118. cartography/intel/gsuite/__init__.py +47 -26
  119. cartography/intel/gsuite/api.py +73 -30
  120. cartography/intel/jamf/__init__.py +19 -1
  121. cartography/intel/jamf/computers.py +30 -7
  122. cartography/intel/jamf/util.py +7 -2
  123. cartography/intel/kandji/__init__.py +6 -3
  124. cartography/intel/kandji/devices.py +14 -8
  125. cartography/intel/kubernetes/namespaces.py +7 -4
  126. cartography/intel/kubernetes/pods.py +7 -4
  127. cartography/intel/kubernetes/services.py +8 -4
  128. cartography/intel/lastpass/__init__.py +2 -2
  129. cartography/intel/lastpass/users.py +23 -12
  130. cartography/intel/oci/__init__.py +44 -11
  131. cartography/intel/oci/iam.py +134 -38
  132. cartography/intel/oci/organizations.py +13 -6
  133. cartography/intel/oci/utils.py +43 -20
  134. cartography/intel/okta/__init__.py +66 -15
  135. cartography/intel/okta/applications.py +42 -20
  136. cartography/intel/okta/awssaml.py +93 -33
  137. cartography/intel/okta/factors.py +16 -4
  138. cartography/intel/okta/groups.py +56 -29
  139. cartography/intel/okta/organization.py +5 -1
  140. cartography/intel/okta/origins.py +6 -2
  141. cartography/intel/okta/roles.py +15 -5
  142. cartography/intel/okta/users.py +20 -8
  143. cartography/intel/okta/utils.py +6 -4
  144. cartography/intel/pagerduty/__init__.py +8 -7
  145. cartography/intel/pagerduty/escalation_policies.py +18 -6
  146. cartography/intel/pagerduty/schedules.py +12 -4
  147. cartography/intel/pagerduty/services.py +11 -4
  148. cartography/intel/pagerduty/teams.py +8 -3
  149. cartography/intel/pagerduty/users.py +3 -1
  150. cartography/intel/pagerduty/vendors.py +3 -1
  151. cartography/intel/semgrep/__init__.py +24 -6
  152. cartography/intel/semgrep/dependencies.py +50 -28
  153. cartography/intel/semgrep/deployment.py +3 -1
  154. cartography/intel/semgrep/findings.py +42 -18
  155. cartography/intel/snipeit/__init__.py +17 -3
  156. cartography/intel/snipeit/asset.py +12 -6
  157. cartography/intel/snipeit/user.py +8 -5
  158. cartography/intel/snipeit/util.py +9 -4
  159. cartography/models/aws/apigateway.py +21 -17
  160. cartography/models/aws/apigatewaycertificate.py +28 -22
  161. cartography/models/aws/apigatewayresource.py +28 -20
  162. cartography/models/aws/apigatewaystage.py +33 -25
  163. cartography/models/aws/cloudtrail/__init__.py +0 -0
  164. cartography/models/aws/cloudtrail/trail.py +61 -0
  165. cartography/models/aws/dynamodb/gsi.py +30 -22
  166. cartography/models/aws/dynamodb/tables.py +25 -17
  167. cartography/models/aws/ec2/auto_scaling_groups.py +102 -82
  168. cartography/models/aws/ec2/images.py +36 -34
  169. cartography/models/aws/ec2/instances.py +51 -45
  170. cartography/models/aws/ec2/keypair.py +21 -16
  171. cartography/models/aws/ec2/keypair_instance.py +28 -21
  172. cartography/models/aws/ec2/launch_configurations.py +30 -26
  173. cartography/models/aws/ec2/launch_template_versions.py +48 -38
  174. cartography/models/aws/ec2/launch_templates.py +21 -17
  175. cartography/models/aws/ec2/load_balancer_listeners.py +27 -23
  176. cartography/models/aws/ec2/load_balancers.py +47 -37
  177. cartography/models/aws/ec2/network_acl_rules.py +38 -30
  178. cartography/models/aws/ec2/network_acls.py +38 -29
  179. cartography/models/aws/ec2/networkinterface_instance.py +52 -39
  180. cartography/models/aws/ec2/networkinterfaces.py +53 -37
  181. cartography/models/aws/ec2/privateip_networkinterface.py +32 -22
  182. cartography/models/aws/ec2/reservations.py +18 -14
  183. cartography/models/aws/ec2/route_table_associations.py +97 -0
  184. cartography/models/aws/ec2/route_tables.py +128 -0
  185. cartography/models/aws/ec2/routes.py +85 -0
  186. cartography/models/aws/ec2/securitygroup_instance.py +29 -20
  187. cartography/models/aws/ec2/securitygroup_networkinterface.py +24 -15
  188. cartography/models/aws/ec2/subnet_instance.py +24 -19
  189. cartography/models/aws/ec2/subnet_networkinterface.py +40 -31
  190. cartography/models/aws/ec2/volumes.py +47 -40
  191. cartography/models/aws/eks/clusters.py +23 -21
  192. cartography/models/aws/emr.py +32 -30
  193. cartography/models/aws/iam/instanceprofile.py +33 -24
  194. cartography/models/aws/identitycenter/awsidentitycenter.py +18 -14
  195. cartography/models/aws/identitycenter/awspermissionset.py +37 -29
  196. cartography/models/aws/identitycenter/awsssouser.py +23 -21
  197. cartography/models/aws/inspector/findings.py +77 -65
  198. cartography/models/aws/inspector/packages.py +35 -29
  199. cartography/models/aws/s3/__init__.py +0 -0
  200. cartography/models/aws/s3/account_public_access_block.py +51 -0
  201. cartography/models/aws/sns/__init__.py +0 -0
  202. cartography/models/aws/sns/topic.py +50 -0
  203. cartography/models/aws/ssm/instance_information.py +51 -39
  204. cartography/models/aws/ssm/instance_patch.py +32 -26
  205. cartography/models/bigfix/bigfix_computer.py +42 -38
  206. cartography/models/bigfix/bigfix_root.py +3 -3
  207. cartography/models/core/common.py +12 -10
  208. cartography/models/core/nodes.py +5 -2
  209. cartography/models/core/relationships.py +14 -6
  210. cartography/models/crowdstrike/hosts.py +37 -35
  211. cartography/models/cve/cve.py +34 -32
  212. cartography/models/cve/cve_feed.py +6 -6
  213. cartography/models/digitalocean/__init__.py +0 -0
  214. cartography/models/digitalocean/account.py +21 -0
  215. cartography/models/digitalocean/droplet.py +56 -0
  216. cartography/models/digitalocean/project.py +48 -0
  217. cartography/models/duo/api_host.py +3 -3
  218. cartography/models/duo/endpoint.py +43 -41
  219. cartography/models/duo/group.py +14 -14
  220. cartography/models/duo/phone.py +27 -27
  221. cartography/models/duo/token.py +16 -16
  222. cartography/models/duo/user.py +46 -44
  223. cartography/models/duo/web_authn_credential.py +27 -19
  224. cartography/models/entra/ou.py +48 -0
  225. cartography/models/entra/tenant.py +24 -18
  226. cartography/models/entra/user.py +64 -48
  227. cartography/models/gcp/iam.py +23 -23
  228. cartography/models/github/orgs.py +5 -4
  229. cartography/models/github/teams.py +37 -31
  230. cartography/models/github/users.py +34 -23
  231. cartography/models/kandji/device.py +22 -16
  232. cartography/models/kandji/tenant.py +6 -4
  233. cartography/models/lastpass/tenant.py +3 -3
  234. cartography/models/lastpass/user.py +32 -28
  235. cartography/models/semgrep/dependencies.py +36 -24
  236. cartography/models/semgrep/deployment.py +5 -5
  237. cartography/models/semgrep/findings.py +58 -42
  238. cartography/models/semgrep/locations.py +27 -21
  239. cartography/models/snipeit/asset.py +30 -21
  240. cartography/models/snipeit/tenant.py +6 -4
  241. cartography/models/snipeit/user.py +19 -12
  242. cartography/stats.py +3 -3
  243. cartography/sync.py +107 -31
  244. cartography/util.py +84 -62
  245. {cartography-0.102.0rc1.dist-info → cartography-0.103.0rc1.dist-info}/METADATA +3 -14
  246. cartography-0.103.0rc1.dist-info/RECORD +396 -0
  247. {cartography-0.102.0rc1.dist-info → cartography-0.103.0rc1.dist-info}/WHEEL +1 -1
  248. cartography-0.102.0rc1.dist-info/RECORD +0 -377
  249. {cartography-0.102.0rc1.dist-info → cartography-0.103.0rc1.dist-info}/entry_points.txt +0 -0
  250. {cartography-0.102.0rc1.dist-info → cartography-0.103.0rc1.dist-info}/licenses/LICENSE +0 -0
  251. {cartography-0.102.0rc1.dist-info → cartography-0.103.0rc1.dist-info}/top_level.txt +0 -0
@@ -5,15 +5,16 @@ import neo4j
5
5
 
6
6
  from cartography.config import Config
7
7
  from cartography.util import load_resource_binary
8
+
8
9
  logger = logging.getLogger(__name__)
9
10
 
10
11
 
11
12
  def get_index_statements() -> List[str]:
12
13
  statements = []
13
- with load_resource_binary('cartography.data', 'indexes.cypher') as f:
14
+ with load_resource_binary("cartography.data", "indexes.cypher") as f:
14
15
  for line in f.readlines():
15
16
  statements.append(
16
- line.decode('UTF-8').rstrip('\r\n'),
17
+ line.decode("UTF-8").rstrip("\r\n"),
17
18
  )
18
19
  return statements
19
20
 
@@ -20,7 +20,8 @@ stat_handler = get_stats_client(__name__)
20
20
 
21
21
  @timeit
22
22
  def start_crowdstrike_ingestion(
23
- neo4j_session: neo4j.Session, config: Config,
23
+ neo4j_session: neo4j.Session,
24
+ config: Config,
24
25
  ) -> None:
25
26
  """
26
27
  Perform ingestion of crowdstrike data.
@@ -31,10 +32,7 @@ def start_crowdstrike_ingestion(
31
32
  common_job_parameters = {
32
33
  "UPDATE_TAG": config.update_tag,
33
34
  }
34
- if (
35
- not config.crowdstrike_client_id or
36
- not config.crowdstrike_client_secret
37
- ):
35
+ if not config.crowdstrike_client_id or not config.crowdstrike_client_secret:
38
36
  logger.error("crowdstrike config not found")
39
37
  return
40
38
 
@@ -60,18 +58,22 @@ def start_crowdstrike_ingestion(
60
58
  group_id = config.crowdstrike_api_url
61
59
  merge_module_sync_metadata(
62
60
  neo4j_session,
63
- group_type='crowdstrike',
61
+ group_type="crowdstrike",
64
62
  group_id=group_id,
65
- synced_type='crowdstrike',
63
+ synced_type="crowdstrike",
66
64
  update_tag=config.update_tag,
67
65
  stat_handler=stat_handler,
68
66
  )
69
67
 
70
68
 
71
69
  @timeit
72
- def cleanup(neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]) -> None:
70
+ def cleanup(
71
+ neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
72
+ ) -> None:
73
73
  logger.info("Running Crowdstrike cleanup")
74
- GraphJob.from_node_schema(CrowdstrikeHostSchema(), common_job_parameters).run(neo4j_session)
74
+ GraphJob.from_node_schema(CrowdstrikeHostSchema(), common_job_parameters).run(
75
+ neo4j_session
76
+ )
75
77
 
76
78
  # Cleanup other crowdstrike assets not handled by the data model
77
79
  run_cleanup_job(
@@ -44,7 +44,11 @@ def load_host_data(
44
44
  )
45
45
 
46
46
 
47
- def get_host_ids(client: Hosts, crowdstrikeapi_filter: str = '', crowdstrikeapi_limit: int = 5000) -> List[List[str]]:
47
+ def get_host_ids(
48
+ client: Hosts,
49
+ crowdstrikeapi_filter: str = "",
50
+ crowdstrikeapi_limit: int = 5000,
51
+ ) -> List[List[str]]:
48
52
  ids = []
49
53
  parameters = {"filter": crowdstrikeapi_filter, "limit": crowdstrikeapi_limit}
50
54
  response = client.QueryDevicesByFilter(parameters=parameters)
@@ -25,7 +25,9 @@ def sync_vulnerabilities(
25
25
 
26
26
 
27
27
  def load_vulnerability_data(
28
- neo4j_session: neo4j.Session, data: List[Dict], update_tag: int,
28
+ neo4j_session: neo4j.Session,
29
+ data: List[Dict],
30
+ update_tag: int,
29
31
  ) -> None:
30
32
  """
31
33
  Transform and load scan information
@@ -111,7 +113,9 @@ def _load_cves(neo4j_session: neo4j.Session, data: List[Dict], update_tag: int)
111
113
  )
112
114
 
113
115
 
114
- def get_spotlight_vulnerability_ids(client: Spotlight_Vulnerabilities) -> List[List[str]]:
116
+ def get_spotlight_vulnerability_ids(
117
+ client: Spotlight_Vulnerabilities,
118
+ ) -> List[List[str]]:
115
119
  ids = []
116
120
  parameters = {"filter": 'status:!"closed"', "limit": 400}
117
121
  response = client.queryVulnerabilities(parameters=parameters)
@@ -135,7 +139,8 @@ def get_spotlight_vulnerability_ids(client: Spotlight_Vulnerabilities) -> List[L
135
139
 
136
140
 
137
141
  def get_spotlight_vulnerabilities(
138
- client: Spotlight_Vulnerabilities, ids: List[str],
142
+ client: Spotlight_Vulnerabilities,
143
+ ids: List[str],
139
144
  ) -> List[Dict]:
140
145
  response = client.getVulnerabilities(ids=",".join(ids))
141
146
  body = response.get("body", {})
@@ -38,21 +38,33 @@ def _sync_year_archives(
38
38
  ) -> None:
39
39
  existing_years = feed.get_cve_sync_metadata(neo4j_session)
40
40
  current_year = datetime.now().year
41
- logger.info(f"Syncing CVE data for year archives. Existing years: {existing_years}. Current year: {current_year}")
41
+ logger.info(
42
+ f"Syncing CVE data for year archives. Existing years: {existing_years}. Current year: {current_year}",
43
+ )
42
44
  for year in range(1999, current_year + 1):
43
45
  if year in existing_years:
44
46
  continue
45
47
  logger.info(f"Syncing CVE data for year {year}")
46
- cves = feed.get_published_cves_per_year(http_session, config.nist_cve_url, str(year), cve_api_key)
48
+ cves = feed.get_published_cves_per_year(
49
+ http_session,
50
+ config.nist_cve_url,
51
+ str(year),
52
+ cve_api_key,
53
+ )
47
54
  feed_metadata = feed.transform_cve_feed(cves)
48
55
  feed.load_cve_feed(neo4j_session, [feed_metadata], config.update_tag)
49
56
  published_cves = feed.transform_cves(cves)
50
- feed.load_cves(neo4j_session, published_cves, feed_metadata['FEED_ID'], config.update_tag)
57
+ feed.load_cves(
58
+ neo4j_session,
59
+ published_cves,
60
+ feed_metadata["FEED_ID"],
61
+ config.update_tag,
62
+ )
51
63
  merge_module_sync_metadata(
52
64
  neo4j_session,
53
- group_type='CVE',
65
+ group_type="CVE",
54
66
  group_id=year,
55
- synced_type='year',
67
+ synced_type="year",
56
68
  update_tag=config.update_tag,
57
69
  stat_handler=stat_handler,
58
70
  )
@@ -66,16 +78,26 @@ def _sync_modified_data(
66
78
  ) -> None:
67
79
  logger.info("Syncing CVE data for modified data")
68
80
  last_modified_date = feed.get_last_modified_cve_date(neo4j_session)
69
- cves = feed.get_modified_cves(http_session, config.nist_cve_url, last_modified_date, cve_api_key)
81
+ cves = feed.get_modified_cves(
82
+ http_session,
83
+ config.nist_cve_url,
84
+ last_modified_date,
85
+ cve_api_key,
86
+ )
70
87
  feed_metadata = feed.transform_cve_feed(cves)
71
88
  feed.load_cve_feed(neo4j_session, [feed_metadata], config.update_tag)
72
89
  modified_cves = feed.transform_cves(cves)
73
- feed.load_cves(neo4j_session, modified_cves, feed_metadata['FEED_ID'], config.update_tag)
90
+ feed.load_cves(
91
+ neo4j_session,
92
+ modified_cves,
93
+ feed_metadata["FEED_ID"],
94
+ config.update_tag,
95
+ )
74
96
  merge_module_sync_metadata(
75
97
  neo4j_session,
76
- group_type='CVE',
77
- group_id=feed_metadata['timestamp'][:4],
78
- synced_type='modified',
98
+ group_type="CVE",
99
+ group_id=feed_metadata["timestamp"][:4],
100
+ synced_type="modified",
79
101
  update_tag=config.update_tag,
80
102
  stat_handler=stat_handler,
81
103
  )
@@ -83,7 +105,8 @@ def _sync_modified_data(
83
105
 
84
106
  @timeit
85
107
  def start_cve_ingestion(
86
- neo4j_session: neo4j.Session, config: Config,
108
+ neo4j_session: neo4j.Session,
109
+ config: Config,
87
110
  ) -> None:
88
111
  """
89
112
  Perform ingestion of CVE data from NIST APIs.
@@ -95,6 +118,16 @@ def start_cve_ingestion(
95
118
  return
96
119
  cve_api_key: str | None = config.cve_api_key if config.cve_api_key else None
97
120
  with _retryable_session() as http_session:
98
- _sync_year_archives(http_session, neo4j_session=neo4j_session, config=config, cve_api_key=cve_api_key)
99
- _sync_modified_data(http_session, neo4j_session=neo4j_session, config=config, cve_api_key=cve_api_key)
121
+ _sync_year_archives(
122
+ http_session,
123
+ neo4j_session=neo4j_session,
124
+ config=config,
125
+ cve_api_key=cve_api_key,
126
+ )
127
+ _sync_modified_data(
128
+ http_session,
129
+ neo4j_session=neo4j_session,
130
+ config=config,
131
+ cve_api_key=cve_api_key,
132
+ )
100
133
  # CVEs are never deleted, so we don't need to run a cleanup job
@@ -51,7 +51,10 @@ def get_last_modified_cve_date(neo4j_session: neo4j.Session) -> str:
51
51
  ORDER BY last_modified DESC
52
52
  LIMIT 1
53
53
  """
54
- result = cast(neo4j.time.DateTime, read_single_value_tx(neo4j_session, query)).to_native()
54
+ result = cast(
55
+ neo4j.time.DateTime,
56
+ read_single_value_tx(neo4j_session, query),
57
+ ).to_native()
55
58
  return result.strftime("%Y-%m-%dT%H:%M:%S")
56
59
 
57
60
 
@@ -61,13 +64,19 @@ def _map_cve_dict(cve_dict: Dict[Any, Any], data: Dict[Any, Any]) -> None:
61
64
  cve_dict["timestamp"] = data["timestamp"]
62
65
  cve_dict["totalResults"] = data["totalResults"]
63
66
  cve_dict["vulnerabilities"] = cve_dict.get("vulnerabilities", []) + data.get(
64
- "vulnerabilities", [],
67
+ "vulnerabilities",
68
+ [],
65
69
  )
66
70
  cve_dict["resultsPerPage"] = data["resultsPerPage"]
67
71
  cve_dict["startIndex"] = data["startIndex"]
68
72
 
69
73
 
70
- def _call_cves_api(http_session: Session, url: str, api_key: str | None, params: Dict[str, Any]) -> Dict[Any, Any]:
74
+ def _call_cves_api(
75
+ http_session: Session,
76
+ url: str,
77
+ api_key: str | None,
78
+ params: Dict[str, Any],
79
+ ) -> Dict[Any, Any]:
71
80
  total_results = 0
72
81
  params["startIndex"] = 0
73
82
  params["resultsPerPage"] = RESULTS_PER_PAGE
@@ -84,7 +93,12 @@ def _call_cves_api(http_session: Session, url: str, api_key: str | None, params:
84
93
 
85
94
  while params["resultsPerPage"] > 0 or params["startIndex"] < total_results:
86
95
  logger.info(f"Calling NIST NVD API at {url} with params {params}")
87
- res = http_session.get(url, params=params, headers=headers, timeout=CONNECT_AND_READ_TIMEOUT)
96
+ res = http_session.get(
97
+ url,
98
+ params=params,
99
+ headers=headers,
100
+ timeout=CONNECT_AND_READ_TIMEOUT,
101
+ )
88
102
  res.raise_for_status()
89
103
  data = res.json()
90
104
  _map_cve_dict(results, data)
@@ -140,7 +154,10 @@ def get_cves_in_batches(
140
154
 
141
155
 
142
156
  def get_modified_cves(
143
- http_session: Session, nist_cve_url: str, last_modified_date: str, api_key: str | None,
157
+ http_session: Session,
158
+ nist_cve_url: str,
159
+ last_modified_date: str,
160
+ api_key: str | None,
144
161
  ) -> Dict[Any, Any]:
145
162
  end_date = datetime.now(tz=timezone.utc)
146
163
  start_date = datetime.strptime(last_modified_date, "%Y-%m-%dT%H:%M:%S").replace(
@@ -151,13 +168,21 @@ def get_modified_cves(
151
168
  "end": "lastModEndDate",
152
169
  }
153
170
  cves = get_cves_in_batches(
154
- http_session, nist_cve_url, start_date, end_date, date_param_names, api_key,
171
+ http_session,
172
+ nist_cve_url,
173
+ start_date,
174
+ end_date,
175
+ date_param_names,
176
+ api_key,
155
177
  )
156
178
  return cves
157
179
 
158
180
 
159
181
  def get_published_cves_per_year(
160
- http_session: Session, nist_cve_url: str, year: str, api_key: str | None,
182
+ http_session: Session,
183
+ nist_cve_url: str,
184
+ year: str,
185
+ api_key: str | None,
161
186
  ) -> Dict[Any, Any]:
162
187
  start_of_year = datetime.strptime(f"{year}-01-01", "%Y-%m-%d")
163
188
  next_year = int(year) + 1
@@ -167,7 +192,12 @@ def get_published_cves_per_year(
167
192
  "end": "pubEndDate",
168
193
  }
169
194
  cves = get_cves_in_batches(
170
- http_session, nist_cve_url, start_of_year, end_of_next_year, date_param_names, api_key,
195
+ http_session,
196
+ nist_cve_url,
197
+ start_of_year,
198
+ end_of_next_year,
199
+ date_param_names,
200
+ api_key,
171
201
  )
172
202
  return cves
173
203
 
@@ -198,9 +228,13 @@ def transform_cves(cve_json: Dict[Any, Any]) -> List[Dict[Any, Any]]:
198
228
  ]
199
229
  cve["references_urls"] = [url["url"] for url in cve["references"]]
200
230
  if cve.get("weaknesses"):
201
- weakness_descriptions = [weakness["description"] for weakness in cve["weaknesses"]]
231
+ weakness_descriptions = [
232
+ weakness["description"] for weakness in cve["weaknesses"]
233
+ ]
202
234
  weakness_descriptions = reduce(
203
- lambda x, y: x + y, weakness_descriptions, [],
235
+ lambda x, y: x + y,
236
+ weakness_descriptions,
237
+ [],
204
238
  )
205
239
  cve["weaknesses"] = [
206
240
  description["value"]
@@ -226,7 +260,7 @@ def transform_cves(cve_json: Dict[Any, Any]) -> List[Dict[Any, Any]]:
226
260
  cve["exploitabilityScore"] = cvss31.get("exploitabilityScore")
227
261
  cve["impactScore"] = cvss31.get("impactScore")
228
262
  except Exception:
229
- logger.error("Failed to transform CVE data {data}")
263
+ logger.error(f"Failed to transform CVE data {data}")
230
264
  raise
231
265
  cves.append(cve)
232
266
  return cves
@@ -265,7 +299,9 @@ def load_cves(
265
299
 
266
300
 
267
301
  def load_cve_feed(
268
- neo4j_session: neo4j.Session, data: List[Dict[str, Any]], update_tag: int,
302
+ neo4j_session: neo4j.Session,
303
+ data: List[Dict[str, Any]],
304
+ update_tag: int,
269
305
  ) -> None:
270
306
  """
271
307
  Load CVE feed information
@@ -9,7 +9,6 @@ from cartography.intel.digitalocean import management
9
9
  from cartography.intel.digitalocean import platform
10
10
  from cartography.util import timeit
11
11
 
12
-
13
12
  logger = logging.getLogger(__name__)
14
13
 
15
14
 
@@ -23,7 +22,9 @@ def start_digitalocean_ingestion(neo4j_session: neo4j.Session, config: Config) -
23
22
  """
24
23
 
25
24
  if not config.digitalocean_token:
26
- logger.info('DigitalOcean import is not configured - skipping this module. See docs to configure.')
25
+ logger.info(
26
+ "DigitalOcean import is not configured - skipping this module. See docs to configure.",
27
+ )
27
28
  return
28
29
 
29
30
  common_job_parameters = {
@@ -31,14 +32,22 @@ def start_digitalocean_ingestion(neo4j_session: neo4j.Session, config: Config) -
31
32
  }
32
33
  manager = Manager(token=config.digitalocean_token)
33
34
 
34
- """
35
- Get Account ID related to this credentials and pass it along in `common_job_parameters` to avoid cleaning up other
36
- accounts resources
37
- """
38
- account = manager.get_account()
39
- common_job_parameters["DO_ACCOUNT_ID"] = account.uuid
40
-
41
- platform.sync(neo4j_session, account, config.update_tag, common_job_parameters)
42
- project_resources = management.sync(neo4j_session, manager, config.update_tag, common_job_parameters)
43
- compute.sync(neo4j_session, manager, project_resources, config.update_tag, common_job_parameters)
44
- return
35
+ account_id = platform.sync(
36
+ neo4j_session, manager, config.update_tag, common_job_parameters
37
+ )
38
+ common_job_parameters["ACCOUNT_ID"] = str(account_id)
39
+ projects_resources = management.sync(
40
+ neo4j_session,
41
+ manager,
42
+ account_id,
43
+ config.update_tag,
44
+ common_job_parameters,
45
+ )
46
+ compute.sync(
47
+ neo4j_session,
48
+ manager,
49
+ account_id,
50
+ projects_resources,
51
+ config.update_tag,
52
+ common_job_parameters,
53
+ )
@@ -1,10 +1,15 @@
1
1
  import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
2
5
  from typing import Optional
3
6
 
4
7
  import neo4j
5
8
  from digitalocean import Manager
6
9
 
7
- from cartography.util import run_cleanup_job
10
+ from cartography.client.core.tx import load
11
+ from cartography.graph.job import GraphJob
12
+ from cartography.models.digitalocean.droplet import DODropletSchema
8
13
  from cartography.util import timeit
9
14
 
10
15
  logger = logging.getLogger(__name__)
@@ -12,29 +17,20 @@ logger = logging.getLogger(__name__)
12
17
 
13
18
  @timeit
14
19
  def sync(
15
- neo4j_session: neo4j.Session,
16
- manager: Manager,
17
- projects_resources: dict,
18
- digitalocean_update_tag: int,
19
- common_job_parameters: dict,
20
- ) -> None:
21
- sync_droplets(neo4j_session, manager, projects_resources, digitalocean_update_tag, common_job_parameters)
22
-
23
-
24
- @timeit
25
- def sync_droplets(
26
- neo4j_session: neo4j.Session,
27
- manager: Manager,
28
- projects_resources: dict,
29
- digitalocean_update_tag: int,
30
- common_job_parameters: dict,
20
+ neo4j_session: neo4j.Session,
21
+ manager: Manager,
22
+ account_id: str,
23
+ projects_resources: dict,
24
+ update_tag: int,
25
+ common_job_parameters: dict,
31
26
  ) -> None:
32
27
  logger.info("Syncing Droplets")
33
- account_id = common_job_parameters['DO_ACCOUNT_ID']
34
28
  droplets_res = get_droplets(manager)
35
- droplets = transform_droplets(droplets_res, account_id, projects_resources)
36
- load_droplets(neo4j_session, droplets, digitalocean_update_tag)
37
- cleanup_droplets(neo4j_session, common_job_parameters)
29
+ droplets_by_project = transform_droplets(
30
+ droplets_res, account_id, projects_resources
31
+ )
32
+ load_droplets(neo4j_session, account_id, droplets_by_project, update_tag)
33
+ cleanup(neo4j_session, list(droplets_by_project.keys()), common_job_parameters)
38
34
 
39
35
 
40
36
  @timeit
@@ -43,35 +39,45 @@ def get_droplets(manager: Manager) -> list:
43
39
 
44
40
 
45
41
  @timeit
46
- def transform_droplets(droplets_res: list, account_id: str, projects_resources: dict) -> list:
47
- droplets = list()
42
+ def transform_droplets(
43
+ droplets_res: list,
44
+ account_id: str,
45
+ projects_resources: dict,
46
+ ) -> Dict[str, List[Dict[str, Any]]]:
47
+ droplets_by_project: Dict[str, List[Dict[str, Any]]] = {}
48
48
  for d in droplets_res:
49
+ project_id = str(_get_project_id_for_droplet(d.id, projects_resources))
50
+ if project_id not in droplets_by_project:
51
+ droplets_by_project[project_id] = []
49
52
  droplet = {
50
- 'id': d.id,
51
- 'name': d.name,
52
- 'locked': d.locked,
53
- 'status': d.status,
54
- 'features': d.features,
55
- 'region': d.region['slug'],
56
- 'created_at': d.created_at,
57
- 'image': d.image['slug'],
58
- 'size': d.size_slug,
59
- 'kernel': d.kernel,
60
- 'tags': d.tags,
61
- 'volumes': d.volume_ids,
62
- 'vpc_uuid': d.vpc_uuid,
63
- 'ip_address': d.ip_address,
64
- 'private_ip_address': d.private_ip_address,
65
- 'ip_v6_address': d.ip_v6_address,
66
- 'account_id': account_id,
67
- 'project_id': _get_project_id_for_droplet(d.id, projects_resources),
53
+ "id": d.id,
54
+ "name": d.name,
55
+ "locked": d.locked,
56
+ "status": d.status,
57
+ "features": d.features,
58
+ "region": d.region["slug"],
59
+ "created_at": d.created_at,
60
+ "image": d.image["slug"],
61
+ "size": d.size_slug,
62
+ "kernel": d.kernel,
63
+ "tags": d.tags,
64
+ "volumes": d.volume_ids,
65
+ "vpc_uuid": d.vpc_uuid,
66
+ "ip_address": d.ip_address,
67
+ "private_ip_address": d.private_ip_address,
68
+ "ip_v6_address": d.ip_v6_address,
69
+ "account_id": account_id,
70
+ "project_id": _get_project_id_for_droplet(d.id, projects_resources),
68
71
  }
69
- droplets.append(droplet)
70
- return droplets
72
+ droplets_by_project[project_id].append(droplet)
73
+ return droplets_by_project
71
74
 
72
75
 
73
76
  @timeit
74
- def _get_project_id_for_droplet(droplet_id: int, project_resources: dict) -> Optional[str]:
77
+ def _get_project_id_for_droplet(
78
+ droplet_id: int,
79
+ project_resources: dict,
80
+ ) -> Optional[str]:
75
81
  for project_id, resource_list in project_resources.items():
76
82
  droplet_resource_name = "do:droplet:" + str(droplet_id)
77
83
  if droplet_resource_name in resource_list:
@@ -80,71 +86,32 @@ def _get_project_id_for_droplet(droplet_id: int, project_resources: dict) -> Opt
80
86
 
81
87
 
82
88
  @timeit
83
- def load_droplets(neo4j_session: neo4j.Session, data: list, digitalocean_update_tag: int) -> None:
84
- query = """
85
- MERGE (p:DOProject{id:$ProjectId})
86
- ON CREATE SET p.firstseen = timestamp()
87
- SET p.lastupdated = $digitalocean_update_tag
88
-
89
- MERGE (d:DODroplet{id:$DropletId})
90
- ON CREATE SET d.firstseen = timestamp()
91
- SET d.account_id = $AccountId,
92
- d.name = $Name,
93
- d.locked = $Locked,
94
- d.status = $Status,
95
- d.features = $Features,
96
- d.region = $RegionSlug,
97
- d.created_at = $CreatedAt,
98
- d.image = $ImageSlug,
99
- d.size = $SizeSlug,
100
- d.kernel = $Kernel,
101
- d.ip_address = $IpAddress,
102
- d.private_ip_address = $PrivateIpAddress,
103
- d.project_id = $ProjectId,
104
- d.ip_v6_address = $IpV6Address,
105
- d.tags = $Tags,
106
- d.volumes = $Volumes,
107
- d.vpc_uuid = $VpcUuid,
108
- d.lastupdated = $digitalocean_update_tag
109
- WITH d, p
110
-
111
- MERGE (p)-[r:RESOURCE]->(d)
112
- ON CREATE SET r.firstseen = timestamp()
113
- SET r.lastupdated = $digitalocean_update_tag
114
- """
115
- for droplet in data:
116
- neo4j_session.run(
117
- query,
118
- AccountId=droplet['account_id'],
119
- DropletId=droplet['id'],
120
- Name=droplet['name'],
121
- Locked=droplet['locked'],
122
- Status=droplet['status'],
123
- Features=droplet['features'],
124
- RegionSlug=droplet['region'],
125
- CreatedAt=droplet['created_at'],
126
- ImageSlug=droplet['image'],
127
- SizeSlug=droplet['size'],
128
- IpAddress=droplet['ip_address'],
129
- PrivateIpAddress=droplet['private_ip_address'],
130
- ProjectId=droplet['project_id'],
131
- IpV6Address=droplet['ip_v6_address'],
132
- Kernel=droplet['kernel'],
133
- Tags=droplet['tags'],
134
- Volumes=droplet['volumes'],
135
- VpcUuid=droplet['vpc_uuid'],
136
- digitalocean_update_tag=digitalocean_update_tag,
89
+ def load_droplets(
90
+ neo4j_session: neo4j.Session,
91
+ account_id: str,
92
+ data: Dict[str, List[Dict[str, Any]]],
93
+ update_tag: int,
94
+ ) -> None:
95
+ for project_id, droplets in data.items():
96
+ load(
97
+ neo4j_session,
98
+ DODropletSchema(),
99
+ droplets,
100
+ lastupdated=update_tag,
101
+ PROJECT_ID=str(project_id),
102
+ ACCOUNT_ID=str(account_id),
137
103
  )
138
- return
139
104
 
140
105
 
141
106
  @timeit
142
- def cleanup_droplets(neo4j_session: neo4j.Session, common_job_parameters: dict) -> None:
143
- """
144
- Delete out-of-date DigitalOcean droplets and relationships
145
- :param neo4j_session: The Neo4j session
146
- :param common_job_parameters: dict of other job parameters to pass to Neo4j
147
- :return: Nothing
148
- """
149
- run_cleanup_job('digitalocean_droplet_cleanup.json', neo4j_session, common_job_parameters)
150
- return
107
+ def cleanup(
108
+ neo4j_session: neo4j.Session,
109
+ projects_ids: List[str],
110
+ common_job_parameters: Dict[str, Any],
111
+ ) -> None:
112
+ for project_id in projects_ids:
113
+ parameters = common_job_parameters.copy()
114
+ parameters["PROJECT_ID"] = str(project_id)
115
+ GraphJob.from_node_schema(DODropletSchema(), parameters).run(
116
+ neo4j_session,
117
+ )