cartography 0.95.0rc1__py3-none-any.whl → 0.96.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (27) hide show
  1. cartography/cli.py +15 -0
  2. cartography/config.py +4 -0
  3. cartography/data/indexes.cypher +1 -2
  4. cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +16 -0
  5. cartography/data/jobs/cleanup/{github_users_cleanup.json → github_org_and_users_cleanup.json} +5 -0
  6. cartography/graph/querybuilder.py +4 -0
  7. cartography/intel/aws/ec2/network_acls.py +208 -0
  8. cartography/intel/aws/identitycenter.py +307 -0
  9. cartography/intel/aws/resources.py +4 -0
  10. cartography/intel/github/users.py +156 -39
  11. cartography/intel/okta/users.py +2 -1
  12. cartography/intel/semgrep/__init__.py +9 -2
  13. cartography/intel/semgrep/dependencies.py +233 -0
  14. cartography/intel/semgrep/deployment.py +67 -0
  15. cartography/intel/semgrep/findings.py +22 -53
  16. cartography/models/aws/ec2/network_acl_rules.py +97 -0
  17. cartography/models/aws/ec2/network_acls.py +86 -0
  18. cartography/models/core/common.py +18 -1
  19. cartography/models/github/orgs.py +26 -0
  20. cartography/models/github/users.py +119 -0
  21. cartography/models/semgrep/dependencies.py +90 -0
  22. {cartography-0.95.0rc1.dist-info → cartography-0.96.0rc2.dist-info}/METADATA +1 -1
  23. {cartography-0.95.0rc1.dist-info → cartography-0.96.0rc2.dist-info}/RECORD +27 -17
  24. {cartography-0.95.0rc1.dist-info → cartography-0.96.0rc2.dist-info}/WHEEL +1 -1
  25. {cartography-0.95.0rc1.dist-info → cartography-0.96.0rc2.dist-info}/LICENSE +0 -0
  26. {cartography-0.95.0rc1.dist-info → cartography-0.96.0rc2.dist-info}/entry_points.txt +0 -0
  27. {cartography-0.95.0rc1.dist-info → cartography-0.96.0rc2.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ from copy import deepcopy
2
3
  from typing import Any
3
4
  from typing import Dict
4
5
  from typing import List
@@ -6,7 +7,11 @@ from typing import Tuple
6
7
 
7
8
  import neo4j
8
9
 
10
+ from cartography.client.core.tx import load
9
11
  from cartography.intel.github.util import fetch_all
12
+ from cartography.models.github.orgs import GitHubOrganizationSchema
13
+ from cartography.models.github.users import GitHubOrganizationUserSchema
14
+ from cartography.models.github.users import GitHubUnaffiliatedUserSchema
10
15
  from cartography.stats import get_stats_client
11
16
  from cartography.util import merge_module_sync_metadata
12
17
  from cartography.util import run_cleanup_job
@@ -44,17 +49,46 @@ GITHUB_ORG_USERS_PAGINATED_GRAPHQL = """
44
49
  }
45
50
  """
46
51
 
52
+ GITHUB_ENTERPRISE_OWNER_USERS_PAGINATED_GRAPHQL = """
53
+ query($login: String!, $cursor: String) {
54
+ organization(login: $login)
55
+ {
56
+ url
57
+ login
58
+ enterpriseOwners(first:100, after: $cursor){
59
+ edges {
60
+ node {
61
+ url
62
+ login
63
+ name
64
+ isSiteAdmin
65
+ email
66
+ company
67
+ }
68
+ organizationRole
69
+ }
70
+ pageInfo{
71
+ endCursor
72
+ hasNextPage
73
+ }
74
+ }
75
+ }
76
+ }
77
+ """
78
+
47
79
 
48
80
  @timeit
49
- def get(token: str, api_url: str, organization: str) -> Tuple[List[Dict], Dict]:
81
+ def get_users(token: str, api_url: str, organization: str) -> Tuple[List[Dict], Dict]:
50
82
  """
51
83
  Retrieve a list of users from the given GitHub organization as described in
52
84
  https://docs.github.com/en/graphql/reference/objects#organizationmemberedge.
53
85
  :param token: The Github API token as string.
54
86
  :param api_url: The Github v4 API endpoint as string.
55
87
  :param organization: The name of the target Github organization as string.
56
- :return: A 2-tuple containing 1. a list of dicts representing users - see tests.data.github.users.GITHUB_USER_DATA
57
- for shape, and 2. data on the owning GitHub organization - see tests.data.github.users.GITHUB_ORG_DATA for shape.
88
+ :return: A 2-tuple containing
89
+ 1. a list of dicts representing users and
90
+ 2. data on the owning GitHub organization
91
+ see tests.data.github.users.GITHUB_USER_DATA for shape of both
58
92
  """
59
93
  users, org = fetch_all(
60
94
  token,
@@ -66,56 +100,139 @@ def get(token: str, api_url: str, organization: str) -> Tuple[List[Dict], Dict]:
66
100
  return users.edges, org
67
101
 
68
102
 
103
+ def get_enterprise_owners(token: str, api_url: str, organization: str) -> Tuple[List[Dict], Dict]:
104
+ """
105
+ Retrieve a list of enterprise owners from the given GitHub organization as described in
106
+ https://docs.github.com/en/graphql/reference/objects#organizationenterpriseowneredge.
107
+ :param token: The Github API token as string.
108
+ :param api_url: The Github v4 API endpoint as string.
109
+ :param organization: The name of the target Github organization as string.
110
+ :return: A 2-tuple containing
111
+ 1. a list of dicts representing users who are enterprise owners
112
+ 2. data on the owning GitHub organization
113
+ see tests.data.github.users.GITHUB_ENTERPRISE_OWNER_DATA for shape
114
+ """
115
+ owners, org = fetch_all(
116
+ token,
117
+ api_url,
118
+ organization,
119
+ GITHUB_ENTERPRISE_OWNER_USERS_PAGINATED_GRAPHQL,
120
+ 'enterpriseOwners',
121
+ )
122
+ return owners.edges, org
123
+
124
+
69
125
  @timeit
70
- def load_organization_users(
71
- neo4j_session: neo4j.Session, user_data: List[Dict], org_data: Dict,
126
+ def transform_users(user_data: List[Dict], owners_data: List[Dict], org_data: Dict) -> Tuple[List[Dict], List[Dict]]:
127
+ """
128
+ Taking raw user and owner data, return two lists of processed user data:
129
+ * organization users aka affiliated users (users directly affiliated with an organization)
130
+ * unaffiliated users (users who, for example, are enterprise owners but not members of the target organization).
131
+
132
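+ :param user_data: Raw organization member edges as returned by get_users(); each has 'node', 'role' and 'hasTwoFactorEnabled'.
133
+ :param owners_data: Raw enterprise owner edges as returned by get_enterprise_owners(); each has 'node' and 'organizationRole'.
134
+ :param org_data: Data on the target GitHub organization; its 'url' is attached to every processed user.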
135
+ :return: A 2-tuple containing
136
+ 1. a list of dicts representing users who are affiliated with the target org
137
+ see tests.data.github.users.GITHUB_USER_DATA for shape
138
+ 2. a list of dicts representing users who are not affiliated (e.g. enterprise owners who are not also in
139
+ the target org) — see tests.data.github.users.GITHUB_ENTERPRISE_OWNER_DATA for shape
140
+ (org_data is only read to fill in MEMBER_OF / UNAFFILIATED on each user; it is not part of the return value)
141
+ """
142
+
143
+ users_dict = {}
144
+ for user in user_data:
145
+ processed_user = deepcopy(user['node'])
146
+ processed_user['role'] = user['role']
147
+ processed_user['hasTwoFactorEnabled'] = user['hasTwoFactorEnabled']
148
+ processed_user['MEMBER_OF'] = org_data['url']
149
+ users_dict[processed_user['url']] = processed_user
150
+
151
+ owners_dict = {}
152
+ for owner in owners_data:
153
+ processed_owner = deepcopy(owner['node'])
154
+ processed_owner['isEnterpriseOwner'] = True
155
+ if owner['organizationRole'] == 'UNAFFILIATED':
156
+ processed_owner['UNAFFILIATED'] = org_data['url']
157
+ else:
158
+ processed_owner['MEMBER_OF'] = org_data['url']
159
+ owners_dict[processed_owner['url']] = processed_owner
160
+
161
+ affiliated_users = [] # users affiliated with the target org
162
+ for url, user in users_dict.items():
163
+ user['isEnterpriseOwner'] = url in owners_dict
164
+ affiliated_users.append(user)
165
+
166
+ unaffiliated_users = [] # users not affiliated with the target org
167
+ for url, owner in owners_dict.items():
168
+ if url not in users_dict:
169
+ unaffiliated_users.append(owner)
170
+
171
+ return affiliated_users, unaffiliated_users
172
+
173
+
174
+ @timeit
175
+ def load_users(
176
+ neo4j_session: neo4j.Session,
177
+ node_schema: GitHubOrganizationUserSchema | GitHubUnaffiliatedUserSchema,
178
+ user_data: List[Dict],
179
+ org_data: Dict,
72
180
  update_tag: int,
73
181
  ) -> None:
74
- query = """
75
- MERGE (org:GitHubOrganization{id: $OrgUrl})
76
- ON CREATE SET org.firstseen = timestamp()
77
- SET org.username = $OrgLogin,
78
- org.lastupdated = $UpdateTag
79
- WITH org
80
-
81
- UNWIND $UserData as user
82
-
83
- MERGE (u:GitHubUser{id: user.node.url})
84
- ON CREATE SET u.firstseen = timestamp()
85
- SET u.fullname = user.node.name,
86
- u.username = user.node.login,
87
- u.has_2fa_enabled = user.hasTwoFactorEnabled,
88
- u.role = user.role,
89
- u.is_site_admin = user.node.isSiteAdmin,
90
- u.email = user.node.email,
91
- u.company = user.node.company,
92
- u.lastupdated = $UpdateTag
93
-
94
- MERGE (u)-[r:MEMBER_OF]->(org)
95
- ON CREATE SET r.firstseen = timestamp()
96
- SET r.lastupdated = $UpdateTag
97
- """
98
- neo4j_session.run(
99
- query,
100
- OrgUrl=org_data['url'],
101
- OrgLogin=org_data['login'],
102
- UserData=user_data,
103
- UpdateTag=update_tag,
182
+ logger.info(f"Loading {len(user_data)} GitHub users to the graph")
183
+ load(
184
+ neo4j_session,
185
+ node_schema,
186
+ user_data,
187
+ lastupdated=update_tag,
188
+ org_url=org_data['url'],
189
+ )
190
+
191
+
192
+ @timeit
193
+ def load_organization(
194
+ neo4j_session: neo4j.Session,
195
+ node_schema: GitHubOrganizationSchema,
196
+ org_data: List[Dict[str, Any]],
197
+ update_tag: int,
198
+ ) -> None:
199
+ logger.info(f"Loading {len(org_data)} GitHub organization to the graph")
200
+ load(
201
+ neo4j_session,
202
+ node_schema,
203
+ org_data,
204
+ lastupdated=update_tag,
104
205
  )
105
206
 
106
207
 
107
208
  @timeit
108
209
  def sync(
109
210
  neo4j_session: neo4j.Session,
110
- common_job_parameters: Dict[str, Any],
211
+ common_job_parameters: Dict,
111
212
  github_api_key: str,
112
213
  github_url: str,
113
214
  organization: str,
114
215
  ) -> None:
115
216
  logger.info("Syncing GitHub users")
116
- user_data, org_data = get(github_api_key, github_url, organization)
117
- load_organization_users(neo4j_session, user_data, org_data, common_job_parameters['UPDATE_TAG'])
118
- run_cleanup_job('github_users_cleanup.json', neo4j_session, common_job_parameters)
217
+ user_data, org_data = get_users(github_api_key, github_url, organization)
218
+ owners_data, org_data = get_enterprise_owners(github_api_key, github_url, organization)
219
+ processed_affiliated_user_data, processed_unaffiliated_user_data = (
220
+ transform_users(user_data, owners_data, org_data)
221
+ )
222
+ load_organization(
223
+ neo4j_session, GitHubOrganizationSchema(), [org_data],
224
+ common_job_parameters['UPDATE_TAG'],
225
+ )
226
+ load_users(
227
+ neo4j_session, GitHubOrganizationUserSchema(), processed_affiliated_user_data, org_data,
228
+ common_job_parameters['UPDATE_TAG'],
229
+ )
230
+ load_users(
231
+ neo4j_session, GitHubUnaffiliatedUserSchema(), processed_unaffiliated_user_data, org_data,
232
+ common_job_parameters['UPDATE_TAG'],
233
+ )
234
+ # no automated cleanup job for users because user node has no sub_resource_relationship
235
+ run_cleanup_job('github_org_and_users_cleanup.json', neo4j_session, common_job_parameters)
119
236
  merge_module_sync_metadata(
120
237
  neo4j_session,
121
238
  group_type='GitHubOrganization',
@@ -150,7 +150,8 @@ def _load_okta_users(
150
150
  new_user.okta_last_updated = user_data.okta_last_updated,
151
151
  new_user.password_changed = user_data.password_changed,
152
152
  new_user.transition_to_status = user_data.transition_to_status,
153
- new_user.lastupdated = $okta_update_tag
153
+ new_user.lastupdated = $okta_update_tag,
154
+ new_user :UserAccount
154
155
  WITH new_user, org
155
156
  MERGE (org)-[org_r:RESOURCE]->(new_user)
156
157
  ON CREATE SET org_r.firstseen = timestamp()
@@ -3,7 +3,9 @@ import logging
3
3
  import neo4j
4
4
 
5
5
  from cartography.config import Config
6
- from cartography.intel.semgrep.findings import sync
6
+ from cartography.intel.semgrep.dependencies import sync_dependencies
7
+ from cartography.intel.semgrep.deployment import sync_deployment
8
+ from cartography.intel.semgrep.findings import sync_findings
7
9
  from cartography.util import timeit
8
10
 
9
11
 
@@ -20,4 +22,9 @@ def start_semgrep_ingestion(
20
22
  if not config.semgrep_app_token:
21
23
  logger.info('Semgrep import is not configured - skipping this module. See docs to configure.')
22
24
  return
23
- sync(neo4j_session, config.semgrep_app_token, config.update_tag, common_job_parameters)
25
+
26
+ # sync_deployment must be called first since it populates common_job_parameters
27
+ # with the deployment ID and slug, which are required by the other sync functions
28
+ sync_deployment(neo4j_session, config.semgrep_app_token, config.update_tag, common_job_parameters)
29
+ sync_dependencies(neo4j_session, config.semgrep_app_token, config.semgrep_dependency_ecosystems, config.update_tag, common_job_parameters) # noqa: E501
30
+ sync_findings(neo4j_session, config.semgrep_app_token, config.update_tag, common_job_parameters)
@@ -0,0 +1,233 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Callable
4
+ from typing import Dict
5
+ from typing import List
6
+
7
+ import neo4j
8
+ import requests
9
+ from requests.exceptions import HTTPError
10
+ from requests.exceptions import ReadTimeout
11
+
12
+ from cartography.client.core.tx import load
13
+ from cartography.graph.job import GraphJob
14
+ from cartography.models.semgrep.dependencies import SemgrepGoLibrarySchema
15
+ from cartography.models.semgrep.dependencies import SemgrepNpmLibrarySchema
16
+ from cartography.stats import get_stats_client
17
+ from cartography.util import merge_module_sync_metadata
18
+ from cartography.util import timeit
19
+
20
+ logger = logging.getLogger(__name__)
21
+ stat_handler = get_stats_client(__name__)
22
+ _PAGE_SIZE = 10000
23
+ _TIMEOUT = (60, 60)
24
+ _MAX_RETRIES = 3
25
+
26
+ # The keys in this dictionary must be in Semgrep's list of supported ecosystems, defined here:
27
+ # https://semgrep.dev/api/v1/docs/#tag/SupplyChainService/operation/semgrep_app.products.sca.handlers.dependency.list_dependencies_conexxion
28
+ ECOSYSTEM_TO_SCHEMA: Dict = {
29
+ 'gomod': SemgrepGoLibrarySchema,
30
+ 'npm': SemgrepNpmLibrarySchema,
31
+ }
32
+
33
+
34
+ def parse_and_validate_semgrep_ecosystems(ecosystems: str) -> List[str]:
35
+ validated_ecosystems: List[str] = []
36
+ for ecosystem in ecosystems.split(','):
37
+ ecosystem = ecosystem.strip().lower()
38
+
39
+ if ecosystem in ECOSYSTEM_TO_SCHEMA:
40
+ validated_ecosystems.append(ecosystem)
41
+ else:
42
+ valid_ecosystems: str = ','.join(ECOSYSTEM_TO_SCHEMA.keys())
43
+ raise ValueError(
44
+ f'Error parsing `semgrep-dependency-ecosystems`. You specified "{ecosystems}". '
45
+ f'Please check that your input is formatted as comma-separated values, e.g. "gomod,npm". '
46
+ f'Full list of supported ecosystems: {valid_ecosystems}.',
47
+ )
48
+ return validated_ecosystems
49
+
50
+
51
+ @timeit
52
+ def get_dependencies(semgrep_app_token: str, deployment_id: str, ecosystem: str) -> List[Dict[str, Any]]:
53
+ """
54
+ Gets all dependencies for the given ecosystem within the given Semgrep deployment ID.
55
+ :param semgrep_app_token: The Semgrep App token to use for authentication.
56
+ :param deployment_id: The Semgrep deployment ID to use for retrieving dependencies.
57
+ :param ecosystem: The ecosystem to import dependencies from, e.g. "gomod" or "npm".
58
+ """
59
+ all_deps = []
60
+ deps_url = f"https://semgrep.dev/api/v1/deployments/{deployment_id}/dependencies"
61
+ has_more = True
62
+ page = 0
63
+ retries = 0
64
+ headers = {
65
+ "Content-Type": "application/json",
66
+ "Authorization": f"Bearer {semgrep_app_token}",
67
+ }
68
+
69
+ request_data: dict[str, Any] = {
70
+ "pageSize": _PAGE_SIZE,
71
+ "dependencyFilter": {
72
+ "ecosystem": [ecosystem],
73
+ },
74
+ }
75
+
76
+ logger.info(f"Retrieving Semgrep {ecosystem} dependencies for deployment '{deployment_id}'.")
77
+ while has_more:
78
+ try:
79
+ response = requests.post(deps_url, json=request_data, headers=headers, timeout=_TIMEOUT)
80
+ response.raise_for_status()
81
+ data = response.json()
82
+ except (ReadTimeout, HTTPError):
83
+ logger.warning(f"Failed to retrieve Semgrep {ecosystem} dependencies for page {page}. Retrying...")
84
+ retries += 1
85
+ if retries >= _MAX_RETRIES:
86
+ raise
87
+ continue
88
+ deps = data.get("dependencies", [])
89
+ has_more = data.get("hasMore", False)
90
+ logger.info(f"Processed page {page} of Semgrep {ecosystem} dependencies.")
91
+ all_deps.extend(deps)
92
+ retries = 0
93
+ page += 1
94
+ request_data["cursor"] = data.get("cursor")
95
+
96
+ logger.info(f"Retrieved {len(all_deps)} Semgrep {ecosystem} dependencies in {page} pages.")
97
+ return all_deps
98
+
99
+
100
+ def transform_dependencies(raw_deps: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
101
+ """
102
+ Transforms the raw dependencies response from Semgrep API into a list of dicts
103
+ that can be used to create the Dependency nodes.
104
+ """
105
+
106
+ """
107
+ sample raw_dep as of November 2024:
108
+ {
109
+ "repositoryId": "123456",
110
+ "definedAt": {
111
+ "path": "go.mod",
112
+ "startLine": "6",
113
+ "endLine": "6",
114
+ "url": "https://github.com/org/repo-name/blob/00000000000000000000000000000000/go.mod#L6",
115
+ "committedAt": "1970-01-01T00:00:00Z",
116
+ "startCol": "0",
117
+ "endCol": "0"
118
+ },
119
+ "transitivity": "DIRECT",
120
+ "package": {
121
+ "name": "github.com/foo/bar",
122
+ "versionSpecifier": "1.2.3"
123
+ },
124
+ "ecosystem": "gomod",
125
+ "licenses": [],
126
+ "pathToTransitivity": []
127
+ },
128
+ """
129
+ deps = []
130
+ for raw_dep in raw_deps:
131
+
132
+ # We could call a different endpoint to get all repo IDs and store a mapping of repo ID to URL,
133
+ # but it's much simpler to just extract the URL from the definedAt field.
134
+ repo_url = raw_dep["definedAt"]["url"].split("/blob/", 1)[0]
135
+
136
+ name = raw_dep["package"]["name"]
137
+ version = raw_dep["package"]["versionSpecifier"]
138
+ id = f"{name}|{version}"
139
+
140
+ # As of November 2024, Semgrep does not import dependencies with version specifiers such as >, <, etc.
141
+ # For now, hardcode the specifier to ==<version> to align with GitHub-sourced Python dependencies.
142
+ # If Semgrep eventually supports version specifiers, update this line accordingly.
143
+ specifier = f"=={version}"
144
+
145
+ deps.append({
146
+ # existing dependency properties:
147
+ "id": id,
148
+ "name": name,
149
+ "specifier": specifier,
150
+ "version": version,
151
+ "repo_url": repo_url,
152
+
153
+ # Semgrep-specific properties:
154
+ "ecosystem": raw_dep["ecosystem"],
155
+ "transitivity": raw_dep["transitivity"].lower(),
156
+ "url": raw_dep["definedAt"]["url"],
157
+ })
158
+
159
+ return deps
160
+
161
+
162
+ @timeit
163
+ def load_dependencies(
164
+ neo4j_session: neo4j.Session,
165
+ dependency_schema: Callable,
166
+ dependencies: List[Dict],
167
+ deployment_id: str,
168
+ update_tag: int,
169
+ ) -> None:
170
+ logger.info(f"Loading {len(dependencies)} {dependency_schema().label} objects into the graph.")
171
+ load(
172
+ neo4j_session,
173
+ dependency_schema(),
174
+ dependencies,
175
+ lastupdated=update_tag,
176
+ DEPLOYMENT_ID=deployment_id,
177
+ )
178
+
179
+
180
+ @timeit
181
+ def cleanup(
182
+ neo4j_session: neo4j.Session,
183
+ dependency_schema: Callable,
184
+ common_job_parameters: Dict[str, Any],
185
+ ) -> None:
186
+ logger.info(f"Running Semgrep Dependencies cleanup job for {dependency_schema().label}.")
187
+ GraphJob.from_node_schema(dependency_schema(), common_job_parameters).run(neo4j_session)
188
+
189
+
190
+ @timeit
191
+ def sync_dependencies(
192
+ neo4j_session: neo4j.Session,
193
+ semgrep_app_token: str,
194
+ ecosystems_str: str,
195
+ update_tag: int,
196
+ common_job_parameters: Dict[str, Any],
197
+ ) -> None:
198
+
199
+ deployment_id = common_job_parameters.get("DEPLOYMENT_ID")
200
+ if not deployment_id:
201
+ logger.warning(
202
+ "Missing Semgrep deployment ID, ensure that sync_deployment() has been called. "
203
+ "Skipping Semgrep dependencies sync job.",
204
+ )
205
+ return
206
+
207
+ if not ecosystems_str:
208
+ logger.warning(
209
+ "Semgrep is not configured to import dependencies for any ecosystems, see docs to configure. "
210
+ "Skipping Semgrep dependencies sync job.",
211
+ )
212
+ return
213
+
214
+ # We don't expect an error here since we've already validated the input in cli.py
215
+ ecosystems = parse_and_validate_semgrep_ecosystems(ecosystems_str)
216
+
217
+ logger.info("Running Semgrep dependencies sync job.")
218
+
219
+ for ecosystem in ecosystems:
220
+ schema = ECOSYSTEM_TO_SCHEMA[ecosystem]
221
+ raw_deps = get_dependencies(semgrep_app_token, deployment_id, ecosystem)
222
+ deps = transform_dependencies(raw_deps)
223
+ load_dependencies(neo4j_session, schema, deps, deployment_id, update_tag)
224
+ cleanup(neo4j_session, schema, common_job_parameters)
225
+
226
+ merge_module_sync_metadata(
227
+ neo4j_session=neo4j_session,
228
+ group_type='Semgrep',
229
+ group_id=deployment_id,
230
+ synced_type='SemgrepDependency',
231
+ update_tag=update_tag,
232
+ stat_handler=stat_handler,
233
+ )
@@ -0,0 +1,67 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+
5
+ import neo4j
6
+ import requests
7
+
8
+ from cartography.client.core.tx import load
9
+ from cartography.models.semgrep.deployment import SemgrepDeploymentSchema
10
+ from cartography.stats import get_stats_client
11
+ from cartography.util import timeit
12
+
13
+ logger = logging.getLogger(__name__)
14
+ stat_handler = get_stats_client(__name__)
15
+ _TIMEOUT = (60, 60)
16
+
17
+
18
+ @timeit
19
+ def get_deployment(semgrep_app_token: str) -> Dict[str, Any]:
20
+ """
21
+ Gets the deployment associated with the passed Semgrep App token.
22
+ :param semgrep_app_token: The Semgrep App token to use for authentication.
23
+ """
24
+ deployment = {}
25
+ deployment_url = "https://semgrep.dev/api/v1/deployments"
26
+ headers = {
27
+ "Content-Type": "application/json",
28
+ "Authorization": f"Bearer {semgrep_app_token}",
29
+ }
30
+ response = requests.get(deployment_url, headers=headers, timeout=_TIMEOUT)
31
+ response.raise_for_status()
32
+
33
+ data = response.json()
34
+ deployment["id"] = data["deployments"][0]["id"]
35
+ deployment["name"] = data["deployments"][0]["name"]
36
+ deployment["slug"] = data["deployments"][0]["slug"]
37
+
38
+ return deployment
39
+
40
+
41
+ @timeit
42
+ def load_semgrep_deployment(
43
+ neo4j_session: neo4j.Session, deployment: Dict[str, Any], update_tag: int,
44
+ ) -> None:
45
+ logger.info(f"Loading SemgrepDeployment {deployment} into the graph.")
46
+ load(
47
+ neo4j_session,
48
+ SemgrepDeploymentSchema(),
49
+ [deployment],
50
+ lastupdated=update_tag,
51
+ )
52
+
53
+
54
+ @timeit
55
+ def sync_deployment(
56
+ neo4j_session: neo4j.Session,
57
+ semgrep_app_token: str,
58
+ update_tag: int,
59
+ common_job_parameters: Dict[str, Any],
60
+ ) -> None:
61
+
62
+ semgrep_deployment = get_deployment(semgrep_app_token)
63
+ deployment_id = semgrep_deployment["id"]
64
+ deployment_slug = semgrep_deployment["slug"]
65
+ load_semgrep_deployment(neo4j_session, semgrep_deployment, update_tag)
66
+ common_job_parameters["DEPLOYMENT_ID"] = deployment_id
67
+ common_job_parameters["DEPLOYMENT_SLUG"] = deployment_slug