cartography 0.95.0rc1__py3-none-any.whl → 0.96.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/cli.py +15 -0
- cartography/client/core/tx.py +1 -1
- cartography/config.py +6 -2
- cartography/data/indexes.cypher +1 -2
- cartography/data/jobs/cleanup/aws_import_identity_center_cleanup.json +16 -0
- cartography/data/jobs/cleanup/{github_users_cleanup.json → github_org_and_users_cleanup.json} +5 -0
- cartography/data/jobs/cleanup/github_repos_cleanup.json +25 -0
- cartography/graph/querybuilder.py +4 -0
- cartography/intel/aws/apigateway.py +3 -3
- cartography/intel/aws/ec2/auto_scaling_groups.py +147 -185
- cartography/intel/aws/ec2/instances.py +2 -0
- cartography/intel/aws/ec2/network_acls.py +209 -0
- cartography/intel/aws/ec2/subnets.py +2 -0
- cartography/intel/aws/iam.py +4 -3
- cartography/intel/aws/identitycenter.py +307 -0
- cartography/intel/aws/resources.py +4 -0
- cartography/intel/cve/__init__.py +1 -1
- cartography/intel/cve/feed.py +10 -7
- cartography/intel/github/repos.py +176 -27
- cartography/intel/github/users.py +156 -39
- cartography/intel/okta/users.py +2 -1
- cartography/intel/semgrep/__init__.py +9 -2
- cartography/intel/semgrep/dependencies.py +233 -0
- cartography/intel/semgrep/deployment.py +67 -0
- cartography/intel/semgrep/findings.py +22 -53
- cartography/models/aws/ec2/auto_scaling_groups.py +204 -0
- cartography/models/aws/ec2/launch_configurations.py +55 -0
- cartography/models/aws/ec2/network_acl_rules.py +98 -0
- cartography/models/aws/ec2/network_acls.py +86 -0
- cartography/models/aws/identitycenter/__init__.py +0 -0
- cartography/models/aws/identitycenter/awsidentitycenter.py +44 -0
- cartography/models/aws/identitycenter/awspermissionset.py +84 -0
- cartography/models/aws/identitycenter/awsssouser.py +68 -0
- cartography/models/core/common.py +18 -1
- cartography/models/github/orgs.py +26 -0
- cartography/models/github/users.py +119 -0
- cartography/models/semgrep/dependencies.py +90 -0
- cartography-0.96.0.dist-info/METADATA +53 -0
- {cartography-0.95.0rc1.dist-info → cartography-0.96.0.dist-info}/RECORD +43 -27
- {cartography-0.95.0rc1.dist-info → cartography-0.96.0.dist-info}/WHEEL +1 -1
- cartography-0.95.0rc1.dist-info/METADATA +0 -53
- {cartography-0.95.0rc1.dist-info → cartography-0.96.0.dist-info}/LICENSE +0 -0
- {cartography-0.95.0rc1.dist-info → cartography-0.96.0.dist-info}/entry_points.txt +0 -0
- {cartography-0.95.0rc1.dist-info → cartography-0.96.0.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import configparser
|
|
2
2
|
import logging
|
|
3
|
+
from collections import namedtuple
|
|
3
4
|
from string import Template
|
|
4
5
|
from typing import Any
|
|
5
6
|
from typing import Dict
|
|
@@ -12,11 +13,26 @@ from packaging.requirements import Requirement
|
|
|
12
13
|
from packaging.utils import canonicalize_name
|
|
13
14
|
|
|
14
15
|
from cartography.intel.github.util import fetch_all
|
|
16
|
+
from cartography.intel.github.util import PaginatedGraphqlData
|
|
15
17
|
from cartography.util import run_cleanup_job
|
|
16
18
|
from cartography.util import timeit
|
|
17
19
|
|
|
18
20
|
logger = logging.getLogger(__name__)
|
|
19
21
|
|
|
22
|
+
|
|
23
|
+
# Representation of a user's permission level and affiliation to a GitHub repo. See:
|
|
24
|
+
# - Permission: https://docs.github.com/en/graphql/reference/enums#repositorypermission
|
|
25
|
+
# - Affiliation: https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
|
|
26
|
+
UserAffiliationAndRepoPermission = namedtuple(
|
|
27
|
+
'UserAffiliationAndRepoPermission',
|
|
28
|
+
[
|
|
29
|
+
'user', # Dict
|
|
30
|
+
'permission', # 'WRITE', 'MAINTAIN', 'ADMIN', etc
|
|
31
|
+
'affiliation', # 'OUTSIDE', 'DIRECT'
|
|
32
|
+
],
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
|
|
20
36
|
GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
|
|
21
37
|
query($login: String!, $cursor: String) {
|
|
22
38
|
organization(login: $login)
|
|
@@ -59,17 +75,11 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
|
|
|
59
75
|
login
|
|
60
76
|
__typename
|
|
61
77
|
}
|
|
62
|
-
collaborators(
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
url
|
|
68
|
-
login
|
|
69
|
-
name
|
|
70
|
-
email
|
|
71
|
-
company
|
|
72
|
-
}
|
|
78
|
+
directCollaborators: collaborators(first: 100, affiliation: DIRECT) {
|
|
79
|
+
totalCount
|
|
80
|
+
}
|
|
81
|
+
outsideCollaborators: collaborators(first: 100, affiliation: OUTSIDE) {
|
|
82
|
+
totalCount
|
|
73
83
|
}
|
|
74
84
|
requirements:object(expression: "HEAD:requirements.txt") {
|
|
75
85
|
... on Blob {
|
|
@@ -89,6 +99,111 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
|
|
|
89
99
|
# Note: In the above query, `HEAD` references the default branch.
|
|
90
100
|
# See https://stackoverflow.com/questions/48935381/github-graphql-api-default-branch-in-repository
|
|
91
101
|
|
|
102
|
+
GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL = """
|
|
103
|
+
query($login: String!, $repo: String!, $affiliation: CollaboratorAffiliation!, $cursor: String) {
|
|
104
|
+
organization(login: $login) {
|
|
105
|
+
url
|
|
106
|
+
login
|
|
107
|
+
repository(name: $repo){
|
|
108
|
+
name
|
|
109
|
+
collaborators(first: 50, affiliation: $affiliation, after: $cursor) {
|
|
110
|
+
edges {
|
|
111
|
+
permission
|
|
112
|
+
}
|
|
113
|
+
nodes {
|
|
114
|
+
url
|
|
115
|
+
login
|
|
116
|
+
name
|
|
117
|
+
email
|
|
118
|
+
company
|
|
119
|
+
}
|
|
120
|
+
pageInfo{
|
|
121
|
+
endCursor
|
|
122
|
+
hasNextPage
|
|
123
|
+
}
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
rateLimit {
|
|
128
|
+
limit
|
|
129
|
+
cost
|
|
130
|
+
remaining
|
|
131
|
+
resetAt
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
"""
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _get_repo_collaborators_for_multiple_repos(
|
|
138
|
+
repo_raw_data: list[dict[str, Any]],
|
|
139
|
+
affiliation: str,
|
|
140
|
+
org: str,
|
|
141
|
+
api_url: str,
|
|
142
|
+
token: str,
|
|
143
|
+
) -> dict[str, List[UserAffiliationAndRepoPermission]]:
|
|
144
|
+
"""
|
|
145
|
+
For every repo in the given list, retrieve the collaborators.
|
|
146
|
+
:param repo_raw_data: A list of dicts representing repos. See tests.data.github.repos.GET_REPOS for data shape.
|
|
147
|
+
:param affiliation: The type of affiliation to retrieve collaborators for. Either 'DIRECT' or 'OUTSIDE'.
|
|
148
|
+
See https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
|
|
149
|
+
:param org: The name of the target Github organization as string.
|
|
150
|
+
:param api_url: The Github v4 API endpoint as string.
|
|
151
|
+
:param token: The Github API token as string.
|
|
152
|
+
:return: A dictionary of repo URL to list of UserAffiliationAndRepoPermission
|
|
153
|
+
"""
|
|
154
|
+
result: dict[str, List[UserAffiliationAndRepoPermission]] = {}
|
|
155
|
+
for repo in repo_raw_data:
|
|
156
|
+
repo_name = repo['name']
|
|
157
|
+
repo_url = repo['url']
|
|
158
|
+
|
|
159
|
+
if ((affiliation == 'OUTSIDE' and repo['outsideCollaborators']['totalCount'] == 0) or
|
|
160
|
+
(affiliation == 'DIRECT' and repo['directCollaborators']['totalCount'] == 0)):
|
|
161
|
+
# repo has no collabs of the affiliation type we're looking for, so don't waste time making an API call
|
|
162
|
+
result[repo_url] = []
|
|
163
|
+
continue
|
|
164
|
+
|
|
165
|
+
collab_users = []
|
|
166
|
+
collab_permission = []
|
|
167
|
+
collaborators = _get_repo_collaborators(token, api_url, org, repo_name, affiliation)
|
|
168
|
+
# nodes and edges are expected to always be present given that we only call for them if totalCount is > 0
|
|
169
|
+
for collab in collaborators.nodes:
|
|
170
|
+
collab_users.append(collab)
|
|
171
|
+
for perm in collaborators.edges:
|
|
172
|
+
collab_permission.append(perm['permission'])
|
|
173
|
+
|
|
174
|
+
result[repo_url] = [
|
|
175
|
+
UserAffiliationAndRepoPermission(user, permission, affiliation)
|
|
176
|
+
for user, permission in zip(collab_users, collab_permission)
|
|
177
|
+
]
|
|
178
|
+
return result
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _get_repo_collaborators(
|
|
182
|
+
token: str, api_url: str, organization: str, repo: str, affiliation: str,
|
|
183
|
+
) -> PaginatedGraphqlData:
|
|
184
|
+
"""
|
|
185
|
+
Retrieve a list of collaborators for a given repository, as described in
|
|
186
|
+
https://docs.github.com/en/graphql/reference/objects#repositorycollaboratorconnection.
|
|
187
|
+
:param token: The Github API token as string.
|
|
188
|
+
:param api_url: The Github v4 API endpoint as string.
|
|
189
|
+
:param organization: The name of the target Github organization as string.
|
|
190
|
+
:param repo: The name of the target Github repository as string.
|
|
191
|
+
:param affiliation: The type of affiliation to retrieve collaborators for. Either 'DIRECT' or 'OUTSIDE'.
|
|
192
|
+
See https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
|
|
193
|
+
:return: The paginated collaborator data (`nodes` and `edges`) for the given repo. See tests.data.github.repos for data shape.
|
|
194
|
+
"""
|
|
195
|
+
collaborators, _ = fetch_all(
|
|
196
|
+
token,
|
|
197
|
+
api_url,
|
|
198
|
+
organization,
|
|
199
|
+
GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL,
|
|
200
|
+
'repository',
|
|
201
|
+
resource_inner_type='collaborators',
|
|
202
|
+
repo=repo,
|
|
203
|
+
affiliation=affiliation,
|
|
204
|
+
)
|
|
205
|
+
return collaborators
|
|
206
|
+
|
|
92
207
|
|
|
93
208
|
@timeit
|
|
94
209
|
def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
@@ -111,11 +226,18 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
|
111
226
|
return repos.nodes
|
|
112
227
|
|
|
113
228
|
|
|
114
|
-
def transform(
|
|
229
|
+
def transform(
|
|
230
|
+
repos_json: List[Dict], direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
|
|
231
|
+
outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
|
|
232
|
+
) -> Dict:
|
|
115
233
|
"""
|
|
116
234
|
Parses the JSON returned from GitHub API to create data for graph ingestion
|
|
117
|
-
:param repos_json: the list of individual repository nodes from GitHub.
|
|
118
|
-
|
|
235
|
+
:param repos_json: the list of individual repository nodes from GitHub.
|
|
236
|
+
See tests.data.github.repos.GET_REPOS for data shape.
|
|
237
|
+
:param direct_collaborators: dict of repo URL to list of direct collaborators.
|
|
238
|
+
See tests.data.github.repos.DIRECT_COLLABORATORS for data shape.
|
|
239
|
+
:param outside_collaborators: dict of repo URL to list of outside collaborators.
|
|
240
|
+
See tests.data.github.repos.OUTSIDE_COLLABORATORS for data shape.
|
|
119
241
|
:return: Dict containing the repos, repo->language mapping, owners->repo mapping, outside collaborators->repo
|
|
120
242
|
mapping, and Python requirements files (if any) in a repo.
|
|
121
243
|
"""
|
|
@@ -123,7 +245,10 @@ def transform(repos_json: List[Dict]) -> Dict:
|
|
|
123
245
|
transformed_repo_languages: List[Dict] = []
|
|
124
246
|
transformed_repo_owners: List[Dict] = []
|
|
125
247
|
# See https://docs.github.com/en/graphql/reference/enums#repositorypermission
|
|
126
|
-
|
|
248
|
+
transformed_outside_collaborators: Dict[str, List[Any]] = {
|
|
249
|
+
'ADMIN': [], 'MAINTAIN': [], 'READ': [], 'TRIAGE': [], 'WRITE': [],
|
|
250
|
+
}
|
|
251
|
+
transformed_direct_collaborators: Dict[str, List[Any]] = {
|
|
127
252
|
'ADMIN': [], 'MAINTAIN': [], 'READ': [], 'TRIAGE': [], 'WRITE': [],
|
|
128
253
|
}
|
|
129
254
|
transformed_requirements_files: List[Dict] = []
|
|
@@ -131,14 +256,22 @@ def transform(repos_json: List[Dict]) -> Dict:
|
|
|
131
256
|
_transform_repo_languages(repo_object['url'], repo_object, transformed_repo_languages)
|
|
132
257
|
_transform_repo_objects(repo_object, transformed_repo_list)
|
|
133
258
|
_transform_repo_owners(repo_object['owner']['url'], repo_object, transformed_repo_owners)
|
|
134
|
-
_transform_collaborators(
|
|
259
|
+
_transform_collaborators(
|
|
260
|
+
repo_object['url'], outside_collaborators[repo_object['url']],
|
|
261
|
+
transformed_outside_collaborators,
|
|
262
|
+
)
|
|
263
|
+
_transform_collaborators(
|
|
264
|
+
repo_object['url'], direct_collaborators[repo_object['url']],
|
|
265
|
+
transformed_direct_collaborators,
|
|
266
|
+
)
|
|
135
267
|
_transform_requirements_txt(repo_object['requirements'], repo_object['url'], transformed_requirements_files)
|
|
136
268
|
_transform_setup_cfg_requirements(repo_object['setupCfg'], repo_object['url'], transformed_requirements_files)
|
|
137
269
|
results = {
|
|
138
270
|
'repos': transformed_repo_list,
|
|
139
271
|
'repo_languages': transformed_repo_languages,
|
|
140
272
|
'repo_owners': transformed_repo_owners,
|
|
141
|
-
'
|
|
273
|
+
'repo_outside_collaborators': transformed_outside_collaborators,
|
|
274
|
+
'repo_direct_collaborators': transformed_direct_collaborators,
|
|
142
275
|
'python_requirements': transformed_requirements_files,
|
|
143
276
|
}
|
|
144
277
|
return results
|
|
@@ -229,11 +362,15 @@ def _transform_repo_languages(repo_url: str, repo: Dict, repo_languages: List[Di
|
|
|
229
362
|
})
|
|
230
363
|
|
|
231
364
|
|
|
232
|
-
def _transform_collaborators(
|
|
365
|
+
def _transform_collaborators(
|
|
366
|
+
repo_url: str, collaborators: List[UserAffiliationAndRepoPermission], transformed_collaborators: Dict,
|
|
367
|
+
) -> None:
|
|
233
368
|
"""
|
|
234
|
-
Performs data adjustments for
|
|
369
|
+
Performs data adjustments for collaborators in a GitHub repo.
|
|
235
370
|
Output data shape = [{permission, repo_url, url (the user's URL), login, name}, ...]
|
|
236
|
-
:param collaborators:
|
|
371
|
+
:param collaborators: For data shape, see
|
|
372
|
+
cartography.tests.data.github.repos.DIRECT_COLLABORATORS
|
|
373
|
+
cartography.tests.data.github.repos.OUTSIDE_COLLABORATORS
|
|
237
374
|
:param repo_url: The URL of the GitHub repo.
|
|
238
375
|
:param transformed_collaborators: Output dict. Data shape =
|
|
239
376
|
{'ADMIN': [{ user }, ...], 'MAINTAIN': [{ user }, ...], 'READ': [ ... ], 'TRIAGE': [ ... ], 'WRITE': [ ... ]}
|
|
@@ -241,10 +378,11 @@ def _transform_collaborators(collaborators: Dict, repo_url: str, transformed_col
|
|
|
241
378
|
"""
|
|
242
379
|
# `collaborators` is sometimes None
|
|
243
380
|
if collaborators:
|
|
244
|
-
for
|
|
245
|
-
|
|
381
|
+
for collaborator in collaborators:
|
|
382
|
+
user = collaborator.user
|
|
246
383
|
user['repo_url'] = repo_url
|
|
247
|
-
|
|
384
|
+
user['affiliation'] = collaborator.affiliation
|
|
385
|
+
transformed_collaborators[collaborator.permission].append(user)
|
|
248
386
|
|
|
249
387
|
|
|
250
388
|
def _transform_requirements_txt(
|
|
@@ -482,7 +620,7 @@ def load_github_owners(neo4j_session: neo4j.Session, update_tag: int, repo_owner
|
|
|
482
620
|
|
|
483
621
|
|
|
484
622
|
@timeit
|
|
485
|
-
def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborators: Dict) -> None:
|
|
623
|
+
def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborators: Dict, affiliation: str) -> None:
|
|
486
624
|
query = Template("""
|
|
487
625
|
UNWIND $UserData as user
|
|
488
626
|
|
|
@@ -502,7 +640,7 @@ def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborat
|
|
|
502
640
|
SET o.lastupdated = $UpdateTag
|
|
503
641
|
""")
|
|
504
642
|
for collab_type in collaborators.keys():
|
|
505
|
-
relationship_label = f"
|
|
643
|
+
relationship_label = f"{affiliation}_COLLAB_{collab_type}"
|
|
506
644
|
neo4j_session.run(
|
|
507
645
|
query.safe_substitute(rel_label=relationship_label),
|
|
508
646
|
UserData=collaborators[collab_type],
|
|
@@ -515,7 +653,12 @@ def load(neo4j_session: neo4j.Session, common_job_parameters: Dict, repo_data: D
|
|
|
515
653
|
load_github_repos(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repos'])
|
|
516
654
|
load_github_owners(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_owners'])
|
|
517
655
|
load_github_languages(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_languages'])
|
|
518
|
-
load_collaborators(
|
|
656
|
+
load_collaborators(
|
|
657
|
+
neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_direct_collaborators'], 'DIRECT',
|
|
658
|
+
)
|
|
659
|
+
load_collaborators(
|
|
660
|
+
neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_outside_collaborators'], 'OUTSIDE',
|
|
661
|
+
)
|
|
519
662
|
load_python_requirements(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['python_requirements'])
|
|
520
663
|
|
|
521
664
|
|
|
@@ -561,6 +704,12 @@ def sync(
|
|
|
561
704
|
"""
|
|
562
705
|
logger.info("Syncing GitHub repos")
|
|
563
706
|
repos_json = get(github_api_key, github_url, organization)
|
|
564
|
-
|
|
707
|
+
direct_collabs = _get_repo_collaborators_for_multiple_repos(
|
|
708
|
+
repos_json, "DIRECT", organization, github_url, github_api_key,
|
|
709
|
+
)
|
|
710
|
+
outside_collabs = _get_repo_collaborators_for_multiple_repos(
|
|
711
|
+
repos_json, "OUTSIDE", organization, github_url, github_api_key,
|
|
712
|
+
)
|
|
713
|
+
repo_data = transform(repos_json, direct_collabs, outside_collabs)
|
|
565
714
|
load(neo4j_session, common_job_parameters, repo_data)
|
|
566
715
|
run_cleanup_job('github_repos_cleanup.json', neo4j_session, common_job_parameters)
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from copy import deepcopy
|
|
2
3
|
from typing import Any
|
|
3
4
|
from typing import Dict
|
|
4
5
|
from typing import List
|
|
@@ -6,7 +7,11 @@ from typing import Tuple
|
|
|
6
7
|
|
|
7
8
|
import neo4j
|
|
8
9
|
|
|
10
|
+
from cartography.client.core.tx import load
|
|
9
11
|
from cartography.intel.github.util import fetch_all
|
|
12
|
+
from cartography.models.github.orgs import GitHubOrganizationSchema
|
|
13
|
+
from cartography.models.github.users import GitHubOrganizationUserSchema
|
|
14
|
+
from cartography.models.github.users import GitHubUnaffiliatedUserSchema
|
|
10
15
|
from cartography.stats import get_stats_client
|
|
11
16
|
from cartography.util import merge_module_sync_metadata
|
|
12
17
|
from cartography.util import run_cleanup_job
|
|
@@ -44,17 +49,46 @@ GITHUB_ORG_USERS_PAGINATED_GRAPHQL = """
|
|
|
44
49
|
}
|
|
45
50
|
"""
|
|
46
51
|
|
|
52
|
+
GITHUB_ENTERPRISE_OWNER_USERS_PAGINATED_GRAPHQL = """
|
|
53
|
+
query($login: String!, $cursor: String) {
|
|
54
|
+
organization(login: $login)
|
|
55
|
+
{
|
|
56
|
+
url
|
|
57
|
+
login
|
|
58
|
+
enterpriseOwners(first:100, after: $cursor){
|
|
59
|
+
edges {
|
|
60
|
+
node {
|
|
61
|
+
url
|
|
62
|
+
login
|
|
63
|
+
name
|
|
64
|
+
isSiteAdmin
|
|
65
|
+
email
|
|
66
|
+
company
|
|
67
|
+
}
|
|
68
|
+
organizationRole
|
|
69
|
+
}
|
|
70
|
+
pageInfo{
|
|
71
|
+
endCursor
|
|
72
|
+
hasNextPage
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
"""
|
|
78
|
+
|
|
47
79
|
|
|
48
80
|
@timeit
|
|
49
|
-
def
|
|
81
|
+
def get_users(token: str, api_url: str, organization: str) -> Tuple[List[Dict], Dict]:
|
|
50
82
|
"""
|
|
51
83
|
Retrieve a list of users from the given GitHub organization as described in
|
|
52
84
|
https://docs.github.com/en/graphql/reference/objects#organizationmemberedge.
|
|
53
85
|
:param token: The Github API token as string.
|
|
54
86
|
:param api_url: The Github v4 API endpoint as string.
|
|
55
87
|
:param organization: The name of the target Github organization as string.
|
|
56
|
-
:return: A 2-tuple containing
|
|
57
|
-
|
|
88
|
+
:return: A 2-tuple containing
|
|
89
|
+
1. a list of dicts representing users and
|
|
90
|
+
2. data on the owning GitHub organization
|
|
91
|
+
see tests.data.github.users.GITHUB_USER_DATA for shape of both
|
|
58
92
|
"""
|
|
59
93
|
users, org = fetch_all(
|
|
60
94
|
token,
|
|
@@ -66,56 +100,139 @@ def get(token: str, api_url: str, organization: str) -> Tuple[List[Dict], Dict]:
|
|
|
66
100
|
return users.edges, org
|
|
67
101
|
|
|
68
102
|
|
|
103
|
+
def get_enterprise_owners(token: str, api_url: str, organization: str) -> Tuple[List[Dict], Dict]:
|
|
104
|
+
"""
|
|
105
|
+
Retrieve a list of enterprise owners from the given GitHub organization as described in
|
|
106
|
+
https://docs.github.com/en/graphql/reference/objects#organizationenterpriseowneredge.
|
|
107
|
+
:param token: The Github API token as string.
|
|
108
|
+
:param api_url: The Github v4 API endpoint as string.
|
|
109
|
+
:param organization: The name of the target Github organization as string.
|
|
110
|
+
:return: A 2-tuple containing
|
|
111
|
+
1. a list of dicts representing users who are enterprise owners
|
|
112
|
+
2. data on the owning GitHub organization
|
|
113
|
+
see tests.data.github.users.GITHUB_ENTERPRISE_OWNER_DATA for shape
|
|
114
|
+
"""
|
|
115
|
+
owners, org = fetch_all(
|
|
116
|
+
token,
|
|
117
|
+
api_url,
|
|
118
|
+
organization,
|
|
119
|
+
GITHUB_ENTERPRISE_OWNER_USERS_PAGINATED_GRAPHQL,
|
|
120
|
+
'enterpriseOwners',
|
|
121
|
+
)
|
|
122
|
+
return owners.edges, org
|
|
123
|
+
|
|
124
|
+
|
|
69
125
|
@timeit
|
|
70
|
-
def
|
|
71
|
-
|
|
126
|
+
def transform_users(user_data: List[Dict], owners_data: List[Dict], org_data: Dict) -> Tuple[List[Dict], List[Dict]]:
|
|
127
|
+
"""
|
|
128
|
+
Taking raw user and owner data, return two lists of processed user data:
|
|
129
|
+
* organization users aka affiliated users (users directly affiliated with an organization)
|
|
130
|
+
* unaffiliated users (users who, for example, are enterprise owners but not members of the target organization).
|
|
131
|
+
|
|
132
|
+
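:param user_data: List of organization member edges; each has a 'node' dict plus 'role' and 'hasTwoFactorEnabled'.
|
|
133
|
+
:param owners_data: List of enterprise owner edges; each has a 'node' dict plus 'organizationRole'.
|
|
134
|
+
:param org_data: Dict of data on the owning GitHub organization; its 'url' is attached to each processed user.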
|
|
135
|
+
:return: A 2-tuple containing
|
|
136
|
+
1. a list of dicts representing users who are affiliated with the target org
|
|
137
|
+
see tests.data.github.users.GITHUB_USER_DATA for shape
|
|
138
|
+
2. a list of dicts representing users who are not affiliated (e.g. enterprise owners who are not also in
|
|
139
|
+
the target org) — see tests.data.github.users.GITHUB_ENTERPRISE_OWNER_DATA for shape
|
|
140
|
+
Note: the owning GitHub organization's data is not returned; its URL is only written onto each user as `MEMBER_OF` or `UNAFFILIATED`.
|
|
141
|
+
"""
|
|
142
|
+
|
|
143
|
+
users_dict = {}
|
|
144
|
+
for user in user_data:
|
|
145
|
+
processed_user = deepcopy(user['node'])
|
|
146
|
+
processed_user['role'] = user['role']
|
|
147
|
+
processed_user['hasTwoFactorEnabled'] = user['hasTwoFactorEnabled']
|
|
148
|
+
processed_user['MEMBER_OF'] = org_data['url']
|
|
149
|
+
users_dict[processed_user['url']] = processed_user
|
|
150
|
+
|
|
151
|
+
owners_dict = {}
|
|
152
|
+
for owner in owners_data:
|
|
153
|
+
processed_owner = deepcopy(owner['node'])
|
|
154
|
+
processed_owner['isEnterpriseOwner'] = True
|
|
155
|
+
if owner['organizationRole'] == 'UNAFFILIATED':
|
|
156
|
+
processed_owner['UNAFFILIATED'] = org_data['url']
|
|
157
|
+
else:
|
|
158
|
+
processed_owner['MEMBER_OF'] = org_data['url']
|
|
159
|
+
owners_dict[processed_owner['url']] = processed_owner
|
|
160
|
+
|
|
161
|
+
affiliated_users = [] # users affiliated with the target org
|
|
162
|
+
for url, user in users_dict.items():
|
|
163
|
+
user['isEnterpriseOwner'] = url in owners_dict
|
|
164
|
+
affiliated_users.append(user)
|
|
165
|
+
|
|
166
|
+
unaffiliated_users = [] # users not affiliated with the target org
|
|
167
|
+
for url, owner in owners_dict.items():
|
|
168
|
+
if url not in users_dict:
|
|
169
|
+
unaffiliated_users.append(owner)
|
|
170
|
+
|
|
171
|
+
return affiliated_users, unaffiliated_users
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
@timeit
|
|
175
|
+
def load_users(
|
|
176
|
+
neo4j_session: neo4j.Session,
|
|
177
|
+
node_schema: GitHubOrganizationUserSchema | GitHubUnaffiliatedUserSchema,
|
|
178
|
+
user_data: List[Dict],
|
|
179
|
+
org_data: Dict,
|
|
72
180
|
update_tag: int,
|
|
73
181
|
) -> None:
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
"""
|
|
98
|
-
neo4j_session.run(
|
|
99
|
-
query,
|
|
100
|
-
OrgUrl=org_data['url'],
|
|
101
|
-
OrgLogin=org_data['login'],
|
|
102
|
-
UserData=user_data,
|
|
103
|
-
UpdateTag=update_tag,
|
|
182
|
+
logger.info(f"Loading {len(user_data)} GitHub users to the graph")
|
|
183
|
+
load(
|
|
184
|
+
neo4j_session,
|
|
185
|
+
node_schema,
|
|
186
|
+
user_data,
|
|
187
|
+
lastupdated=update_tag,
|
|
188
|
+
org_url=org_data['url'],
|
|
189
|
+
)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
@timeit
|
|
193
|
+
def load_organization(
|
|
194
|
+
neo4j_session: neo4j.Session,
|
|
195
|
+
node_schema: GitHubOrganizationSchema,
|
|
196
|
+
org_data: List[Dict[str, Any]],
|
|
197
|
+
update_tag: int,
|
|
198
|
+
) -> None:
|
|
199
|
+
logger.info(f"Loading {len(org_data)} GitHub organization to the graph")
|
|
200
|
+
load(
|
|
201
|
+
neo4j_session,
|
|
202
|
+
node_schema,
|
|
203
|
+
org_data,
|
|
204
|
+
lastupdated=update_tag,
|
|
104
205
|
)
|
|
105
206
|
|
|
106
207
|
|
|
107
208
|
@timeit
|
|
108
209
|
def sync(
|
|
109
210
|
neo4j_session: neo4j.Session,
|
|
110
|
-
common_job_parameters: Dict
|
|
211
|
+
common_job_parameters: Dict,
|
|
111
212
|
github_api_key: str,
|
|
112
213
|
github_url: str,
|
|
113
214
|
organization: str,
|
|
114
215
|
) -> None:
|
|
115
216
|
logger.info("Syncing GitHub users")
|
|
116
|
-
user_data, org_data =
|
|
117
|
-
|
|
118
|
-
|
|
217
|
+
user_data, org_data = get_users(github_api_key, github_url, organization)
|
|
218
|
+
owners_data, org_data = get_enterprise_owners(github_api_key, github_url, organization)
|
|
219
|
+
processed_affiliated_user_data, processed_unaffiliated_user_data = (
|
|
220
|
+
transform_users(user_data, owners_data, org_data)
|
|
221
|
+
)
|
|
222
|
+
load_organization(
|
|
223
|
+
neo4j_session, GitHubOrganizationSchema(), [org_data],
|
|
224
|
+
common_job_parameters['UPDATE_TAG'],
|
|
225
|
+
)
|
|
226
|
+
load_users(
|
|
227
|
+
neo4j_session, GitHubOrganizationUserSchema(), processed_affiliated_user_data, org_data,
|
|
228
|
+
common_job_parameters['UPDATE_TAG'],
|
|
229
|
+
)
|
|
230
|
+
load_users(
|
|
231
|
+
neo4j_session, GitHubUnaffiliatedUserSchema(), processed_unaffiliated_user_data, org_data,
|
|
232
|
+
common_job_parameters['UPDATE_TAG'],
|
|
233
|
+
)
|
|
234
|
+
# No automated (schema-derived) cleanup for users because the user node has no sub_resource_relationship; run the JSON cleanup job instead.
|
|
235
|
+
run_cleanup_job('github_org_and_users_cleanup.json', neo4j_session, common_job_parameters)
|
|
119
236
|
merge_module_sync_metadata(
|
|
120
237
|
neo4j_session,
|
|
121
238
|
group_type='GitHubOrganization',
|
cartography/intel/okta/users.py
CHANGED
|
@@ -150,7 +150,8 @@ def _load_okta_users(
|
|
|
150
150
|
new_user.okta_last_updated = user_data.okta_last_updated,
|
|
151
151
|
new_user.password_changed = user_data.password_changed,
|
|
152
152
|
new_user.transition_to_status = user_data.transition_to_status,
|
|
153
|
-
new_user.lastupdated = $okta_update_tag
|
|
153
|
+
new_user.lastupdated = $okta_update_tag,
|
|
154
|
+
new_user :UserAccount
|
|
154
155
|
WITH new_user, org
|
|
155
156
|
MERGE (org)-[org_r:RESOURCE]->(new_user)
|
|
156
157
|
ON CREATE SET org_r.firstseen = timestamp()
|
|
@@ -3,7 +3,9 @@ import logging
|
|
|
3
3
|
import neo4j
|
|
4
4
|
|
|
5
5
|
from cartography.config import Config
|
|
6
|
-
from cartography.intel.semgrep.
|
|
6
|
+
from cartography.intel.semgrep.dependencies import sync_dependencies
|
|
7
|
+
from cartography.intel.semgrep.deployment import sync_deployment
|
|
8
|
+
from cartography.intel.semgrep.findings import sync_findings
|
|
7
9
|
from cartography.util import timeit
|
|
8
10
|
|
|
9
11
|
|
|
@@ -20,4 +22,9 @@ def start_semgrep_ingestion(
|
|
|
20
22
|
if not config.semgrep_app_token:
|
|
21
23
|
logger.info('Semgrep import is not configured - skipping this module. See docs to configure.')
|
|
22
24
|
return
|
|
23
|
-
|
|
25
|
+
|
|
26
|
+
# sync_deployment must be called first since it populates common_job_parameters
|
|
27
|
+
# with the deployment ID and slug, which are required by the other sync functions
|
|
28
|
+
sync_deployment(neo4j_session, config.semgrep_app_token, config.update_tag, common_job_parameters)
|
|
29
|
+
sync_dependencies(neo4j_session, config.semgrep_app_token, config.semgrep_dependency_ecosystems, config.update_tag, common_job_parameters) # noqa: E501
|
|
30
|
+
sync_findings(neo4j_session, config.semgrep_app_token, config.update_tag, common_job_parameters)
|