cartography 0.96.0rc3__py3-none-any.whl → 0.96.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/data/jobs/cleanup/github_repos_cleanup.json +25 -0
- cartography/intel/aws/ec2/auto_scaling_groups.py +147 -185
- cartography/intel/aws/ec2/instances.py +2 -0
- cartography/intel/aws/ec2/network_acls.py +2 -1
- cartography/intel/aws/ec2/subnets.py +2 -0
- cartography/intel/aws/iam.py +4 -3
- cartography/intel/cve/feed.py +29 -24
- cartography/intel/github/repos.py +229 -29
- cartography/intel/github/teams.py +160 -38
- cartography/intel/github/users.py +2 -0
- cartography/intel/github/util.py +2 -1
- cartography/models/aws/ec2/auto_scaling_groups.py +204 -0
- cartography/models/aws/ec2/launch_configurations.py +55 -0
- cartography/models/aws/ec2/network_acl_rules.py +1 -0
- cartography/models/github/teams.py +29 -0
- cartography/util.py +22 -0
- {cartography-0.96.0rc3.dist-info → cartography-0.96.2.dist-info}/METADATA +1 -1
- {cartography-0.96.0rc3.dist-info → cartography-0.96.2.dist-info}/RECORD +22 -20
- {cartography-0.96.0rc3.dist-info → cartography-0.96.2.dist-info}/LICENSE +0 -0
- {cartography-0.96.0rc3.dist-info → cartography-0.96.2.dist-info}/WHEEL +0 -0
- {cartography-0.96.0rc3.dist-info → cartography-0.96.2.dist-info}/entry_points.txt +0 -0
- {cartography-0.96.0rc3.dist-info → cartography-0.96.2.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import configparser
|
|
2
2
|
import logging
|
|
3
|
+
from collections import namedtuple
|
|
3
4
|
from string import Template
|
|
4
5
|
from typing import Any
|
|
5
6
|
from typing import Dict
|
|
@@ -12,11 +13,28 @@ from packaging.requirements import Requirement
|
|
|
12
13
|
from packaging.utils import canonicalize_name
|
|
13
14
|
|
|
14
15
|
from cartography.intel.github.util import fetch_all
|
|
16
|
+
from cartography.intel.github.util import PaginatedGraphqlData
|
|
17
|
+
from cartography.util import backoff_handler
|
|
18
|
+
from cartography.util import retries_with_backoff
|
|
15
19
|
from cartography.util import run_cleanup_job
|
|
16
20
|
from cartography.util import timeit
|
|
17
21
|
|
|
18
22
|
logger = logging.getLogger(__name__)
|
|
19
23
|
|
|
24
|
+
|
|
25
|
+
# Representation of a user's permission level and affiliation to a GitHub repo. See:
|
|
26
|
+
# - Permission: https://docs.github.com/en/graphql/reference/enums#repositorypermission
|
|
27
|
+
# - Affiliation: https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
|
|
28
|
+
# One record per (user, repo) pairing:
#   user        - dict describing the GitHub user
#   permission  - repository permission, e.g. 'WRITE', 'MAINTAIN', 'ADMIN'
#   affiliation - collaborator affiliation, 'OUTSIDE' or 'DIRECT'
UserAffiliationAndRepoPermission = namedtuple(
    'UserAffiliationAndRepoPermission',
    ('user', 'permission', 'affiliation'),
)
|
|
36
|
+
|
|
37
|
+
|
|
20
38
|
GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
|
|
21
39
|
query($login: String!, $cursor: String) {
|
|
22
40
|
organization(login: $login)
|
|
@@ -59,17 +77,11 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
|
|
|
59
77
|
login
|
|
60
78
|
__typename
|
|
61
79
|
}
|
|
62
|
-
collaborators(
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
url
|
|
68
|
-
login
|
|
69
|
-
name
|
|
70
|
-
email
|
|
71
|
-
company
|
|
72
|
-
}
|
|
80
|
+
directCollaborators: collaborators(first: 100, affiliation: DIRECT) {
|
|
81
|
+
totalCount
|
|
82
|
+
}
|
|
83
|
+
outsideCollaborators: collaborators(first: 100, affiliation: OUTSIDE) {
|
|
84
|
+
totalCount
|
|
73
85
|
}
|
|
74
86
|
requirements:object(expression: "HEAD:requirements.txt") {
|
|
75
87
|
... on Blob {
|
|
@@ -89,6 +101,145 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
|
|
|
89
101
|
# Note: In the above query, `HEAD` references the default branch.
|
|
90
102
|
# See https://stackoverflow.com/questions/48935381/github-graphql-api-default-branch-in-repository
|
|
91
103
|
|
|
104
|
+
GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL = """
|
|
105
|
+
query($login: String!, $repo: String!, $affiliation: CollaboratorAffiliation!, $cursor: String) {
|
|
106
|
+
organization(login: $login) {
|
|
107
|
+
url
|
|
108
|
+
login
|
|
109
|
+
repository(name: $repo){
|
|
110
|
+
name
|
|
111
|
+
collaborators(first: 50, affiliation: $affiliation, after: $cursor) {
|
|
112
|
+
edges {
|
|
113
|
+
permission
|
|
114
|
+
}
|
|
115
|
+
nodes {
|
|
116
|
+
url
|
|
117
|
+
login
|
|
118
|
+
name
|
|
119
|
+
email
|
|
120
|
+
company
|
|
121
|
+
}
|
|
122
|
+
pageInfo{
|
|
123
|
+
endCursor
|
|
124
|
+
hasNextPage
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
rateLimit {
|
|
130
|
+
limit
|
|
131
|
+
cost
|
|
132
|
+
remaining
|
|
133
|
+
resetAt
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
"""
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _get_repo_collaborators_inner_func(
        org: str,
        api_url: str,
        token: str,
        repo_raw_data: list[dict[str, Any]],
        affiliation: str,
        collab_users: list[dict[str, Any]],
        collab_permission: list[str],
) -> dict[str, list[UserAffiliationAndRepoPermission]]:
    """
    Collect, for every repo in `repo_raw_data`, the collaborators that have the given affiliation.

    Repos whose matching `totalCount` is 0 are mapped to an empty list without making an API call.

    :param org: The name of the target Github organization as string.
    :param api_url: The Github v4 API endpoint as string.
    :param token: The Github API token as string.
    :param repo_raw_data: A list of dicts representing repos. Each dict is read for 'name', 'url' and the
        'outsideCollaborators' / 'directCollaborators' total counts.
    :param affiliation: Either 'DIRECT' or 'OUTSIDE'.
    :param collab_users: Caller-supplied accumulator. Every user fetched is still appended to it so existing
        callers keep seeing the same side effect, but it is no longer used to build the per-repo result.
    :param collab_permission: Caller-supplied accumulator for permissions; same remarks as `collab_users`.
    :return: A dictionary of repo URL to list of UserAffiliationAndRepoPermission for that repo only.
    :raises TypeError: if GitHub returns a None edge; the caller retries on this exception.
    """
    result: dict[str, list[UserAffiliationAndRepoPermission]] = {}

    for repo in repo_raw_data:
        repo_name = repo['name']
        repo_url = repo['url']

        if ((affiliation == 'OUTSIDE' and repo['outsideCollaborators']['totalCount'] == 0) or
                (affiliation == 'DIRECT' and repo['directCollaborators']['totalCount'] == 0)):
            # repo has no collabs of the affiliation type we're looking for, so don't waste time making an API call
            result[repo_url] = []
            continue

        logger.info(f"Loading {affiliation} collaborators for repo {repo_name}.")
        collaborators = _get_repo_collaborators(token, api_url, org, repo_name, affiliation)

        # nodes and edges are expected to always be present given that we only call for them if totalCount is > 0
        # however sometimes GitHub returns None, as in issue 1334 and 1404. Hence the `or []`.
        # Build per-repo lists first: zipping the shared accumulators would attribute the collaborators of
        # previously processed repos to this repo as well, and a TypeError raised half-way through would
        # leave partial entries behind that skew the user/permission pairing on retry.
        repo_users = list(collaborators.nodes or [])
        repo_permissions = [edge['permission'] for edge in collaborators.edges or []]

        # Keep feeding the caller-supplied accumulators for backward compatibility.
        collab_users.extend(repo_users)
        collab_permission.extend(repo_permissions)

        result[repo_url] = [
            UserAffiliationAndRepoPermission(user, permission, affiliation)
            for user, permission in zip(repo_users, repo_permissions)
        ]
    return result
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _get_repo_collaborators_for_multiple_repos(
        repo_raw_data: list[dict[str, Any]],
        affiliation: str,
        org: str,
        api_url: str,
        token: str,
) -> dict[str, list[UserAffiliationAndRepoPermission]]:
    """
    Look up the collaborators of each repo in the given list.

    The work is delegated to `_get_repo_collaborators_inner_func`, wrapped by `retries_with_backoff` with
    `TypeError`, `5` and `backoff_handler` (presumably: retry on TypeError, at most 5 tries, logging through
    the handler -- confirm against cartography.util).

    :param repo_raw_data: A list of dicts representing repos. See tests.data.github.repos.GET_REPOS for data shape.
    :param affiliation: Which collaborators to retrieve: 'DIRECT' or 'OUTSIDE'.
        See https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
    :param org: The name of the target Github organization as string.
    :param api_url: The Github v4 API endpoint as string.
    :param token: The Github API token as string.
    :return: A dictionary of repo URL to list of UserAffiliationAndRepoPermission
    """
    logger.info(f'Retrieving repo collaborators for affiliation "{affiliation}" on org "{org}".')

    # Fresh accumulators handed to the inner function for this invocation.
    users_accumulator: List[dict[str, Any]] = []
    permissions_accumulator: List[str] = []

    fetch_with_retries = retries_with_backoff(
        _get_repo_collaborators_inner_func,
        TypeError,
        5,
        backoff_handler,
    )
    collaborators_by_repo_url: dict[str, list[UserAffiliationAndRepoPermission]] = fetch_with_retries(
        org=org,
        api_url=api_url,
        token=token,
        repo_raw_data=repo_raw_data,
        affiliation=affiliation,
        collab_users=users_accumulator,
        collab_permission=permissions_accumulator,
    )
    return collaborators_by_repo_url
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _get_repo_collaborators(
        token: str, api_url: str, organization: str, repo: str, affiliation: str,
) -> PaginatedGraphqlData:
    """
    Fetch the collaborators of a single repository, as described in
    https://docs.github.com/en/graphql/reference/objects#repositorycollaboratorconnection.

    :param token: The Github API token as string.
    :param api_url: The Github v4 API endpoint as string.
    :param organization: The name of the target Github organization as string.
    :param repo: The name of the target Github repository as string.
    :param affiliation: Which collaborators to retrieve: 'DIRECT' or 'OUTSIDE'.
        See https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
    :return: The paginated collaborator data produced by fetch_all; callers in this module read its
        `nodes` (users) and `edges` (permissions).
    """
    # Forwarded to fetch_all as extra keyword arguments alongside resource_inner_type.
    extra_query_variables = {'repo': repo, 'affiliation': affiliation}
    paginated_collaborators, _ = fetch_all(
        token,
        api_url,
        organization,
        GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL,
        'repository',
        resource_inner_type='collaborators',
        **extra_query_variables,
    )
    return paginated_collaborators
|
|
242
|
+
|
|
92
243
|
|
|
93
244
|
@timeit
|
|
94
245
|
def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
@@ -111,11 +262,19 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
|
111
262
|
return repos.nodes
|
|
112
263
|
|
|
113
264
|
|
|
114
|
-
def transform(
|
|
265
|
+
def transform(
|
|
266
|
+
repos_json: List[Dict],
|
|
267
|
+
direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
|
|
268
|
+
outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
|
|
269
|
+
) -> Dict:
|
|
115
270
|
"""
|
|
116
271
|
Parses the JSON returned from GitHub API to create data for graph ingestion
|
|
117
|
-
:param repos_json: the list of individual repository nodes from GitHub.
|
|
118
|
-
|
|
272
|
+
:param repos_json: the list of individual repository nodes from GitHub.
|
|
273
|
+
See tests.data.github.repos.GET_REPOS for data shape.
|
|
274
|
+
:param direct_collaborators: dict of repo URL to list of direct collaborators.
|
|
275
|
+
See tests.data.github.repos.DIRECT_COLLABORATORS for data shape.
|
|
276
|
+
:param outside_collaborators: dict of repo URL to list of outside collaborators.
|
|
277
|
+
See tests.data.github.repos.OUTSIDE_COLLABORATORS for data shape.
|
|
119
278
|
:return: Dict containing the repos, repo->language mapping, owners->repo mapping, outside collaborators->repo
|
|
120
279
|
mapping, and Python requirements files (if any) in a repo.
|
|
121
280
|
"""
|
|
@@ -123,7 +282,10 @@ def transform(repos_json: List[Dict]) -> Dict:
|
|
|
123
282
|
transformed_repo_languages: List[Dict] = []
|
|
124
283
|
transformed_repo_owners: List[Dict] = []
|
|
125
284
|
# See https://docs.github.com/en/graphql/reference/enums#repositorypermission
|
|
126
|
-
|
|
285
|
+
transformed_outside_collaborators: Dict[str, List[Any]] = {
|
|
286
|
+
'ADMIN': [], 'MAINTAIN': [], 'READ': [], 'TRIAGE': [], 'WRITE': [],
|
|
287
|
+
}
|
|
288
|
+
transformed_direct_collaborators: Dict[str, List[Any]] = {
|
|
127
289
|
'ADMIN': [], 'MAINTAIN': [], 'READ': [], 'TRIAGE': [], 'WRITE': [],
|
|
128
290
|
}
|
|
129
291
|
transformed_requirements_files: List[Dict] = []
|
|
@@ -131,14 +293,30 @@ def transform(repos_json: List[Dict]) -> Dict:
|
|
|
131
293
|
_transform_repo_languages(repo_object['url'], repo_object, transformed_repo_languages)
|
|
132
294
|
_transform_repo_objects(repo_object, transformed_repo_list)
|
|
133
295
|
_transform_repo_owners(repo_object['owner']['url'], repo_object, transformed_repo_owners)
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
296
|
+
|
|
297
|
+
# Allow sync to continue if we didn't have permissions to list collaborators
|
|
298
|
+
repo_url = repo_object['url']
|
|
299
|
+
if repo_url in outside_collaborators:
|
|
300
|
+
_transform_collaborators(
|
|
301
|
+
repo_object['url'],
|
|
302
|
+
outside_collaborators[repo_object['url']],
|
|
303
|
+
transformed_outside_collaborators,
|
|
304
|
+
)
|
|
305
|
+
if repo_url in direct_collaborators:
|
|
306
|
+
_transform_collaborators(
|
|
307
|
+
repo_object['url'],
|
|
308
|
+
direct_collaborators[repo_object['url']],
|
|
309
|
+
transformed_direct_collaborators,
|
|
310
|
+
)
|
|
311
|
+
|
|
312
|
+
_transform_requirements_txt(repo_object['requirements'], repo_url, transformed_requirements_files)
|
|
313
|
+
_transform_setup_cfg_requirements(repo_object['setupCfg'], repo_url, transformed_requirements_files)
|
|
137
314
|
results = {
|
|
138
315
|
'repos': transformed_repo_list,
|
|
139
316
|
'repo_languages': transformed_repo_languages,
|
|
140
317
|
'repo_owners': transformed_repo_owners,
|
|
141
|
-
'
|
|
318
|
+
'repo_outside_collaborators': transformed_outside_collaborators,
|
|
319
|
+
'repo_direct_collaborators': transformed_direct_collaborators,
|
|
142
320
|
'python_requirements': transformed_requirements_files,
|
|
143
321
|
}
|
|
144
322
|
return results
|
|
@@ -229,11 +407,15 @@ def _transform_repo_languages(repo_url: str, repo: Dict, repo_languages: List[Di
|
|
|
229
407
|
})
|
|
230
408
|
|
|
231
409
|
|
|
232
|
-
def _transform_collaborators(
|
|
410
|
+
def _transform_collaborators(
|
|
411
|
+
repo_url: str, collaborators: List[UserAffiliationAndRepoPermission], transformed_collaborators: Dict,
|
|
412
|
+
) -> None:
|
|
233
413
|
"""
|
|
234
|
-
Performs data adjustments for
|
|
414
|
+
Performs data adjustments for collaborators in a GitHub repo.
|
|
235
415
|
Output data shape = [{permission, repo_url, url (the user's URL), login, name}, ...]
|
|
236
|
-
:param collaborators:
|
|
416
|
+
:param collaborators: For data shape, see
|
|
417
|
+
cartography.tests.data.github.repos.DIRECT_COLLABORATORS
|
|
418
|
+
cartography.tests.data.github.repos.OUTSIDE_COLLABORATORS
|
|
237
419
|
:param repo_url: The URL of the GitHub repo.
|
|
238
420
|
:param transformed_collaborators: Output dict. Data shape =
|
|
239
421
|
{'ADMIN': [{ user }, ...], 'MAINTAIN': [{ user }, ...], 'READ': [ ... ], 'TRIAGE': [ ... ], 'WRITE': [ ... ]}
|
|
@@ -241,10 +423,11 @@ def _transform_collaborators(collaborators: Dict, repo_url: str, transformed_col
|
|
|
241
423
|
"""
|
|
242
424
|
# `collaborators` is sometimes None
|
|
243
425
|
if collaborators:
|
|
244
|
-
for
|
|
245
|
-
|
|
426
|
+
for collaborator in collaborators:
|
|
427
|
+
user = collaborator.user
|
|
246
428
|
user['repo_url'] = repo_url
|
|
247
|
-
|
|
429
|
+
user['affiliation'] = collaborator.affiliation
|
|
430
|
+
transformed_collaborators[collaborator.permission].append(user)
|
|
248
431
|
|
|
249
432
|
|
|
250
433
|
def _transform_requirements_txt(
|
|
@@ -482,7 +665,7 @@ def load_github_owners(neo4j_session: neo4j.Session, update_tag: int, repo_owner
|
|
|
482
665
|
|
|
483
666
|
|
|
484
667
|
@timeit
|
|
485
|
-
def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborators: Dict) -> None:
|
|
668
|
+
def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborators: Dict, affiliation: str) -> None:
|
|
486
669
|
query = Template("""
|
|
487
670
|
UNWIND $UserData as user
|
|
488
671
|
|
|
@@ -502,7 +685,7 @@ def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborat
|
|
|
502
685
|
SET o.lastupdated = $UpdateTag
|
|
503
686
|
""")
|
|
504
687
|
for collab_type in collaborators.keys():
|
|
505
|
-
relationship_label = f"
|
|
688
|
+
relationship_label = f"{affiliation}_COLLAB_{collab_type}"
|
|
506
689
|
neo4j_session.run(
|
|
507
690
|
query.safe_substitute(rel_label=relationship_label),
|
|
508
691
|
UserData=collaborators[collab_type],
|
|
@@ -515,7 +698,12 @@ def load(neo4j_session: neo4j.Session, common_job_parameters: Dict, repo_data: D
|
|
|
515
698
|
load_github_repos(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repos'])
|
|
516
699
|
load_github_owners(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_owners'])
|
|
517
700
|
load_github_languages(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_languages'])
|
|
518
|
-
load_collaborators(
|
|
701
|
+
load_collaborators(
|
|
702
|
+
neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_direct_collaborators'], 'DIRECT',
|
|
703
|
+
)
|
|
704
|
+
load_collaborators(
|
|
705
|
+
neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_outside_collaborators'], 'OUTSIDE',
|
|
706
|
+
)
|
|
519
707
|
load_python_requirements(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['python_requirements'])
|
|
520
708
|
|
|
521
709
|
|
|
@@ -561,6 +749,18 @@ def sync(
|
|
|
561
749
|
"""
|
|
562
750
|
logger.info("Syncing GitHub repos")
|
|
563
751
|
repos_json = get(github_api_key, github_url, organization)
|
|
564
|
-
|
|
752
|
+
direct_collabs: dict[str, list[UserAffiliationAndRepoPermission]] = {}
|
|
753
|
+
outside_collabs: dict[str, list[UserAffiliationAndRepoPermission]] = {}
|
|
754
|
+
try:
|
|
755
|
+
direct_collabs = _get_repo_collaborators_for_multiple_repos(
|
|
756
|
+
repos_json, "DIRECT", organization, github_url, github_api_key,
|
|
757
|
+
)
|
|
758
|
+
outside_collabs = _get_repo_collaborators_for_multiple_repos(
|
|
759
|
+
repos_json, "OUTSIDE", organization, github_url, github_api_key,
|
|
760
|
+
)
|
|
761
|
+
except TypeError:
|
|
762
|
+
# due to permission errors or transient network error or some other nonsense
|
|
763
|
+
logger.warning('Unable to list repo collaborators due to permission errors; continuing on.', exc_info=True)
|
|
764
|
+
repo_data = transform(repos_json, direct_collabs, outside_collabs)
|
|
565
765
|
load(neo4j_session, common_job_parameters, repo_data)
|
|
566
766
|
run_cleanup_job('github_repos_cleanup.json', neo4j_session, common_job_parameters)
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from collections import namedtuple
|
|
3
|
-
from time import sleep
|
|
4
3
|
from typing import Any
|
|
5
4
|
from typing import Dict
|
|
6
5
|
from typing import List
|
|
@@ -13,11 +12,27 @@ from cartography.graph.job import GraphJob
|
|
|
13
12
|
from cartography.intel.github.util import fetch_all
|
|
14
13
|
from cartography.intel.github.util import PaginatedGraphqlData
|
|
15
14
|
from cartography.models.github.teams import GitHubTeamSchema
|
|
15
|
+
from cartography.util import retries_with_backoff
|
|
16
16
|
from cartography.util import timeit
|
|
17
17
|
|
|
18
18
|
logger = logging.getLogger(__name__)
|
|
19
19
|
|
|
20
|
+
# Pairs a repo URL with the permission a team holds on it.
# See https://docs.github.com/en/graphql/reference/enums#repositorypermission
RepoPermission = namedtuple('RepoPermission', 'repo_url permission')
# Pairs a user URL with that member's role in a team.
# See https://docs.github.com/en/graphql/reference/enums#teammemberrole
UserRole = namedtuple('UserRole', 'user_url role')
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def backoff_handler(details: Dict) -> None:
    """
    Log a warning about an upcoming retry of a GitHub call made from this module.

    :param details: Dict describing the retry. It must provide 'kwargs' (the keyword arguments of the call
        being retried) as well as the 'wait', 'tries' and 'target' entries used by the log message.
    """
    call_kwargs = details['kwargs']
    # Fall back to a placeholder when the retried call carried no (or an empty) team name.
    team_name = call_kwargs.get('team_name') or 'not present in kwargs'

    log_fields = dict(details)
    log_fields['team_name'] = team_name

    message = "Backing off {wait:0.1f} seconds after {tries} tries. Calling function {target} for team {team_name}"
    logger.warning(message.format(**log_fields))
|
|
21
36
|
|
|
22
37
|
|
|
23
38
|
@timeit
|
|
@@ -32,7 +47,10 @@ def get_teams(org: str, api_url: str, token: str) -> Tuple[PaginatedGraphqlData,
|
|
|
32
47
|
slug
|
|
33
48
|
url
|
|
34
49
|
description
|
|
35
|
-
repositories
|
|
50
|
+
repositories {
|
|
51
|
+
totalCount
|
|
52
|
+
}
|
|
53
|
+
members(membership: IMMEDIATE) {
|
|
36
54
|
totalCount
|
|
37
55
|
}
|
|
38
56
|
}
|
|
@@ -47,6 +65,26 @@ def get_teams(org: str, api_url: str, token: str) -> Tuple[PaginatedGraphqlData,
|
|
|
47
65
|
return fetch_all(token, api_url, org, org_teams_gql, 'teams')
|
|
48
66
|
|
|
49
67
|
|
|
68
|
+
def _get_teams_repos_inner_func(
        org: str,
        api_url: str,
        token: str,
        team_name: str,
        repo_urls: list[str],
        repo_permissions: list[str],
) -> None:
    """
    Fetch the repos of one team and append their URLs and permissions to the given output lists.

    The output lists are only touched once both the URL list and the permission list have been built
    completely. The caller retries this function on TypeError while re-using the same list objects, so
    appending item by item would leave partial data behind and produce duplicated or misaligned
    (url, permission) pairs after a retry.

    :param org: The name of the target Github organization as string.
    :param api_url: The Github v4 API endpoint as string.
    :param token: The Github API token as string.
    :param team_name: The team to look up; passed to `_get_team_repos`.
    :param repo_urls: Output list; the URL of every repo node is appended.
    :param repo_permissions: Output list; the permission of every edge is appended.
    :raises TypeError: if GitHub returned a None node or edge.
    """
    logger.info(f"Loading team repos for {team_name}.")
    team_repos = _get_team_repos(org, api_url, token, team_name)

    # The `or []` is because `.nodes` can be None. See:
    # https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection
    fetched_urls = [repo['url'] for repo in team_repos.nodes or []]
    # The `or []` is because `.edges` can be None.
    fetched_permissions = [edge['permission'] for edge in team_repos.edges or []]

    # Commit only after both comprehensions succeeded.
    repo_urls.extend(fetched_urls)
    repo_permissions.extend(fetched_permissions)
|
|
86
|
+
|
|
87
|
+
|
|
50
88
|
@timeit
|
|
51
89
|
def _get_team_repos_for_multiple_teams(
|
|
52
90
|
team_raw_data: list[dict[str, Any]],
|
|
@@ -64,36 +102,22 @@ def _get_team_repos_for_multiple_teams(
|
|
|
64
102
|
result[team_name] = []
|
|
65
103
|
continue
|
|
66
104
|
|
|
67
|
-
repo_urls = []
|
|
68
|
-
repo_permissions = []
|
|
69
|
-
|
|
70
|
-
max_tries = 5
|
|
71
|
-
|
|
72
|
-
for current_try in range(1, max_tries + 1):
|
|
73
|
-
team_repos = _get_team_repos(org, api_url, token, team_name)
|
|
74
|
-
|
|
75
|
-
try:
|
|
76
|
-
# The `or []` is because `.nodes` can be None. See:
|
|
77
|
-
# https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection
|
|
78
|
-
for repo in team_repos.nodes or []:
|
|
79
|
-
repo_urls.append(repo['url'])
|
|
80
|
-
|
|
81
|
-
# The `or []` is because `.edges` can be None.
|
|
82
|
-
for edge in team_repos.edges or []:
|
|
83
|
-
repo_permissions.append(edge['permission'])
|
|
84
|
-
# We're done! Break out of the retry loop.
|
|
85
|
-
break
|
|
86
|
-
|
|
87
|
-
except TypeError:
|
|
88
|
-
# Handles issue #1334
|
|
89
|
-
logger.warning(
|
|
90
|
-
f"GitHub returned None when trying to find repo or permission data for team {team_name}.",
|
|
91
|
-
exc_info=True,
|
|
92
|
-
)
|
|
93
|
-
if current_try == max_tries:
|
|
94
|
-
raise RuntimeError(f"GitHub returned a None repo url for team {team_name}, retries exhausted.")
|
|
95
|
-
sleep(current_try ** 2)
|
|
105
|
+
repo_urls: List[str] = []
|
|
106
|
+
repo_permissions: List[str] = []
|
|
96
107
|
|
|
108
|
+
retries_with_backoff(
|
|
109
|
+
_get_teams_repos_inner_func,
|
|
110
|
+
TypeError,
|
|
111
|
+
5,
|
|
112
|
+
backoff_handler,
|
|
113
|
+
)(
|
|
114
|
+
org=org,
|
|
115
|
+
api_url=api_url,
|
|
116
|
+
token=token,
|
|
117
|
+
team_name=team_name,
|
|
118
|
+
repo_urls=repo_urls,
|
|
119
|
+
repo_permissions=repo_permissions,
|
|
120
|
+
)
|
|
97
121
|
# Shape = [(repo_url, 'WRITE'), ...]]
|
|
98
122
|
result[team_name] = [RepoPermission(url, perm) for url, perm in zip(repo_urls, repo_permissions)]
|
|
99
123
|
return result
|
|
@@ -142,10 +166,97 @@ def _get_team_repos(org: str, api_url: str, token: str, team: str) -> PaginatedG
|
|
|
142
166
|
return team_repos
|
|
143
167
|
|
|
144
168
|
|
|
169
|
+
def _get_team_users_for_multiple_teams(
        team_raw_data: list[dict[str, Any]],
        org: str,
        api_url: str,
        token: str,
) -> dict[str, list[UserRole]]:
    """
    For every team in the given list, retrieve its members together with their role.

    Teams whose `members.totalCount` is 0 are mapped to an empty list without making an API call.

    :param team_raw_data: A list of dicts representing teams; each is read for 'slug' and
        'members' -> 'totalCount'.
    :param org: The name of the target Github organization as string.
    :param api_url: The Github v4 API endpoint as string.
    :param token: The Github API token as string.
    :return: A dictionary of team slug to list of UserRole. Shape of each list = [(user_url, 'MAINTAINER'), ...]
    """
    # Defined once instead of once per team; it receives everything it needs through its parameters.
    def get_teams_users_inner_func(
            org: str, api_url: str, token: str, team_name: str,
            user_urls: List[str], user_roles: List[str],
    ) -> None:
        logger.info(f"Loading team users for {team_name}.")
        team_users = _get_team_users(org, api_url, token, team_name)
        # The `or []` is because `.nodes` can be None. See:
        # https://docs.github.com/en/graphql/reference/objects#teammemberconnection
        fetched_urls = [user['url'] for user in team_users.nodes or []]
        # The `or []` is because `.edges` can be None.
        fetched_roles = [edge['role'] for edge in team_users.edges or []]
        # Only touch the output lists once both were built: this function is retried on TypeError with the
        # same list objects, so partial appends would be duplicated or misaligned after a retry.
        user_urls.extend(fetched_urls)
        user_roles.extend(fetched_roles)

    result: dict[str, list[UserRole]] = {}
    for team in team_raw_data:
        team_name = team['slug']
        user_count = team['members']['totalCount']

        if user_count == 0:
            # This team has no users so let's move on
            result[team_name] = []
            continue

        user_urls: List[str] = []
        user_roles: List[str] = []

        retries_with_backoff(get_teams_users_inner_func, TypeError, 5, backoff_handler)(
            org=org, api_url=api_url, token=token, team_name=team_name, user_urls=user_urls, user_roles=user_roles,
        )

        # Shape = [(user_url, 'MAINTAINER'), ...]
        result[team_name] = [UserRole(url, role) for url, role in zip(user_urls, user_roles)]
    return result
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
@timeit
def _get_team_users(org: str, api_url: str, token: str, team: str) -> PaginatedGraphqlData:
    """
    Fetch the immediate members of one team, page by page, through fetch_all.

    :param org: The name of the target Github organization as string.
    :param api_url: The Github v4 API endpoint as string.
    :param token: The Github API token as string.
    :param team: The team slug, sent as the `$team` query variable.
    :return: The paginated member data produced by fetch_all; callers in this module read the user URLs
        from `nodes` and the roles from `edges`.
    """
    members_query = """
        query($login: String!, $team: String!, $cursor: String) {
            organization(login: $login) {
                url
                login
                team(slug: $team) {
                    slug
                    members(first: 100, after: $cursor, membership: IMMEDIATE) {
                        totalCount
                        nodes {
                            url
                        }
                        edges {
                            role
                        }
                        pageInfo {
                            endCursor
                            hasNextPage
                        }
                    }
                }
            }
            rateLimit {
                limit
                cost
                remaining
                resetAt
            }
        }
    """
    # fetch_all returns a pair; only the paginated member data is needed here.
    paginated_members, _ = fetch_all(
        token, api_url, org, members_query, 'team',
        resource_inner_type='members',
        team=team,
    )
    return paginated_members
|
|
253
|
+
|
|
254
|
+
|
|
145
255
|
def transform_teams(
|
|
146
256
|
team_paginated_data: PaginatedGraphqlData,
|
|
147
257
|
org_data: Dict[str, Any],
|
|
148
258
|
team_repo_data: dict[str, list[RepoPermission]],
|
|
259
|
+
team_user_data: dict[str, list[UserRole]],
|
|
149
260
|
) -> list[dict[str, Any]]:
|
|
150
261
|
result = []
|
|
151
262
|
for team in team_paginated_data.nodes:
|
|
@@ -155,19 +266,29 @@ def transform_teams(
|
|
|
155
266
|
'url': team['url'],
|
|
156
267
|
'description': team['description'],
|
|
157
268
|
'repo_count': team['repositories']['totalCount'],
|
|
269
|
+
'member_count': team['members']['totalCount'],
|
|
158
270
|
'org_url': org_data['url'],
|
|
159
271
|
'org_login': org_data['login'],
|
|
160
272
|
}
|
|
161
273
|
repo_permissions = team_repo_data[team_name]
|
|
162
|
-
|
|
274
|
+
user_roles = team_user_data[team_name]
|
|
275
|
+
|
|
276
|
+
if not repo_permissions and not user_roles:
|
|
163
277
|
result.append(repo_info)
|
|
164
278
|
continue
|
|
165
279
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
280
|
+
if repo_permissions:
|
|
281
|
+
# `permission` can be one of ADMIN, READ, WRITE, TRIAGE, or MAINTAIN
|
|
282
|
+
for repo_url, permission in repo_permissions:
|
|
283
|
+
repo_info_copy = repo_info.copy()
|
|
284
|
+
repo_info_copy[permission] = repo_url
|
|
285
|
+
result.append(repo_info_copy)
|
|
286
|
+
if user_roles:
|
|
287
|
+
# `role` can be one of MAINTAINER, MEMBER
|
|
288
|
+
for user_url, role in user_roles:
|
|
289
|
+
repo_info_copy = repo_info.copy()
|
|
290
|
+
repo_info_copy[role] = user_url
|
|
291
|
+
result.append(repo_info_copy)
|
|
171
292
|
return result
|
|
172
293
|
|
|
173
294
|
|
|
@@ -203,7 +324,8 @@ def sync_github_teams(
|
|
|
203
324
|
) -> None:
|
|
204
325
|
teams_paginated, org_data = get_teams(organization, github_url, github_api_key)
|
|
205
326
|
team_repos = _get_team_repos_for_multiple_teams(teams_paginated.nodes, organization, github_url, github_api_key)
|
|
206
|
-
|
|
327
|
+
team_users = _get_team_users_for_multiple_teams(teams_paginated.nodes, organization, github_url, github_api_key)
|
|
328
|
+
processed_data = transform_teams(teams_paginated, org_data, team_repos, team_users)
|
|
207
329
|
load_team_repos(neo4j_session, processed_data, common_job_parameters['UPDATE_TAG'], org_data['url'])
|
|
208
330
|
common_job_parameters['org_url'] = org_data['url']
|
|
209
331
|
cleanup(neo4j_session, common_job_parameters)
|
|
@@ -90,6 +90,7 @@ def get_users(token: str, api_url: str, organization: str) -> Tuple[List[Dict],
|
|
|
90
90
|
2. data on the owning GitHub organization
|
|
91
91
|
see tests.data.github.users.GITHUB_USER_DATA for shape of both
|
|
92
92
|
"""
|
|
93
|
+
logger.info(f"Retrieving users from GitHub organization {organization}")
|
|
93
94
|
users, org = fetch_all(
|
|
94
95
|
token,
|
|
95
96
|
api_url,
|
|
@@ -112,6 +113,7 @@ def get_enterprise_owners(token: str, api_url: str, organization: str) -> Tuple[
|
|
|
112
113
|
3. data on the owning GitHub organization
|
|
113
114
|
see tests.data.github.users.GITHUB_ENTERPRISE_OWNER_DATA for shape
|
|
114
115
|
"""
|
|
116
|
+
logger.info(f"Retrieving enterprise owners from GitHub organization {organization}")
|
|
115
117
|
owners, org = fetch_all(
|
|
116
118
|
token,
|
|
117
119
|
api_url,
|
cartography/intel/github/util.py
CHANGED
|
@@ -163,7 +163,8 @@ def fetch_all(
|
|
|
163
163
|
|
|
164
164
|
if retry >= retries:
|
|
165
165
|
logger.error(
|
|
166
|
-
f"GitHub: Could not retrieve page of resource `{resource_type}` due to HTTP error
|
|
166
|
+
f"GitHub: Could not retrieve page of resource `{resource_type}` due to HTTP error "
|
|
167
|
+
f"after {retry} retries. Raising exception.",
|
|
167
168
|
exc_info=True,
|
|
168
169
|
)
|
|
169
170
|
raise exc
|