cartography 0.96.0rc2__py3-none-any.whl → 0.96.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/client/core/tx.py +1 -1
- cartography/config.py +2 -2
- cartography/data/jobs/cleanup/github_repos_cleanup.json +25 -0
- cartography/intel/aws/apigateway.py +3 -3
- cartography/intel/aws/ec2/auto_scaling_groups.py +147 -185
- cartography/intel/aws/ec2/instances.py +2 -0
- cartography/intel/aws/ec2/network_acls.py +2 -1
- cartography/intel/aws/ec2/subnets.py +2 -0
- cartography/intel/aws/iam.py +4 -3
- cartography/intel/cve/__init__.py +1 -1
- cartography/intel/cve/feed.py +10 -7
- cartography/intel/github/repos.py +209 -27
- cartography/intel/github/teams.py +160 -38
- cartography/models/aws/ec2/auto_scaling_groups.py +204 -0
- cartography/models/aws/ec2/launch_configurations.py +55 -0
- cartography/models/aws/ec2/network_acl_rules.py +1 -0
- cartography/models/aws/identitycenter/__init__.py +0 -0
- cartography/models/aws/identitycenter/awsidentitycenter.py +44 -0
- cartography/models/aws/identitycenter/awspermissionset.py +84 -0
- cartography/models/aws/identitycenter/awsssouser.py +68 -0
- cartography/models/github/teams.py +29 -0
- cartography/util.py +22 -0
- cartography-0.96.1.dist-info/METADATA +53 -0
- {cartography-0.96.0rc2.dist-info → cartography-0.96.1.dist-info}/RECORD +28 -22
- {cartography-0.96.0rc2.dist-info → cartography-0.96.1.dist-info}/WHEEL +1 -1
- cartography-0.96.0rc2.dist-info/METADATA +0 -53
- {cartography-0.96.0rc2.dist-info → cartography-0.96.1.dist-info}/LICENSE +0 -0
- {cartography-0.96.0rc2.dist-info → cartography-0.96.1.dist-info}/entry_points.txt +0 -0
- {cartography-0.96.0rc2.dist-info → cartography-0.96.1.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import configparser
|
|
2
2
|
import logging
|
|
3
|
+
from collections import namedtuple
|
|
3
4
|
from string import Template
|
|
4
5
|
from typing import Any
|
|
5
6
|
from typing import Dict
|
|
@@ -12,11 +13,28 @@ from packaging.requirements import Requirement
|
|
|
12
13
|
from packaging.utils import canonicalize_name
|
|
13
14
|
|
|
14
15
|
from cartography.intel.github.util import fetch_all
|
|
16
|
+
from cartography.intel.github.util import PaginatedGraphqlData
|
|
17
|
+
from cartography.util import backoff_handler
|
|
18
|
+
from cartography.util import retries_with_backoff
|
|
15
19
|
from cartography.util import run_cleanup_job
|
|
16
20
|
from cartography.util import timeit
|
|
17
21
|
|
|
18
22
|
logger = logging.getLogger(__name__)
|
|
19
23
|
|
|
24
|
+
|
|
25
|
+
# Representation of a user's permission level and affiliation to a GitHub repo. See:
|
|
26
|
+
# - Permission: https://docs.github.com/en/graphql/reference/enums#repositorypermission
|
|
27
|
+
# - Affiliation: https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
|
|
28
|
+
UserAffiliationAndRepoPermission = namedtuple(
|
|
29
|
+
'UserAffiliationAndRepoPermission',
|
|
30
|
+
[
|
|
31
|
+
'user', # Dict
|
|
32
|
+
'permission', # 'WRITE', 'MAINTAIN', 'ADMIN', etc
|
|
33
|
+
'affiliation', # 'OUTSIDE', 'DIRECT'
|
|
34
|
+
],
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
|
|
20
38
|
GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
|
|
21
39
|
query($login: String!, $cursor: String) {
|
|
22
40
|
organization(login: $login)
|
|
@@ -59,17 +77,11 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
|
|
|
59
77
|
login
|
|
60
78
|
__typename
|
|
61
79
|
}
|
|
62
|
-
collaborators(
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
url
|
|
68
|
-
login
|
|
69
|
-
name
|
|
70
|
-
email
|
|
71
|
-
company
|
|
72
|
-
}
|
|
80
|
+
directCollaborators: collaborators(first: 100, affiliation: DIRECT) {
|
|
81
|
+
totalCount
|
|
82
|
+
}
|
|
83
|
+
outsideCollaborators: collaborators(first: 100, affiliation: OUTSIDE) {
|
|
84
|
+
totalCount
|
|
73
85
|
}
|
|
74
86
|
requirements:object(expression: "HEAD:requirements.txt") {
|
|
75
87
|
... on Blob {
|
|
@@ -89,6 +101,142 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
|
|
|
89
101
|
# Note: In the above query, `HEAD` references the default branch.
|
|
90
102
|
# See https://stackoverflow.com/questions/48935381/github-graphql-api-default-branch-in-repository
|
|
91
103
|
|
|
104
|
+
GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL = """
|
|
105
|
+
query($login: String!, $repo: String!, $affiliation: CollaboratorAffiliation!, $cursor: String) {
|
|
106
|
+
organization(login: $login) {
|
|
107
|
+
url
|
|
108
|
+
login
|
|
109
|
+
repository(name: $repo){
|
|
110
|
+
name
|
|
111
|
+
collaborators(first: 50, affiliation: $affiliation, after: $cursor) {
|
|
112
|
+
edges {
|
|
113
|
+
permission
|
|
114
|
+
}
|
|
115
|
+
nodes {
|
|
116
|
+
url
|
|
117
|
+
login
|
|
118
|
+
name
|
|
119
|
+
email
|
|
120
|
+
company
|
|
121
|
+
}
|
|
122
|
+
pageInfo{
|
|
123
|
+
endCursor
|
|
124
|
+
hasNextPage
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
rateLimit {
|
|
130
|
+
limit
|
|
131
|
+
cost
|
|
132
|
+
remaining
|
|
133
|
+
resetAt
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
"""
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _get_repo_collaborators_inner_func(
    org: str,
    api_url: str,
    token: str,
    repo_name: str,
    affiliation: str,
    collab_users: list[dict[str, Any]],
    collab_permission: list[str],
) -> None:
    """
    Fetch the collaborators of one repo and append them to the caller-supplied output lists.

    This function is invoked through `retries_with_backoff` and retried on TypeError. To keep retries safe, the
    results of an attempt are gathered in local lists and only copied to the output lists once the whole attempt
    has succeeded; a failed attempt therefore leaves `collab_users` and `collab_permission` untouched instead of
    leaving duplicated or misaligned entries behind for the next attempt.
    :param org: The name of the target Github organization as string.
    :param api_url: The Github v4 API endpoint as string.
    :param token: The Github API token as string.
    :param repo_name: The name of the target Github repository as string.
    :param affiliation: The type of affiliation to retrieve collaborators for. Either 'DIRECT' or 'OUTSIDE'.
    :param collab_users: Output list. The collaborator nodes of the repo are appended to it.
    :param collab_permission: Output list. The `permission` of each collaborator edge is appended to it.
    :return: Nothing. Results are delivered through `collab_users` and `collab_permission`.
    """
    logger.info(f"Loading {affiliation} collaborators for repo {repo_name}.")
    collaborators = _get_repo_collaborators(token, api_url, org, repo_name, affiliation)

    # nodes and edges are expected to always be present given that we only call for them if totalCount is > 0
    # however sometimes GitHub returns None, as in issue 1334 and 1404. The `or []` covers that case.
    fetched_users = list(collaborators.nodes or [])
    # A None edge raises TypeError here, before anything has been written to the output lists.
    fetched_permissions = [edge['permission'] for edge in collaborators.edges or []]

    # Only publish the results once the whole attempt has succeeded.
    collab_users.extend(fetched_users)
    collab_permission.extend(fetched_permissions)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _get_repo_collaborators_for_multiple_repos(
    repo_raw_data: list[dict[str, Any]],
    affiliation: str,
    org: str,
    api_url: str,
    token: str,
) -> dict[str, list[UserAffiliationAndRepoPermission]]:
    """
    Collect, for each repo in the given list, its collaborators of a single affiliation type.
    :param repo_raw_data: A list of dicts representing repos. See tests.data.github.repos.GET_REPOS for data shape.
    :param affiliation: The type of affiliation to retrieve collaborators for. Either 'DIRECT' or 'OUTSIDE'.
    See https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
    :param org: The name of the target Github organization as string.
    :param api_url: The Github v4 API endpoint as string.
    :param token: The Github API token as string.
    :return: A dictionary of repo URL to list of UserAffiliationAndRepoPermission
    """
    # Field of the repo dict that carries the collaborator count for the requested affiliation.
    # Any other affiliation value has no count field, so the API is always queried for it.
    count_field = {
        'OUTSIDE': 'outsideCollaborators',
        'DIRECT': 'directCollaborators',
    }.get(affiliation)

    collabs_by_repo_url: dict[str, list[UserAffiliationAndRepoPermission]] = {}
    for repo in repo_raw_data:
        repo_name = repo['name']
        repo_url = repo['url']

        if count_field is not None and repo[count_field]['totalCount'] == 0:
            # repo has no collabs of the affiliation type we're looking for, so don't waste time making an API call
            collabs_by_repo_url[repo_url] = []
            continue

        # Output lists filled in by the retried inner function.
        users: list[dict[str, Any]] = []
        permissions: list[str] = []

        # Retry on TypeError, up to 5 tries, reporting each backoff through backoff_handler.
        fetch_with_retries = retries_with_backoff(
            _get_repo_collaborators_inner_func,
            TypeError,
            5,
            backoff_handler,
        )
        fetch_with_retries(
            org=org,
            api_url=api_url,
            token=token,
            repo_name=repo_name,
            affiliation=affiliation,
            collab_users=users,
            collab_permission=permissions,
        )

        # Pair each user with the permission at the same position.
        collabs_by_repo_url[repo_url] = [
            UserAffiliationAndRepoPermission(user, permission, affiliation)
            for user, permission in zip(users, permissions)
        ]
    return collabs_by_repo_url
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _get_repo_collaborators(
    token: str, api_url: str, organization: str, repo: str, affiliation: str,
) -> PaginatedGraphqlData:
    """
    Retrieve the collaborators of a single repository for one affiliation type, as described in
    https://docs.github.com/en/graphql/reference/objects#repositorycollaboratorconnection.
    :param token: The Github API token as string.
    :param api_url: The Github v4 API endpoint as string.
    :param organization: The name of the target Github organization as string.
    :param repo: The name of the target Github repository as string.
    :param affiliation: The type of affiliation to retrieve collaborators for. Either 'DIRECT' or 'OUTSIDE'.
    See https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
    :return: The paginated collaborator data, i.e. the first element of the pair returned by `fetch_all`.
    Callers read its `nodes` (the users) and `edges` (the permissions).
    """
    # Extra GraphQL variables consumed by GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL.
    query_variables = {'repo': repo, 'affiliation': affiliation}
    paginated_collaborators, _ = fetch_all(
        token,
        api_url,
        organization,
        GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL,
        'repository',
        resource_inner_type='collaborators',
        **query_variables,
    )
    return paginated_collaborators
|
|
239
|
+
|
|
92
240
|
|
|
93
241
|
@timeit
|
|
94
242
|
def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
@@ -111,11 +259,18 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
|
111
259
|
return repos.nodes
|
|
112
260
|
|
|
113
261
|
|
|
114
|
-
def transform(
|
|
262
|
+
def transform(
|
|
263
|
+
repos_json: List[Dict], direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
|
|
264
|
+
outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
|
|
265
|
+
) -> Dict:
|
|
115
266
|
"""
|
|
116
267
|
Parses the JSON returned from GitHub API to create data for graph ingestion
|
|
117
|
-
:param repos_json: the list of individual repository nodes from GitHub.
|
|
118
|
-
|
|
268
|
+
:param repos_json: the list of individual repository nodes from GitHub.
|
|
269
|
+
See tests.data.github.repos.GET_REPOS for data shape.
|
|
270
|
+
:param direct_collaborators: dict of repo URL to list of direct collaborators.
|
|
271
|
+
See tests.data.github.repos.DIRECT_COLLABORATORS for data shape.
|
|
272
|
+
:param outside_collaborators: dict of repo URL to list of outside collaborators.
|
|
273
|
+
See tests.data.github.repos.OUTSIDE_COLLABORATORS for data shape.
|
|
119
274
|
:return: Dict containing the repos, repo->language mapping, owners->repo mapping, direct and outside collaborators->repo
|
|
120
275
|
mapping, and Python requirements files (if any) in a repo.
|
|
121
276
|
"""
|
|
@@ -123,7 +278,10 @@ def transform(repos_json: List[Dict]) -> Dict:
|
|
|
123
278
|
transformed_repo_languages: List[Dict] = []
|
|
124
279
|
transformed_repo_owners: List[Dict] = []
|
|
125
280
|
# See https://docs.github.com/en/graphql/reference/enums#repositorypermission
|
|
126
|
-
|
|
281
|
+
transformed_outside_collaborators: Dict[str, List[Any]] = {
|
|
282
|
+
'ADMIN': [], 'MAINTAIN': [], 'READ': [], 'TRIAGE': [], 'WRITE': [],
|
|
283
|
+
}
|
|
284
|
+
transformed_direct_collaborators: Dict[str, List[Any]] = {
|
|
127
285
|
'ADMIN': [], 'MAINTAIN': [], 'READ': [], 'TRIAGE': [], 'WRITE': [],
|
|
128
286
|
}
|
|
129
287
|
transformed_requirements_files: List[Dict] = []
|
|
@@ -131,14 +289,22 @@ def transform(repos_json: List[Dict]) -> Dict:
|
|
|
131
289
|
_transform_repo_languages(repo_object['url'], repo_object, transformed_repo_languages)
|
|
132
290
|
_transform_repo_objects(repo_object, transformed_repo_list)
|
|
133
291
|
_transform_repo_owners(repo_object['owner']['url'], repo_object, transformed_repo_owners)
|
|
134
|
-
_transform_collaborators(
|
|
292
|
+
_transform_collaborators(
|
|
293
|
+
repo_object['url'], outside_collaborators[repo_object['url']],
|
|
294
|
+
transformed_outside_collaborators,
|
|
295
|
+
)
|
|
296
|
+
_transform_collaborators(
|
|
297
|
+
repo_object['url'], direct_collaborators[repo_object['url']],
|
|
298
|
+
transformed_direct_collaborators,
|
|
299
|
+
)
|
|
135
300
|
_transform_requirements_txt(repo_object['requirements'], repo_object['url'], transformed_requirements_files)
|
|
136
301
|
_transform_setup_cfg_requirements(repo_object['setupCfg'], repo_object['url'], transformed_requirements_files)
|
|
137
302
|
results = {
|
|
138
303
|
'repos': transformed_repo_list,
|
|
139
304
|
'repo_languages': transformed_repo_languages,
|
|
140
305
|
'repo_owners': transformed_repo_owners,
|
|
141
|
-
'
|
|
306
|
+
'repo_outside_collaborators': transformed_outside_collaborators,
|
|
307
|
+
'repo_direct_collaborators': transformed_direct_collaborators,
|
|
142
308
|
'python_requirements': transformed_requirements_files,
|
|
143
309
|
}
|
|
144
310
|
return results
|
|
@@ -229,11 +395,15 @@ def _transform_repo_languages(repo_url: str, repo: Dict, repo_languages: List[Di
|
|
|
229
395
|
})
|
|
230
396
|
|
|
231
397
|
|
|
232
|
-
def _transform_collaborators(
|
|
398
|
+
def _transform_collaborators(
|
|
399
|
+
repo_url: str, collaborators: List[UserAffiliationAndRepoPermission], transformed_collaborators: Dict,
|
|
400
|
+
) -> None:
|
|
233
401
|
"""
|
|
234
|
-
Performs data adjustments for
|
|
402
|
+
Performs data adjustments for collaborators in a GitHub repo.
|
|
235
403
|
Output data shape = [{permission, repo_url, url (the user's URL), login, name}, ...]
|
|
236
|
-
:param collaborators:
|
|
404
|
+
:param collaborators: For data shape, see
|
|
405
|
+
cartography.tests.data.github.repos.DIRECT_COLLABORATORS
|
|
406
|
+
cartography.tests.data.github.repos.OUTSIDE_COLLABORATORS
|
|
237
407
|
:param repo_url: The URL of the GitHub repo.
|
|
238
408
|
:param transformed_collaborators: Output dict. Data shape =
|
|
239
409
|
{'ADMIN': [{ user }, ...], 'MAINTAIN': [{ user }, ...], 'READ': [ ... ], 'TRIAGE': [ ... ], 'WRITE': [ ... ]}
|
|
@@ -241,10 +411,11 @@ def _transform_collaborators(collaborators: Dict, repo_url: str, transformed_col
|
|
|
241
411
|
"""
|
|
242
412
|
# `collaborators` is sometimes None
|
|
243
413
|
if collaborators:
|
|
244
|
-
for
|
|
245
|
-
|
|
414
|
+
for collaborator in collaborators:
|
|
415
|
+
user = collaborator.user
|
|
246
416
|
user['repo_url'] = repo_url
|
|
247
|
-
|
|
417
|
+
user['affiliation'] = collaborator.affiliation
|
|
418
|
+
transformed_collaborators[collaborator.permission].append(user)
|
|
248
419
|
|
|
249
420
|
|
|
250
421
|
def _transform_requirements_txt(
|
|
@@ -482,7 +653,7 @@ def load_github_owners(neo4j_session: neo4j.Session, update_tag: int, repo_owner
|
|
|
482
653
|
|
|
483
654
|
|
|
484
655
|
@timeit
|
|
485
|
-
def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborators: Dict) -> None:
|
|
656
|
+
def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborators: Dict, affiliation: str) -> None:
|
|
486
657
|
query = Template("""
|
|
487
658
|
UNWIND $UserData as user
|
|
488
659
|
|
|
@@ -502,7 +673,7 @@ def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborat
|
|
|
502
673
|
SET o.lastupdated = $UpdateTag
|
|
503
674
|
""")
|
|
504
675
|
for collab_type in collaborators.keys():
|
|
505
|
-
relationship_label = f"
|
|
676
|
+
relationship_label = f"{affiliation}_COLLAB_{collab_type}"
|
|
506
677
|
neo4j_session.run(
|
|
507
678
|
query.safe_substitute(rel_label=relationship_label),
|
|
508
679
|
UserData=collaborators[collab_type],
|
|
@@ -515,7 +686,12 @@ def load(neo4j_session: neo4j.Session, common_job_parameters: Dict, repo_data: D
|
|
|
515
686
|
load_github_repos(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repos'])
|
|
516
687
|
load_github_owners(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_owners'])
|
|
517
688
|
load_github_languages(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_languages'])
|
|
518
|
-
load_collaborators(
|
|
689
|
+
load_collaborators(
|
|
690
|
+
neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_direct_collaborators'], 'DIRECT',
|
|
691
|
+
)
|
|
692
|
+
load_collaborators(
|
|
693
|
+
neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_outside_collaborators'], 'OUTSIDE',
|
|
694
|
+
)
|
|
519
695
|
load_python_requirements(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['python_requirements'])
|
|
520
696
|
|
|
521
697
|
|
|
@@ -561,6 +737,12 @@ def sync(
|
|
|
561
737
|
"""
|
|
562
738
|
logger.info("Syncing GitHub repos")
|
|
563
739
|
repos_json = get(github_api_key, github_url, organization)
|
|
564
|
-
|
|
740
|
+
direct_collabs = _get_repo_collaborators_for_multiple_repos(
|
|
741
|
+
repos_json, "DIRECT", organization, github_url, github_api_key,
|
|
742
|
+
)
|
|
743
|
+
outside_collabs = _get_repo_collaborators_for_multiple_repos(
|
|
744
|
+
repos_json, "OUTSIDE", organization, github_url, github_api_key,
|
|
745
|
+
)
|
|
746
|
+
repo_data = transform(repos_json, direct_collabs, outside_collabs)
|
|
565
747
|
load(neo4j_session, common_job_parameters, repo_data)
|
|
566
748
|
run_cleanup_job('github_repos_cleanup.json', neo4j_session, common_job_parameters)
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from collections import namedtuple
|
|
3
|
-
from time import sleep
|
|
4
3
|
from typing import Any
|
|
5
4
|
from typing import Dict
|
|
6
5
|
from typing import List
|
|
@@ -13,11 +12,27 @@ from cartography.graph.job import GraphJob
|
|
|
13
12
|
from cartography.intel.github.util import fetch_all
|
|
14
13
|
from cartography.intel.github.util import PaginatedGraphqlData
|
|
15
14
|
from cartography.models.github.teams import GitHubTeamSchema
|
|
15
|
+
from cartography.util import retries_with_backoff
|
|
16
16
|
from cartography.util import timeit
|
|
17
17
|
|
|
18
18
|
logger = logging.getLogger(__name__)
|
|
19
19
|
|
|
20
|
+
# A team's permission on a repo: https://docs.github.com/en/graphql/reference/enums#repositorypermission
|
|
20
21
|
RepoPermission = namedtuple('RepoPermission', ['repo_url', 'permission'])
|
|
22
|
+
# A team member's role: https://docs.github.com/en/graphql/reference/enums#teammemberrole
|
|
23
|
+
UserRole = namedtuple('UserRole', ['user_url', 'role'])
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def backoff_handler(details: Dict) -> None:
    """
    Log a warning when a retried GitHub call in this module backs off.
    :param details: Dict describing the backoff event. `details['kwargs']` is read for the optional `team_name`
    of the retried call, and the `wait`, `tries` and `target` entries are formatted into the message.
    """
    message_template = (
        "Backing off {wait:0.1f} seconds after {tries} tries. Calling function {target} for team {team_name}"
    )

    # Fall back to a placeholder when the retried call was not given a (non-empty) team_name.
    team_name = details['kwargs'].get('team_name')
    if not team_name:
        team_name = 'not present in kwargs'

    logger.warning(message_template.format(**{**details, 'team_name': team_name}))
|
|
21
36
|
|
|
22
37
|
|
|
23
38
|
@timeit
|
|
@@ -32,7 +47,10 @@ def get_teams(org: str, api_url: str, token: str) -> Tuple[PaginatedGraphqlData,
|
|
|
32
47
|
slug
|
|
33
48
|
url
|
|
34
49
|
description
|
|
35
|
-
repositories
|
|
50
|
+
repositories {
|
|
51
|
+
totalCount
|
|
52
|
+
}
|
|
53
|
+
members(membership: IMMEDIATE) {
|
|
36
54
|
totalCount
|
|
37
55
|
}
|
|
38
56
|
}
|
|
@@ -47,6 +65,26 @@ def get_teams(org: str, api_url: str, token: str) -> Tuple[PaginatedGraphqlData,
|
|
|
47
65
|
return fetch_all(token, api_url, org, org_teams_gql, 'teams')
|
|
48
66
|
|
|
49
67
|
|
|
68
|
+
def _get_teams_repos_inner_func(
    org: str,
    api_url: str,
    token: str,
    team_name: str,
    repo_urls: list[str],
    repo_permissions: list[str],
) -> None:
    """
    Fetch the repos of one team and append their URLs and permissions to the caller-supplied output lists.

    This function is invoked through `retries_with_backoff` and retried on TypeError. The results of an attempt
    are gathered in local lists and only copied to the output lists once the whole attempt has succeeded, so a
    failed attempt cannot leave duplicated or misaligned entries behind for the next attempt.
    :param org: The name of the target Github organization as string.
    :param api_url: The Github v4 API endpoint as string.
    :param token: The Github API token as string.
    :param team_name: The team to fetch repos for; passed through to `_get_team_repos`.
    :param repo_urls: Output list. The `url` of each repo node is appended to it.
    :param repo_permissions: Output list. The `permission` of each repo edge is appended to it.
    :return: Nothing. Results are delivered through `repo_urls` and `repo_permissions`.
    """
    logger.info(f"Loading team repos for {team_name}.")
    team_repos = _get_team_repos(org, api_url, token, team_name)

    # The `or []` is because `.nodes` can be None. See:
    # https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection
    # A None node raises TypeError here, before anything has been written to the output lists.
    fetched_urls = [repo['url'] for repo in team_repos.nodes or []]
    # The `or []` is because `.edges` can be None.
    fetched_permissions = [edge['permission'] for edge in team_repos.edges or []]

    # Only publish the results once the whole attempt has succeeded.
    repo_urls.extend(fetched_urls)
    repo_permissions.extend(fetched_permissions)
|
|
86
|
+
|
|
87
|
+
|
|
50
88
|
@timeit
|
|
51
89
|
def _get_team_repos_for_multiple_teams(
|
|
52
90
|
team_raw_data: list[dict[str, Any]],
|
|
@@ -64,36 +102,22 @@ def _get_team_repos_for_multiple_teams(
|
|
|
64
102
|
result[team_name] = []
|
|
65
103
|
continue
|
|
66
104
|
|
|
67
|
-
repo_urls = []
|
|
68
|
-
repo_permissions = []
|
|
69
|
-
|
|
70
|
-
max_tries = 5
|
|
71
|
-
|
|
72
|
-
for current_try in range(1, max_tries + 1):
|
|
73
|
-
team_repos = _get_team_repos(org, api_url, token, team_name)
|
|
74
|
-
|
|
75
|
-
try:
|
|
76
|
-
# The `or []` is because `.nodes` can be None. See:
|
|
77
|
-
# https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection
|
|
78
|
-
for repo in team_repos.nodes or []:
|
|
79
|
-
repo_urls.append(repo['url'])
|
|
80
|
-
|
|
81
|
-
# The `or []` is because `.edges` can be None.
|
|
82
|
-
for edge in team_repos.edges or []:
|
|
83
|
-
repo_permissions.append(edge['permission'])
|
|
84
|
-
# We're done! Break out of the retry loop.
|
|
85
|
-
break
|
|
86
|
-
|
|
87
|
-
except TypeError:
|
|
88
|
-
# Handles issue #1334
|
|
89
|
-
logger.warning(
|
|
90
|
-
f"GitHub returned None when trying to find repo or permission data for team {team_name}.",
|
|
91
|
-
exc_info=True,
|
|
92
|
-
)
|
|
93
|
-
if current_try == max_tries:
|
|
94
|
-
raise RuntimeError(f"GitHub returned a None repo url for team {team_name}, retries exhausted.")
|
|
95
|
-
sleep(current_try ** 2)
|
|
105
|
+
repo_urls: List[str] = []
|
|
106
|
+
repo_permissions: List[str] = []
|
|
96
107
|
|
|
108
|
+
retries_with_backoff(
|
|
109
|
+
_get_teams_repos_inner_func,
|
|
110
|
+
TypeError,
|
|
111
|
+
5,
|
|
112
|
+
backoff_handler,
|
|
113
|
+
)(
|
|
114
|
+
org=org,
|
|
115
|
+
api_url=api_url,
|
|
116
|
+
token=token,
|
|
117
|
+
team_name=team_name,
|
|
118
|
+
repo_urls=repo_urls,
|
|
119
|
+
repo_permissions=repo_permissions,
|
|
120
|
+
)
|
|
97
121
|
# Shape = [(repo_url, 'WRITE'), ...]
|
|
98
122
|
result[team_name] = [RepoPermission(url, perm) for url, perm in zip(repo_urls, repo_permissions)]
|
|
99
123
|
return result
|
|
@@ -142,10 +166,97 @@ def _get_team_repos(org: str, api_url: str, token: str, team: str) -> PaginatedG
|
|
|
142
166
|
return team_repos
|
|
143
167
|
|
|
144
168
|
|
|
169
|
+
def _get_team_users_for_multiple_teams(
    team_raw_data: list[dict[str, Any]],
    org: str,
    api_url: str,
    token: str,
) -> dict[str, list[UserRole]]:
    """
    For every team in the given list, retrieve its members and their roles.
    :param team_raw_data: A list of dicts representing teams. Each one must carry `slug` and
    `members.totalCount`.
    :param org: The name of the target Github organization as string.
    :param api_url: The Github v4 API endpoint as string.
    :param token: The Github API token as string.
    :return: A dictionary of team slug to list of UserRole. Teams without members map to an empty list.
    """
    # Defined once rather than once per team; it takes everything it needs as arguments.
    def get_teams_users_inner_func(
        org: str, api_url: str, token: str, team_name: str,
        user_urls: List[str], user_roles: List[str],
    ) -> None:
        logger.info(f"Loading team users for {team_name}.")
        team_users = _get_team_users(org, api_url, token, team_name)
        # This function is retried on TypeError, so gather the results locally first: a failed attempt
        # must not leave partial data in the output lists for the next attempt to pile onto.
        # The `or []` is because `.nodes` can be None. See:
        # https://docs.github.com/en/graphql/reference/objects#teammemberconnection
        fetched_urls = [user['url'] for user in team_users.nodes or []]
        # The `or []` is because `.edges` can be None.
        fetched_roles = [edge['role'] for edge in team_users.edges or []]
        user_urls.extend(fetched_urls)
        user_roles.extend(fetched_roles)

    result: dict[str, list[UserRole]] = {}
    for team in team_raw_data:
        team_name = team['slug']
        user_count = team['members']['totalCount']

        if user_count == 0:
            # This team has no users so let's move on
            result[team_name] = []
            continue

        user_urls: List[str] = []
        user_roles: List[str] = []

        # Retry on TypeError, up to 5 tries, reporting each backoff through backoff_handler.
        retries_with_backoff(get_teams_users_inner_func, TypeError, 5, backoff_handler)(
            org=org, api_url=api_url, token=token, team_name=team_name, user_urls=user_urls, user_roles=user_roles,
        )

        # Shape = [(user_url, 'MAINTAINER'), ...]
        result[team_name] = [UserRole(url, role) for url, role in zip(user_urls, user_roles)]
    return result
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
@timeit
|
|
212
|
+
def _get_team_users(org: str, api_url: str, token: str, team: str) -> PaginatedGraphqlData:
|
|
213
|
+
team_users_gql = """
|
|
214
|
+
query($login: String!, $team: String!, $cursor: String) {
|
|
215
|
+
organization(login: $login) {
|
|
216
|
+
url
|
|
217
|
+
login
|
|
218
|
+
team(slug: $team) {
|
|
219
|
+
slug
|
|
220
|
+
members(first: 100, after: $cursor, membership: IMMEDIATE) {
|
|
221
|
+
totalCount
|
|
222
|
+
nodes {
|
|
223
|
+
url
|
|
224
|
+
}
|
|
225
|
+
edges {
|
|
226
|
+
role
|
|
227
|
+
}
|
|
228
|
+
pageInfo {
|
|
229
|
+
endCursor
|
|
230
|
+
hasNextPage
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
rateLimit {
|
|
236
|
+
limit
|
|
237
|
+
cost
|
|
238
|
+
remaining
|
|
239
|
+
resetAt
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
"""
|
|
243
|
+
team_users, _ = fetch_all(
|
|
244
|
+
token,
|
|
245
|
+
api_url,
|
|
246
|
+
org,
|
|
247
|
+
team_users_gql,
|
|
248
|
+
'team',
|
|
249
|
+
resource_inner_type='members',
|
|
250
|
+
team=team,
|
|
251
|
+
)
|
|
252
|
+
return team_users
|
|
253
|
+
|
|
254
|
+
|
|
145
255
|
def transform_teams(
|
|
146
256
|
team_paginated_data: PaginatedGraphqlData,
|
|
147
257
|
org_data: Dict[str, Any],
|
|
148
258
|
team_repo_data: dict[str, list[RepoPermission]],
|
|
259
|
+
team_user_data: dict[str, list[UserRole]],
|
|
149
260
|
) -> list[dict[str, Any]]:
|
|
150
261
|
result = []
|
|
151
262
|
for team in team_paginated_data.nodes:
|
|
@@ -155,19 +266,29 @@ def transform_teams(
|
|
|
155
266
|
'url': team['url'],
|
|
156
267
|
'description': team['description'],
|
|
157
268
|
'repo_count': team['repositories']['totalCount'],
|
|
269
|
+
'member_count': team['members']['totalCount'],
|
|
158
270
|
'org_url': org_data['url'],
|
|
159
271
|
'org_login': org_data['login'],
|
|
160
272
|
}
|
|
161
273
|
repo_permissions = team_repo_data[team_name]
|
|
162
|
-
|
|
274
|
+
user_roles = team_user_data[team_name]
|
|
275
|
+
|
|
276
|
+
if not repo_permissions and not user_roles:
|
|
163
277
|
result.append(repo_info)
|
|
164
278
|
continue
|
|
165
279
|
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
280
|
+
if repo_permissions:
|
|
281
|
+
# `permission` can be one of ADMIN, READ, WRITE, TRIAGE, or MAINTAIN
|
|
282
|
+
for repo_url, permission in repo_permissions:
|
|
283
|
+
repo_info_copy = repo_info.copy()
|
|
284
|
+
repo_info_copy[permission] = repo_url
|
|
285
|
+
result.append(repo_info_copy)
|
|
286
|
+
if user_roles:
|
|
287
|
+
# `role` can be one of MAINTAINER, MEMBER
|
|
288
|
+
for user_url, role in user_roles:
|
|
289
|
+
repo_info_copy = repo_info.copy()
|
|
290
|
+
repo_info_copy[role] = user_url
|
|
291
|
+
result.append(repo_info_copy)
|
|
171
292
|
return result
|
|
172
293
|
|
|
173
294
|
|
|
@@ -203,7 +324,8 @@ def sync_github_teams(
|
|
|
203
324
|
) -> None:
|
|
204
325
|
teams_paginated, org_data = get_teams(organization, github_url, github_api_key)
|
|
205
326
|
team_repos = _get_team_repos_for_multiple_teams(teams_paginated.nodes, organization, github_url, github_api_key)
|
|
206
|
-
|
|
327
|
+
team_users = _get_team_users_for_multiple_teams(teams_paginated.nodes, organization, github_url, github_api_key)
|
|
328
|
+
processed_data = transform_teams(teams_paginated, org_data, team_repos, team_users)
|
|
207
329
|
load_team_repos(neo4j_session, processed_data, common_job_parameters['UPDATE_TAG'], org_data['url'])
|
|
208
330
|
common_job_parameters['org_url'] = org_data['url']
|
|
209
331
|
cleanup(neo4j_session, common_job_parameters)
|