cartography 0.96.0rc3__py3-none-any.whl → 0.96.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

@@ -1,5 +1,6 @@
1
1
  import configparser
2
2
  import logging
3
+ from collections import namedtuple
3
4
  from string import Template
4
5
  from typing import Any
5
6
  from typing import Dict
@@ -12,11 +13,28 @@ from packaging.requirements import Requirement
12
13
  from packaging.utils import canonicalize_name
13
14
 
14
15
  from cartography.intel.github.util import fetch_all
16
+ from cartography.intel.github.util import PaginatedGraphqlData
17
+ from cartography.util import backoff_handler
18
+ from cartography.util import retries_with_backoff
15
19
  from cartography.util import run_cleanup_job
16
20
  from cartography.util import timeit
17
21
 
18
22
  logger = logging.getLogger(__name__)
19
23
 
24
+
25
# Pairs a GitHub user with the permission level they hold on a repo and the way they are
# affiliated with that repo. Enum references:
# - Permission: https://docs.github.com/en/graphql/reference/enums#repositorypermission
# - Affiliation: https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
#
# Fields, in positional order:
#   user        -- Dict describing the user
#   permission  -- 'WRITE', 'MAINTAIN', 'ADMIN', etc
#   affiliation -- 'OUTSIDE', 'DIRECT'
UserAffiliationAndRepoPermission = namedtuple(
    'UserAffiliationAndRepoPermission',
    ('user', 'permission', 'affiliation'),
)
36
+
37
+
20
38
  GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
21
39
  query($login: String!, $cursor: String) {
22
40
  organization(login: $login)
@@ -59,17 +77,11 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
59
77
  login
60
78
  __typename
61
79
  }
62
- collaborators(affiliation: OUTSIDE, first: 50) {
63
- edges {
64
- permission
65
- }
66
- nodes {
67
- url
68
- login
69
- name
70
- email
71
- company
72
- }
80
+ directCollaborators: collaborators(first: 100, affiliation: DIRECT) {
81
+ totalCount
82
+ }
83
+ outsideCollaborators: collaborators(first: 100, affiliation: OUTSIDE) {
84
+ totalCount
73
85
  }
74
86
  requirements:object(expression: "HEAD:requirements.txt") {
75
87
  ... on Blob {
@@ -89,6 +101,145 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
89
101
  # Note: In the above query, `HEAD` references the default branch.
90
102
  # See https://stackoverflow.com/questions/48935381/github-graphql-api-default-branch-in-repository
91
103
 
104
+ GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL = """
105
+ query($login: String!, $repo: String!, $affiliation: CollaboratorAffiliation!, $cursor: String) {
106
+ organization(login: $login) {
107
+ url
108
+ login
109
+ repository(name: $repo){
110
+ name
111
+ collaborators(first: 50, affiliation: $affiliation, after: $cursor) {
112
+ edges {
113
+ permission
114
+ }
115
+ nodes {
116
+ url
117
+ login
118
+ name
119
+ email
120
+ company
121
+ }
122
+ pageInfo{
123
+ endCursor
124
+ hasNextPage
125
+ }
126
+ }
127
+ }
128
+ }
129
+ rateLimit {
130
+ limit
131
+ cost
132
+ remaining
133
+ resetAt
134
+ }
135
+ }
136
+ """
137
+
138
+
139
+ def _get_repo_collaborators_inner_func(
140
+ org: str,
141
+ api_url: str,
142
+ token: str,
143
+ repo_raw_data: list[dict[str, Any]],
144
+ affiliation: str,
145
+ collab_users: list[dict[str, Any]],
146
+ collab_permission: list[str],
147
+ ) -> dict[str, list[UserAffiliationAndRepoPermission]]:
148
+ result: dict[str, list[UserAffiliationAndRepoPermission]] = {}
149
+
150
+ for repo in repo_raw_data:
151
+ repo_name = repo['name']
152
+ repo_url = repo['url']
153
+
154
+ if ((affiliation == 'OUTSIDE' and repo['outsideCollaborators']['totalCount'] == 0) or
155
+ (affiliation == 'DIRECT' and repo['directCollaborators']['totalCount'] == 0)):
156
+ # repo has no collabs of the affiliation type we're looking for, so don't waste time making an API call
157
+ result[repo_url] = []
158
+ continue
159
+
160
+ logger.info(f"Loading {affiliation} collaborators for repo {repo_name}.")
161
+ collaborators = _get_repo_collaborators(token, api_url, org, repo_name, affiliation)
162
+
163
+ # nodes and edges are expected to always be present given that we only call for them if totalCount is > 0
164
+ # however sometimes GitHub returns None, as in issue 1334 and 1404.
165
+ for collab in collaborators.nodes or []:
166
+ collab_users.append(collab)
167
+
168
+ # The `or []` is because `.edges` can be None.
169
+ for perm in collaborators.edges or []:
170
+ collab_permission.append(perm['permission'])
171
+
172
+ result[repo_url] = [
173
+ UserAffiliationAndRepoPermission(user, permission, affiliation)
174
+ for user, permission in zip(collab_users, collab_permission)
175
+ ]
176
+ return result
177
+
178
+
179
def _get_repo_collaborators_for_multiple_repos(
    repo_raw_data: list[dict[str, Any]],
    affiliation: str,
    org: str,
    api_url: str,
    token: str,
) -> dict[str, list[UserAffiliationAndRepoPermission]]:
    """
    Retrieve the collaborators of every repo in the given list, retrying on TypeError.
    :param repo_raw_data: A list of dicts representing repos. See tests.data.github.repos.GET_REPOS for data shape.
    :param affiliation: The type of affiliation to retrieve collaborators for. Either 'DIRECT' or 'OUTSIDE'.
      See https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
    :param org: The name of the target Github organization as string.
    :param api_url: The Github v4 API endpoint as string.
    :param token: The Github API token as string.
    :return: A dictionary of repo URL to list of UserAffiliationAndRepoPermission
    """
    logger.info(f'Retrieving repo collaborators for affiliation "{affiliation}" on org "{org}".')

    # Scratch lists handed to the inner function; only its return value is used here.
    users_scratch: List[dict[str, Any]] = []
    permissions_scratch: List[str] = []

    # Wrap the worker so that a TypeError triggers a retry; `5` is presumably the maximum number
    # of attempts (confirm against cartography.util.retries_with_backoff).
    fetch_with_retries = retries_with_backoff(
        _get_repo_collaborators_inner_func,
        TypeError,
        5,
        backoff_handler,
    )
    collaborators_by_repo: dict[str, list[UserAffiliationAndRepoPermission]] = fetch_with_retries(
        org=org,
        api_url=api_url,
        token=token,
        repo_raw_data=repo_raw_data,
        affiliation=affiliation,
        collab_users=users_scratch,
        collab_permission=permissions_scratch,
    )
    return collaborators_by_repo
215
+
216
+
217
def _get_repo_collaborators(
    token: str, api_url: str, organization: str, repo: str, affiliation: str,
) -> PaginatedGraphqlData:
    """
    Fetch the collaborators of a single repository for one affiliation type, as described in
    https://docs.github.com/en/graphql/reference/objects#repositorycollaboratorconnection.
    :param token: The Github API token as string.
    :param api_url: The Github v4 API endpoint as string.
    :param organization: The name of the target Github organization as string.
    :param repo: The name of the target Github repository as string.
    :param affiliation: The type of affiliation to retrieve collaborators for. Either 'DIRECT' or 'OUTSIDE'.
      See https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
    :return: The paginated collaborator data, i.e. the first element of the pair returned by `fetch_all`.
    """
    # `repo` and `affiliation` are forwarded as keyword arguments; they match the `$repo` and
    # `$affiliation` variables declared by the query.
    paginated_collaborators, _ = fetch_all(
        token,
        api_url,
        organization,
        GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL,
        'repository',
        resource_inner_type='collaborators',
        repo=repo,
        affiliation=affiliation,
    )
    return paginated_collaborators
242
+
92
243
 
93
244
  @timeit
94
245
  def get(token: str, api_url: str, organization: str) -> List[Dict]:
@@ -111,11 +262,19 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
111
262
  return repos.nodes
112
263
 
113
264
 
114
- def transform(repos_json: List[Dict]) -> Dict:
265
+ def transform(
266
+ repos_json: List[Dict],
267
+ direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
268
+ outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
269
+ ) -> Dict:
115
270
  """
116
271
  Parses the JSON returned from GitHub API to create data for graph ingestion
117
- :param repos_json: the list of individual repository nodes from GitHub. See tests.data.github.repos.GET_REPOS for
118
- data shape.
272
+ :param repos_json: the list of individual repository nodes from GitHub.
273
+ See tests.data.github.repos.GET_REPOS for data shape.
274
+ :param direct_collaborators: dict of repo URL to list of direct collaborators.
275
+ See tests.data.github.repos.DIRECT_COLLABORATORS for data shape.
276
+ :param outside_collaborators: dict of repo URL to list of outside collaborators.
277
+ See tests.data.github.repos.OUTSIDE_COLLABORATORS for data shape.
119
278
  :return: Dict containing the repos, repo->language mapping, owners->repo mapping, outside collaborators->repo
120
279
  mapping, and Python requirements files (if any) in a repo.
121
280
  """
@@ -123,7 +282,10 @@ def transform(repos_json: List[Dict]) -> Dict:
123
282
  transformed_repo_languages: List[Dict] = []
124
283
  transformed_repo_owners: List[Dict] = []
125
284
  # See https://docs.github.com/en/graphql/reference/enums#repositorypermission
126
- transformed_collaborators: Dict[str, List[Any]] = {
285
+ transformed_outside_collaborators: Dict[str, List[Any]] = {
286
+ 'ADMIN': [], 'MAINTAIN': [], 'READ': [], 'TRIAGE': [], 'WRITE': [],
287
+ }
288
+ transformed_direct_collaborators: Dict[str, List[Any]] = {
127
289
  'ADMIN': [], 'MAINTAIN': [], 'READ': [], 'TRIAGE': [], 'WRITE': [],
128
290
  }
129
291
  transformed_requirements_files: List[Dict] = []
@@ -131,14 +293,30 @@ def transform(repos_json: List[Dict]) -> Dict:
131
293
  _transform_repo_languages(repo_object['url'], repo_object, transformed_repo_languages)
132
294
  _transform_repo_objects(repo_object, transformed_repo_list)
133
295
  _transform_repo_owners(repo_object['owner']['url'], repo_object, transformed_repo_owners)
134
- _transform_collaborators(repo_object['collaborators'], repo_object['url'], transformed_collaborators)
135
- _transform_requirements_txt(repo_object['requirements'], repo_object['url'], transformed_requirements_files)
136
- _transform_setup_cfg_requirements(repo_object['setupCfg'], repo_object['url'], transformed_requirements_files)
296
+
297
+ # Allow sync to continue if we didn't have permissions to list collaborators
298
+ repo_url = repo_object['url']
299
+ if repo_url in outside_collaborators:
300
+ _transform_collaborators(
301
+ repo_object['url'],
302
+ outside_collaborators[repo_object['url']],
303
+ transformed_outside_collaborators,
304
+ )
305
+ if repo_url in direct_collaborators:
306
+ _transform_collaborators(
307
+ repo_object['url'],
308
+ direct_collaborators[repo_object['url']],
309
+ transformed_direct_collaborators,
310
+ )
311
+
312
+ _transform_requirements_txt(repo_object['requirements'], repo_url, transformed_requirements_files)
313
+ _transform_setup_cfg_requirements(repo_object['setupCfg'], repo_url, transformed_requirements_files)
137
314
  results = {
138
315
  'repos': transformed_repo_list,
139
316
  'repo_languages': transformed_repo_languages,
140
317
  'repo_owners': transformed_repo_owners,
141
- 'repo_collaborators': transformed_collaborators,
318
+ 'repo_outside_collaborators': transformed_outside_collaborators,
319
+ 'repo_direct_collaborators': transformed_direct_collaborators,
142
320
  'python_requirements': transformed_requirements_files,
143
321
  }
144
322
  return results
@@ -229,11 +407,15 @@ def _transform_repo_languages(repo_url: str, repo: Dict, repo_languages: List[Di
229
407
  })
230
408
 
231
409
 
232
- def _transform_collaborators(collaborators: Dict, repo_url: str, transformed_collaborators: Dict) -> None:
410
+ def _transform_collaborators(
411
+ repo_url: str, collaborators: List[UserAffiliationAndRepoPermission], transformed_collaborators: Dict,
412
+ ) -> None:
233
413
  """
234
- Performs data adjustments for outside collaborators in a GitHub repo.
414
+ Performs data adjustments for collaborators in a GitHub repo.
235
415
  Output data shape = [{permission, repo_url, url (the user's URL), login, name}, ...]
236
- :param collaborators: See cartography.tests.data.github.repos for data shape.
416
+ :param collaborators: For data shape, see
417
+ cartography.tests.data.github.repos.DIRECT_COLLABORATORS
418
+ cartography.tests.data.github.repos.OUTSIDE_COLLABORATORS
237
419
  :param repo_url: The URL of the GitHub repo.
238
420
  :param transformed_collaborators: Output dict. Data shape =
239
421
  {'ADMIN': [{ user }, ...], 'MAINTAIN': [{ user }, ...], 'READ': [ ... ], 'TRIAGE': [ ... ], 'WRITE': [ ... ]}
@@ -241,10 +423,11 @@ def _transform_collaborators(collaborators: Dict, repo_url: str, transformed_col
241
423
  """
242
424
  # `collaborators` is sometimes None
243
425
  if collaborators:
244
- for idx, user in enumerate(collaborators['nodes']):
245
- user_permission = collaborators['edges'][idx]['permission']
426
+ for collaborator in collaborators:
427
+ user = collaborator.user
246
428
  user['repo_url'] = repo_url
247
- transformed_collaborators[user_permission].append(user)
429
+ user['affiliation'] = collaborator.affiliation
430
+ transformed_collaborators[collaborator.permission].append(user)
248
431
 
249
432
 
250
433
  def _transform_requirements_txt(
@@ -482,7 +665,7 @@ def load_github_owners(neo4j_session: neo4j.Session, update_tag: int, repo_owner
482
665
 
483
666
 
484
667
  @timeit
485
- def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborators: Dict) -> None:
668
+ def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborators: Dict, affiliation: str) -> None:
486
669
  query = Template("""
487
670
  UNWIND $UserData as user
488
671
 
@@ -502,7 +685,7 @@ def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborat
502
685
  SET o.lastupdated = $UpdateTag
503
686
  """)
504
687
  for collab_type in collaborators.keys():
505
- relationship_label = f"OUTSIDE_COLLAB_{collab_type}"
688
+ relationship_label = f"{affiliation}_COLLAB_{collab_type}"
506
689
  neo4j_session.run(
507
690
  query.safe_substitute(rel_label=relationship_label),
508
691
  UserData=collaborators[collab_type],
@@ -515,7 +698,12 @@ def load(neo4j_session: neo4j.Session, common_job_parameters: Dict, repo_data: D
515
698
  load_github_repos(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repos'])
516
699
  load_github_owners(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_owners'])
517
700
  load_github_languages(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_languages'])
518
- load_collaborators(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_collaborators'])
701
+ load_collaborators(
702
+ neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_direct_collaborators'], 'DIRECT',
703
+ )
704
+ load_collaborators(
705
+ neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_outside_collaborators'], 'OUTSIDE',
706
+ )
519
707
  load_python_requirements(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['python_requirements'])
520
708
 
521
709
 
@@ -561,6 +749,18 @@ def sync(
561
749
  """
562
750
  logger.info("Syncing GitHub repos")
563
751
  repos_json = get(github_api_key, github_url, organization)
564
- repo_data = transform(repos_json)
752
+ direct_collabs: dict[str, list[UserAffiliationAndRepoPermission]] = {}
753
+ outside_collabs: dict[str, list[UserAffiliationAndRepoPermission]] = {}
754
+ try:
755
+ direct_collabs = _get_repo_collaborators_for_multiple_repos(
756
+ repos_json, "DIRECT", organization, github_url, github_api_key,
757
+ )
758
+ outside_collabs = _get_repo_collaborators_for_multiple_repos(
759
+ repos_json, "OUTSIDE", organization, github_url, github_api_key,
760
+ )
761
+ except TypeError:
762
+ # due to permission errors or transient network error or some other nonsense
763
+ logger.warning('Unable to list repo collaborators due to permission errors; continuing on.', exc_info=True)
764
+ repo_data = transform(repos_json, direct_collabs, outside_collabs)
565
765
  load(neo4j_session, common_job_parameters, repo_data)
566
766
  run_cleanup_job('github_repos_cleanup.json', neo4j_session, common_job_parameters)
@@ -1,6 +1,5 @@
1
1
  import logging
2
2
  from collections import namedtuple
3
- from time import sleep
4
3
  from typing import Any
5
4
  from typing import Dict
6
5
  from typing import List
@@ -13,11 +12,27 @@ from cartography.graph.job import GraphJob
13
12
  from cartography.intel.github.util import fetch_all
14
13
  from cartography.intel.github.util import PaginatedGraphqlData
15
14
  from cartography.models.github.teams import GitHubTeamSchema
15
+ from cartography.util import retries_with_backoff
16
16
  from cartography.util import timeit
17
17
 
18
18
  logger = logging.getLogger(__name__)
19
19
 
20
+ # A team's permission on a repo: https://docs.github.com/en/graphql/reference/enums#repositorypermission
20
21
  RepoPermission = namedtuple('RepoPermission', ['repo_url', 'permission'])
22
+ # A team member's role: https://docs.github.com/en/graphql/reference/enums#teammemberrole
23
+ UserRole = namedtuple('UserRole', ['user_url', 'role'])
24
+
25
+
26
def backoff_handler(details: Dict) -> None:
    """
    Custom backoff handler for GitHub calls in this module.

    Logs a warning built from the entries of `details` (`wait`, `tries`, `target`) plus the
    `team_name` keyword argument of the call being retried, when there is one.
    :param details: Dict describing the retry; must contain a `kwargs` dict.
    """
    # Fall back to a placeholder when the retried call had no (or an empty) `team_name` kwarg.
    team_name = details['kwargs'].get('team_name') or 'not present in kwargs'
    message_template = (
        "Backing off {wait:0.1f} seconds after {tries} tries. Calling function {target} for team {team_name}"
    )
    logger.warning(message_template.format(**{**details, 'team_name': team_name}))
21
36
 
22
37
 
23
38
  @timeit
@@ -32,7 +47,10 @@ def get_teams(org: str, api_url: str, token: str) -> Tuple[PaginatedGraphqlData,
32
47
  slug
33
48
  url
34
49
  description
35
- repositories(first: 100) {
50
+ repositories {
51
+ totalCount
52
+ }
53
+ members(membership: IMMEDIATE) {
36
54
  totalCount
37
55
  }
38
56
  }
@@ -47,6 +65,26 @@ def get_teams(org: str, api_url: str, token: str) -> Tuple[PaginatedGraphqlData,
47
65
  return fetch_all(token, api_url, org, org_teams_gql, 'teams')
48
66
 
49
67
 
68
def _get_teams_repos_inner_func(
    org: str,
    api_url: str,
    token: str,
    team_name: str,
    repo_urls: list[str],
    repo_permissions: list[str],
) -> None:
    """
    Fetch the repos of one team and append their URLs and permissions to the given output lists.

    :param org: The name of the target GitHub organization.
    :param api_url: The GitHub v4 API endpoint.
    :param token: The GitHub API token.
    :param team_name: The team to fetch repos for.
    :param repo_urls: Output list, extended with the URL of each repo node.
    :param repo_permissions: Output list, extended with the permission of each repo edge.
    :raises TypeError: if GitHub returned a None entry inside `nodes` or `edges`. In that case
        neither output list has been modified, so the call can be retried safely.
    """
    logger.info(f"Loading team repos for {team_name}.")
    team_repos = _get_team_repos(org, api_url, token, team_name)

    # Parse into locals first: appending straight to the output lists would leave partial data
    # behind when a None entry raises TypeError, and a retry would then duplicate URLs and
    # misalign them with the permissions.
    # The `or []` is because `.nodes` can be None. See:
    # https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection
    fetched_urls = [repo['url'] for repo in team_repos.nodes or []]
    # The `or []` is because `.edges` can be None.
    fetched_permissions = [edge['permission'] for edge in team_repos.edges or []]

    repo_urls.extend(fetched_urls)
    repo_permissions.extend(fetched_permissions)
86
+
87
+
50
88
  @timeit
51
89
  def _get_team_repos_for_multiple_teams(
52
90
  team_raw_data: list[dict[str, Any]],
@@ -64,36 +102,22 @@ def _get_team_repos_for_multiple_teams(
64
102
  result[team_name] = []
65
103
  continue
66
104
 
67
- repo_urls = []
68
- repo_permissions = []
69
-
70
- max_tries = 5
71
-
72
- for current_try in range(1, max_tries + 1):
73
- team_repos = _get_team_repos(org, api_url, token, team_name)
74
-
75
- try:
76
- # The `or []` is because `.nodes` can be None. See:
77
- # https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection
78
- for repo in team_repos.nodes or []:
79
- repo_urls.append(repo['url'])
80
-
81
- # The `or []` is because `.edges` can be None.
82
- for edge in team_repos.edges or []:
83
- repo_permissions.append(edge['permission'])
84
- # We're done! Break out of the retry loop.
85
- break
86
-
87
- except TypeError:
88
- # Handles issue #1334
89
- logger.warning(
90
- f"GitHub returned None when trying to find repo or permission data for team {team_name}.",
91
- exc_info=True,
92
- )
93
- if current_try == max_tries:
94
- raise RuntimeError(f"GitHub returned a None repo url for team {team_name}, retries exhausted.")
95
- sleep(current_try ** 2)
105
+ repo_urls: List[str] = []
106
+ repo_permissions: List[str] = []
96
107
 
108
+ retries_with_backoff(
109
+ _get_teams_repos_inner_func,
110
+ TypeError,
111
+ 5,
112
+ backoff_handler,
113
+ )(
114
+ org=org,
115
+ api_url=api_url,
116
+ token=token,
117
+ team_name=team_name,
118
+ repo_urls=repo_urls,
119
+ repo_permissions=repo_permissions,
120
+ )
97
121
  # Shape = [(repo_url, 'WRITE'), ...]]
98
122
  result[team_name] = [RepoPermission(url, perm) for url, perm in zip(repo_urls, repo_permissions)]
99
123
  return result
@@ -142,10 +166,97 @@ def _get_team_repos(org: str, api_url: str, token: str, team: str) -> PaginatedG
142
166
  return team_repos
143
167
 
144
168
 
169
+ def _get_team_users_for_multiple_teams(
170
+ team_raw_data: list[dict[str, Any]],
171
+ org: str,
172
+ api_url: str,
173
+ token: str,
174
+ ) -> dict[str, list[UserRole]]:
175
+ result: dict[str, list[UserRole]] = {}
176
+ for team in team_raw_data:
177
+ team_name = team['slug']
178
+ user_count = team['members']['totalCount']
179
+
180
+ if user_count == 0:
181
+ # This team has no users so let's move on
182
+ result[team_name] = []
183
+ continue
184
+
185
+ user_urls: List[str] = []
186
+ user_roles: List[str] = []
187
+
188
+ def get_teams_users_inner_func(
189
+ org: str, api_url: str, token: str, team_name: str,
190
+ user_urls: List[str], user_roles: List[str],
191
+ ) -> None:
192
+ logger.info(f"Loading team users for {team_name}.")
193
+ team_users = _get_team_users(org, api_url, token, team_name)
194
+ # The `or []` is because `.nodes` can be None. See:
195
+ # https://docs.github.com/en/graphql/reference/objects#teammemberconnection
196
+ for user in team_users.nodes or []:
197
+ user_urls.append(user['url'])
198
+ # The `or []` is because `.edges` can be None.
199
+ for edge in team_users.edges or []:
200
+ user_roles.append(edge['role'])
201
+
202
+ retries_with_backoff(get_teams_users_inner_func, TypeError, 5, backoff_handler)(
203
+ org=org, api_url=api_url, token=token, team_name=team_name, user_urls=user_urls, user_roles=user_roles,
204
+ )
205
+
206
+ # Shape = [(user_url, 'MAINTAINER'), ...]]
207
+ result[team_name] = [UserRole(url, role) for url, role in zip(user_urls, user_roles)]
208
+ return result
209
+
210
+
211
@timeit
def _get_team_users(org: str, api_url: str, token: str, team: str) -> PaginatedGraphqlData:
    """
    Fetch the immediate members of one team, together with each member's role.
    :param org: The name of the target Github organization as string.
    :param api_url: The Github v4 API endpoint as string.
    :param token: The Github API token as string.
    :param team: The slug of the team, sent as the `$team` query variable.
    :return: The paginated member data, i.e. the first element of the pair returned by `fetch_all`.
    """
    members_query = """
        query($login: String!, $team: String!, $cursor: String) {
            organization(login: $login) {
                url
                login
                team(slug: $team) {
                    slug
                    members(first: 100, after: $cursor, membership: IMMEDIATE) {
                        totalCount
                        nodes {
                            url
                        }
                        edges {
                            role
                        }
                        pageInfo {
                            endCursor
                            hasNextPage
                        }
                    }
                }
            }
            rateLimit {
                limit
                cost
                remaining
                resetAt
            }
        }
    """
    paginated_members, _ = fetch_all(
        token,
        api_url,
        org,
        members_query,
        'team',
        resource_inner_type='members',
        team=team,
    )
    return paginated_members
253
+
254
+
145
255
  def transform_teams(
146
256
  team_paginated_data: PaginatedGraphqlData,
147
257
  org_data: Dict[str, Any],
148
258
  team_repo_data: dict[str, list[RepoPermission]],
259
+ team_user_data: dict[str, list[UserRole]],
149
260
  ) -> list[dict[str, Any]]:
150
261
  result = []
151
262
  for team in team_paginated_data.nodes:
@@ -155,19 +266,29 @@ def transform_teams(
155
266
  'url': team['url'],
156
267
  'description': team['description'],
157
268
  'repo_count': team['repositories']['totalCount'],
269
+ 'member_count': team['members']['totalCount'],
158
270
  'org_url': org_data['url'],
159
271
  'org_login': org_data['login'],
160
272
  }
161
273
  repo_permissions = team_repo_data[team_name]
162
- if not repo_permissions:
274
+ user_roles = team_user_data[team_name]
275
+
276
+ if not repo_permissions and not user_roles:
163
277
  result.append(repo_info)
164
278
  continue
165
279
 
166
- # `permission` can be one of ADMIN, READ, WRITE, TRIAGE, or MAINTAIN
167
- for repo_url, permission in repo_permissions:
168
- repo_info_copy = repo_info.copy()
169
- repo_info_copy[permission] = repo_url
170
- result.append(repo_info_copy)
280
+ if repo_permissions:
281
+ # `permission` can be one of ADMIN, READ, WRITE, TRIAGE, or MAINTAIN
282
+ for repo_url, permission in repo_permissions:
283
+ repo_info_copy = repo_info.copy()
284
+ repo_info_copy[permission] = repo_url
285
+ result.append(repo_info_copy)
286
+ if user_roles:
287
+ # `role` can be one of MAINTAINER, MEMBER
288
+ for user_url, role in user_roles:
289
+ repo_info_copy = repo_info.copy()
290
+ repo_info_copy[role] = user_url
291
+ result.append(repo_info_copy)
171
292
  return result
172
293
 
173
294
 
@@ -203,7 +324,8 @@ def sync_github_teams(
203
324
  ) -> None:
204
325
  teams_paginated, org_data = get_teams(organization, github_url, github_api_key)
205
326
  team_repos = _get_team_repos_for_multiple_teams(teams_paginated.nodes, organization, github_url, github_api_key)
206
- processed_data = transform_teams(teams_paginated, org_data, team_repos)
327
+ team_users = _get_team_users_for_multiple_teams(teams_paginated.nodes, organization, github_url, github_api_key)
328
+ processed_data = transform_teams(teams_paginated, org_data, team_repos, team_users)
207
329
  load_team_repos(neo4j_session, processed_data, common_job_parameters['UPDATE_TAG'], org_data['url'])
208
330
  common_job_parameters['org_url'] = org_data['url']
209
331
  cleanup(neo4j_session, common_job_parameters)
@@ -90,6 +90,7 @@ def get_users(token: str, api_url: str, organization: str) -> Tuple[List[Dict],
90
90
  2. data on the owning GitHub organization
91
91
  see tests.data.github.users.GITHUB_USER_DATA for shape of both
92
92
  """
93
+ logger.info(f"Retrieving users from GitHub organization {organization}")
93
94
  users, org = fetch_all(
94
95
  token,
95
96
  api_url,
@@ -112,6 +113,7 @@ def get_enterprise_owners(token: str, api_url: str, organization: str) -> Tuple[
112
113
  3. data on the owning GitHub organization
113
114
  see tests.data.github.users.GITHUB_ENTERPRISE_OWNER_DATA for shape
114
115
  """
116
+ logger.info(f"Retrieving enterprise owners from GitHub organization {organization}")
115
117
  owners, org = fetch_all(
116
118
  token,
117
119
  api_url,
@@ -163,7 +163,8 @@ def fetch_all(
163
163
 
164
164
  if retry >= retries:
165
165
  logger.error(
166
- f"GitHub: Could not retrieve page of resource `{resource_type}` due to HTTP error.",
166
+ f"GitHub: Could not retrieve page of resource `{resource_type}` due to HTTP error "
167
+ f"after {retry} retries. Raising exception.",
167
168
  exc_info=True,
168
169
  )
169
170
  raise exc