cartography 0.96.0rc3__py3-none-any.whl → 0.96.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

@@ -1,5 +1,6 @@
1
1
  import configparser
2
2
  import logging
3
+ from collections import namedtuple
3
4
  from string import Template
4
5
  from typing import Any
5
6
  from typing import Dict
@@ -12,11 +13,28 @@ from packaging.requirements import Requirement
12
13
  from packaging.utils import canonicalize_name
13
14
 
14
15
  from cartography.intel.github.util import fetch_all
16
+ from cartography.intel.github.util import PaginatedGraphqlData
17
+ from cartography.util import backoff_handler
18
+ from cartography.util import retries_with_backoff
15
19
  from cartography.util import run_cleanup_job
16
20
  from cartography.util import timeit
17
21
 
18
22
  logger = logging.getLogger(__name__)
19
23
 
24
+
25
+ # Representation of a user's permission level and affiliation to a GitHub repo. See:
26
+ # - Permission: https://docs.github.com/en/graphql/reference/enums#repositorypermission
27
+ # - Affiliation: https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
28
+ UserAffiliationAndRepoPermission = namedtuple(
29
+ 'UserAffiliationAndRepoPermission',
30
+ [
31
+ 'user', # Dict
32
+ 'permission', # 'WRITE', 'MAINTAIN', 'ADMIN', etc
33
+ 'affiliation', # 'OUTSIDE', 'DIRECT'
34
+ ],
35
+ )
36
+
37
+
20
38
  GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
21
39
  query($login: String!, $cursor: String) {
22
40
  organization(login: $login)
@@ -59,17 +77,11 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
59
77
  login
60
78
  __typename
61
79
  }
62
- collaborators(affiliation: OUTSIDE, first: 50) {
63
- edges {
64
- permission
65
- }
66
- nodes {
67
- url
68
- login
69
- name
70
- email
71
- company
72
- }
80
+ directCollaborators: collaborators(first: 100, affiliation: DIRECT) {
81
+ totalCount
82
+ }
83
+ outsideCollaborators: collaborators(first: 100, affiliation: OUTSIDE) {
84
+ totalCount
73
85
  }
74
86
  requirements:object(expression: "HEAD:requirements.txt") {
75
87
  ... on Blob {
@@ -89,6 +101,142 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
89
101
  # Note: In the above query, `HEAD` references the default branch.
90
102
  # See https://stackoverflow.com/questions/48935381/github-graphql-api-default-branch-in-repository
91
103
 
104
+ GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL = """
105
+ query($login: String!, $repo: String!, $affiliation: CollaboratorAffiliation!, $cursor: String) {
106
+ organization(login: $login) {
107
+ url
108
+ login
109
+ repository(name: $repo){
110
+ name
111
+ collaborators(first: 50, affiliation: $affiliation, after: $cursor) {
112
+ edges {
113
+ permission
114
+ }
115
+ nodes {
116
+ url
117
+ login
118
+ name
119
+ email
120
+ company
121
+ }
122
+ pageInfo{
123
+ endCursor
124
+ hasNextPage
125
+ }
126
+ }
127
+ }
128
+ }
129
+ rateLimit {
130
+ limit
131
+ cost
132
+ remaining
133
+ resetAt
134
+ }
135
+ }
136
+ """
137
+
138
+
139
+ def _get_repo_collaborators_inner_func(
140
+ org: str,
141
+ api_url: str,
142
+ token: str,
143
+ repo_name: str,
144
+ affiliation: str,
145
+ collab_users: list[dict[str, Any]],
146
+ collab_permission: list[str],
147
+ ) -> None:
148
+ logger.info(f"Loading {affiliation} collaborators for repo {repo_name}.")
149
+ collaborators = _get_repo_collaborators(token, api_url, org, repo_name, affiliation)
150
+
151
+ # Nodes and edges are expected to always be present, given that we only request them when totalCount is > 0;
152
+ # however, GitHub sometimes returns None, as in issues 1334 and 1404.
153
+ for collab in collaborators.nodes or []:
154
+ collab_users.append(collab)
155
+
156
+ # The `or []` is because `.edges` can be None.
157
+ for perm in collaborators.edges or []:
158
+ collab_permission.append(perm['permission'])
159
+
160
+
161
+ def _get_repo_collaborators_for_multiple_repos(
162
+ repo_raw_data: list[dict[str, Any]],
163
+ affiliation: str,
164
+ org: str,
165
+ api_url: str,
166
+ token: str,
167
+ ) -> dict[str, list[UserAffiliationAndRepoPermission]]:
168
+ """
169
+ For every repo in the given list, retrieve the collaborators.
170
+ :param repo_raw_data: A list of dicts representing repos. See tests.data.github.repos.GET_REPOS for data shape.
171
+ :param affiliation: The type of affiliation to retrieve collaborators for. Either 'DIRECT' or 'OUTSIDE'.
172
+ See https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
173
+ :param org: The name of the target Github organization as string.
174
+ :param api_url: The Github v4 API endpoint as string.
175
+ :param token: The Github API token as string.
176
+ :return: A dictionary of repo URL to list of UserAffiliationAndRepoPermission
177
+ """
178
+ result: dict[str, list[UserAffiliationAndRepoPermission]] = {}
179
+ for repo in repo_raw_data:
180
+ repo_name = repo['name']
181
+ repo_url = repo['url']
182
+
183
+ if ((affiliation == 'OUTSIDE' and repo['outsideCollaborators']['totalCount'] == 0) or
184
+ (affiliation == 'DIRECT' and repo['directCollaborators']['totalCount'] == 0)):
185
+ # repo has no collabs of the affiliation type we're looking for, so don't waste time making an API call
186
+ result[repo_url] = []
187
+ continue
188
+
189
+ collab_users: List[dict[str, Any]] = []
190
+ collab_permission: List[str] = []
191
+
192
+ retries_with_backoff(
193
+ _get_repo_collaborators_inner_func,
194
+ TypeError,
195
+ 5,
196
+ backoff_handler,
197
+ )(
198
+ org=org,
199
+ api_url=api_url,
200
+ token=token,
201
+ repo_name=repo_name,
202
+ affiliation=affiliation,
203
+ collab_users=collab_users,
204
+ collab_permission=collab_permission,
205
+ )
206
+
207
+ result[repo_url] = [
208
+ UserAffiliationAndRepoPermission(user, permission, affiliation)
209
+ for user, permission in zip(collab_users, collab_permission)
210
+ ]
211
+ return result
212
+
213
+
214
+ def _get_repo_collaborators(
215
+ token: str, api_url: str, organization: str, repo: str, affiliation: str,
216
+ ) -> PaginatedGraphqlData:
217
+ """
218
+ Retrieve a list of collaborators for a given repository, as described in
219
+ https://docs.github.com/en/graphql/reference/objects#repositorycollaboratorconnection.
220
+ :param token: The Github API token as string.
221
+ :param api_url: The Github v4 API endpoint as string.
222
+ :param organization: The name of the target Github organization as string.
223
+ :param repo: The name of the target Github repository as string.
224
+ :param affiliation: The type of affiliation to retrieve collaborators for. Either 'DIRECT' or 'OUTSIDE'.
225
+ See https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
226
+ :return: A PaginatedGraphqlData object containing the repo's collaborators: `nodes` holds the user dicts and `edges` holds their permissions.
227
+ """
228
+ collaborators, _ = fetch_all(
229
+ token,
230
+ api_url,
231
+ organization,
232
+ GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL,
233
+ 'repository',
234
+ resource_inner_type='collaborators',
235
+ repo=repo,
236
+ affiliation=affiliation,
237
+ )
238
+ return collaborators
239
+
92
240
 
93
241
  @timeit
94
242
  def get(token: str, api_url: str, organization: str) -> List[Dict]:
@@ -111,11 +259,18 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
111
259
  return repos.nodes
112
260
 
113
261
 
114
- def transform(repos_json: List[Dict]) -> Dict:
262
+ def transform(
263
+ repos_json: List[Dict], direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
264
+ outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
265
+ ) -> Dict:
115
266
  """
116
267
  Parses the JSON returned from GitHub API to create data for graph ingestion
117
- :param repos_json: the list of individual repository nodes from GitHub. See tests.data.github.repos.GET_REPOS for
118
- data shape.
268
+ :param repos_json: the list of individual repository nodes from GitHub.
269
+ See tests.data.github.repos.GET_REPOS for data shape.
270
+ :param direct_collaborators: dict of repo URL to list of direct collaborators.
271
+ See tests.data.github.repos.DIRECT_COLLABORATORS for data shape.
272
+ :param outside_collaborators: dict of repo URL to list of outside collaborators.
273
+ See tests.data.github.repos.OUTSIDE_COLLABORATORS for data shape.
119
274
  :return: Dict containing the repos, repo->language mapping, owners->repo mapping, outside collaborators->repo
120
275
  mapping, direct collaborators->repo mapping, and Python requirements files (if any) in a repo.
121
276
  """
@@ -123,7 +278,10 @@ def transform(repos_json: List[Dict]) -> Dict:
123
278
  transformed_repo_languages: List[Dict] = []
124
279
  transformed_repo_owners: List[Dict] = []
125
280
  # See https://docs.github.com/en/graphql/reference/enums#repositorypermission
126
- transformed_collaborators: Dict[str, List[Any]] = {
281
+ transformed_outside_collaborators: Dict[str, List[Any]] = {
282
+ 'ADMIN': [], 'MAINTAIN': [], 'READ': [], 'TRIAGE': [], 'WRITE': [],
283
+ }
284
+ transformed_direct_collaborators: Dict[str, List[Any]] = {
127
285
  'ADMIN': [], 'MAINTAIN': [], 'READ': [], 'TRIAGE': [], 'WRITE': [],
128
286
  }
129
287
  transformed_requirements_files: List[Dict] = []
@@ -131,14 +289,22 @@ def transform(repos_json: List[Dict]) -> Dict:
131
289
  _transform_repo_languages(repo_object['url'], repo_object, transformed_repo_languages)
132
290
  _transform_repo_objects(repo_object, transformed_repo_list)
133
291
  _transform_repo_owners(repo_object['owner']['url'], repo_object, transformed_repo_owners)
134
- _transform_collaborators(repo_object['collaborators'], repo_object['url'], transformed_collaborators)
292
+ _transform_collaborators(
293
+ repo_object['url'], outside_collaborators[repo_object['url']],
294
+ transformed_outside_collaborators,
295
+ )
296
+ _transform_collaborators(
297
+ repo_object['url'], direct_collaborators[repo_object['url']],
298
+ transformed_direct_collaborators,
299
+ )
135
300
  _transform_requirements_txt(repo_object['requirements'], repo_object['url'], transformed_requirements_files)
136
301
  _transform_setup_cfg_requirements(repo_object['setupCfg'], repo_object['url'], transformed_requirements_files)
137
302
  results = {
138
303
  'repos': transformed_repo_list,
139
304
  'repo_languages': transformed_repo_languages,
140
305
  'repo_owners': transformed_repo_owners,
141
- 'repo_collaborators': transformed_collaborators,
306
+ 'repo_outside_collaborators': transformed_outside_collaborators,
307
+ 'repo_direct_collaborators': transformed_direct_collaborators,
142
308
  'python_requirements': transformed_requirements_files,
143
309
  }
144
310
  return results
@@ -229,11 +395,15 @@ def _transform_repo_languages(repo_url: str, repo: Dict, repo_languages: List[Di
229
395
  })
230
396
 
231
397
 
232
- def _transform_collaborators(collaborators: Dict, repo_url: str, transformed_collaborators: Dict) -> None:
398
+ def _transform_collaborators(
399
+ repo_url: str, collaborators: List[UserAffiliationAndRepoPermission], transformed_collaborators: Dict,
400
+ ) -> None:
233
401
  """
234
- Performs data adjustments for outside collaborators in a GitHub repo.
402
+ Performs data adjustments for collaborators in a GitHub repo.
235
403
  Output data shape = [{permission, repo_url, url (the user's URL), login, name}, ...]
236
- :param collaborators: See cartography.tests.data.github.repos for data shape.
404
+ :param collaborators: For data shape, see
405
+ cartography.tests.data.github.repos.DIRECT_COLLABORATORS
406
+ cartography.tests.data.github.repos.OUTSIDE_COLLABORATORS
237
407
  :param repo_url: The URL of the GitHub repo.
238
408
  :param transformed_collaborators: Output dict. Data shape =
239
409
  {'ADMIN': [{ user }, ...], 'MAINTAIN': [{ user }, ...], 'READ': [ ... ], 'TRIAGE': [ ... ], 'WRITE': [ ... ]}
@@ -241,10 +411,11 @@ def _transform_collaborators(collaborators: Dict, repo_url: str, transformed_col
241
411
  """
242
412
  # `collaborators` is sometimes None
243
413
  if collaborators:
244
- for idx, user in enumerate(collaborators['nodes']):
245
- user_permission = collaborators['edges'][idx]['permission']
414
+ for collaborator in collaborators:
415
+ user = collaborator.user
246
416
  user['repo_url'] = repo_url
247
- transformed_collaborators[user_permission].append(user)
417
+ user['affiliation'] = collaborator.affiliation
418
+ transformed_collaborators[collaborator.permission].append(user)
248
419
 
249
420
 
250
421
  def _transform_requirements_txt(
@@ -482,7 +653,7 @@ def load_github_owners(neo4j_session: neo4j.Session, update_tag: int, repo_owner
482
653
 
483
654
 
484
655
  @timeit
485
- def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborators: Dict) -> None:
656
+ def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborators: Dict, affiliation: str) -> None:
486
657
  query = Template("""
487
658
  UNWIND $UserData as user
488
659
 
@@ -502,7 +673,7 @@ def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborat
502
673
  SET o.lastupdated = $UpdateTag
503
674
  """)
504
675
  for collab_type in collaborators.keys():
505
- relationship_label = f"OUTSIDE_COLLAB_{collab_type}"
676
+ relationship_label = f"{affiliation}_COLLAB_{collab_type}"
506
677
  neo4j_session.run(
507
678
  query.safe_substitute(rel_label=relationship_label),
508
679
  UserData=collaborators[collab_type],
@@ -515,7 +686,12 @@ def load(neo4j_session: neo4j.Session, common_job_parameters: Dict, repo_data: D
515
686
  load_github_repos(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repos'])
516
687
  load_github_owners(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_owners'])
517
688
  load_github_languages(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_languages'])
518
- load_collaborators(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_collaborators'])
689
+ load_collaborators(
690
+ neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_direct_collaborators'], 'DIRECT',
691
+ )
692
+ load_collaborators(
693
+ neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['repo_outside_collaborators'], 'OUTSIDE',
694
+ )
519
695
  load_python_requirements(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['python_requirements'])
520
696
 
521
697
 
@@ -561,6 +737,12 @@ def sync(
561
737
  """
562
738
  logger.info("Syncing GitHub repos")
563
739
  repos_json = get(github_api_key, github_url, organization)
564
- repo_data = transform(repos_json)
740
+ direct_collabs = _get_repo_collaborators_for_multiple_repos(
741
+ repos_json, "DIRECT", organization, github_url, github_api_key,
742
+ )
743
+ outside_collabs = _get_repo_collaborators_for_multiple_repos(
744
+ repos_json, "OUTSIDE", organization, github_url, github_api_key,
745
+ )
746
+ repo_data = transform(repos_json, direct_collabs, outside_collabs)
565
747
  load(neo4j_session, common_job_parameters, repo_data)
566
748
  run_cleanup_job('github_repos_cleanup.json', neo4j_session, common_job_parameters)
@@ -1,6 +1,5 @@
1
1
  import logging
2
2
  from collections import namedtuple
3
- from time import sleep
4
3
  from typing import Any
5
4
  from typing import Dict
6
5
  from typing import List
@@ -13,11 +12,27 @@ from cartography.graph.job import GraphJob
13
12
  from cartography.intel.github.util import fetch_all
14
13
  from cartography.intel.github.util import PaginatedGraphqlData
15
14
  from cartography.models.github.teams import GitHubTeamSchema
15
+ from cartography.util import retries_with_backoff
16
16
  from cartography.util import timeit
17
17
 
18
18
  logger = logging.getLogger(__name__)
19
19
 
20
+ # A team's permission on a repo: https://docs.github.com/en/graphql/reference/enums#repositorypermission
20
21
  RepoPermission = namedtuple('RepoPermission', ['repo_url', 'permission'])
22
+ # A team member's role: https://docs.github.com/en/graphql/reference/enums#teammemberrole
23
+ UserRole = namedtuple('UserRole', ['user_url', 'role'])
24
+
25
+
26
+ def backoff_handler(details: Dict) -> None:
27
+ """
28
+ Custom backoff handler for GitHub calls in this module.
29
+ """
30
+ team_name = details['kwargs'].get('team_name') or 'not present in kwargs'
31
+ updated_details = {**details, 'team_name': team_name}
32
+ logger.warning(
33
+ "Backing off {wait:0.1f} seconds after {tries} tries. Calling function {target} for team {team_name}"
34
+ .format(**updated_details),
35
+ )
21
36
 
22
37
 
23
38
  @timeit
@@ -32,7 +47,10 @@ def get_teams(org: str, api_url: str, token: str) -> Tuple[PaginatedGraphqlData,
32
47
  slug
33
48
  url
34
49
  description
35
- repositories(first: 100) {
50
+ repositories {
51
+ totalCount
52
+ }
53
+ members(membership: IMMEDIATE) {
36
54
  totalCount
37
55
  }
38
56
  }
@@ -47,6 +65,26 @@ def get_teams(org: str, api_url: str, token: str) -> Tuple[PaginatedGraphqlData,
47
65
  return fetch_all(token, api_url, org, org_teams_gql, 'teams')
48
66
 
49
67
 
68
+ def _get_teams_repos_inner_func(
69
+ org: str,
70
+ api_url: str,
71
+ token: str,
72
+ team_name: str,
73
+ repo_urls: list[str],
74
+ repo_permissions: list[str],
75
+ ) -> None:
76
+ logger.info(f"Loading team repos for {team_name}.")
77
+ team_repos = _get_team_repos(org, api_url, token, team_name)
78
+
79
+ # The `or []` is because `.nodes` can be None. See:
80
+ # https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection
81
+ for repo in team_repos.nodes or []:
82
+ repo_urls.append(repo['url'])
83
+ # The `or []` is because `.edges` can be None.
84
+ for edge in team_repos.edges or []:
85
+ repo_permissions.append(edge['permission'])
86
+
87
+
50
88
  @timeit
51
89
  def _get_team_repos_for_multiple_teams(
52
90
  team_raw_data: list[dict[str, Any]],
@@ -64,36 +102,22 @@ def _get_team_repos_for_multiple_teams(
64
102
  result[team_name] = []
65
103
  continue
66
104
 
67
- repo_urls = []
68
- repo_permissions = []
69
-
70
- max_tries = 5
71
-
72
- for current_try in range(1, max_tries + 1):
73
- team_repos = _get_team_repos(org, api_url, token, team_name)
74
-
75
- try:
76
- # The `or []` is because `.nodes` can be None. See:
77
- # https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection
78
- for repo in team_repos.nodes or []:
79
- repo_urls.append(repo['url'])
80
-
81
- # The `or []` is because `.edges` can be None.
82
- for edge in team_repos.edges or []:
83
- repo_permissions.append(edge['permission'])
84
- # We're done! Break out of the retry loop.
85
- break
86
-
87
- except TypeError:
88
- # Handles issue #1334
89
- logger.warning(
90
- f"GitHub returned None when trying to find repo or permission data for team {team_name}.",
91
- exc_info=True,
92
- )
93
- if current_try == max_tries:
94
- raise RuntimeError(f"GitHub returned a None repo url for team {team_name}, retries exhausted.")
95
- sleep(current_try ** 2)
105
+ repo_urls: List[str] = []
106
+ repo_permissions: List[str] = []
96
107
 
108
+ retries_with_backoff(
109
+ _get_teams_repos_inner_func,
110
+ TypeError,
111
+ 5,
112
+ backoff_handler,
113
+ )(
114
+ org=org,
115
+ api_url=api_url,
116
+ token=token,
117
+ team_name=team_name,
118
+ repo_urls=repo_urls,
119
+ repo_permissions=repo_permissions,
120
+ )
97
121
  # Shape = [(repo_url, 'WRITE'), ...]
98
122
  result[team_name] = [RepoPermission(url, perm) for url, perm in zip(repo_urls, repo_permissions)]
99
123
  return result
@@ -142,10 +166,97 @@ def _get_team_repos(org: str, api_url: str, token: str, team: str) -> PaginatedG
142
166
  return team_repos
143
167
 
144
168
 
169
+ def _get_team_users_for_multiple_teams(
170
+ team_raw_data: list[dict[str, Any]],
171
+ org: str,
172
+ api_url: str,
173
+ token: str,
174
+ ) -> dict[str, list[UserRole]]:
175
+ result: dict[str, list[UserRole]] = {}
176
+ for team in team_raw_data:
177
+ team_name = team['slug']
178
+ user_count = team['members']['totalCount']
179
+
180
+ if user_count == 0:
181
+ # This team has no users so let's move on
182
+ result[team_name] = []
183
+ continue
184
+
185
+ user_urls: List[str] = []
186
+ user_roles: List[str] = []
187
+
188
+ def get_teams_users_inner_func(
189
+ org: str, api_url: str, token: str, team_name: str,
190
+ user_urls: List[str], user_roles: List[str],
191
+ ) -> None:
192
+ logger.info(f"Loading team users for {team_name}.")
193
+ team_users = _get_team_users(org, api_url, token, team_name)
194
+ # The `or []` is because `.nodes` can be None. See:
195
+ # https://docs.github.com/en/graphql/reference/objects#teammemberconnection
196
+ for user in team_users.nodes or []:
197
+ user_urls.append(user['url'])
198
+ # The `or []` is because `.edges` can be None.
199
+ for edge in team_users.edges or []:
200
+ user_roles.append(edge['role'])
201
+
202
+ retries_with_backoff(get_teams_users_inner_func, TypeError, 5, backoff_handler)(
203
+ org=org, api_url=api_url, token=token, team_name=team_name, user_urls=user_urls, user_roles=user_roles,
204
+ )
205
+
206
+ # Shape = [(user_url, 'MAINTAINER'), ...]
207
+ result[team_name] = [UserRole(url, role) for url, role in zip(user_urls, user_roles)]
208
+ return result
209
+
210
+
211
+ @timeit
212
+ def _get_team_users(org: str, api_url: str, token: str, team: str) -> PaginatedGraphqlData:
213
+ team_users_gql = """
214
+ query($login: String!, $team: String!, $cursor: String) {
215
+ organization(login: $login) {
216
+ url
217
+ login
218
+ team(slug: $team) {
219
+ slug
220
+ members(first: 100, after: $cursor, membership: IMMEDIATE) {
221
+ totalCount
222
+ nodes {
223
+ url
224
+ }
225
+ edges {
226
+ role
227
+ }
228
+ pageInfo {
229
+ endCursor
230
+ hasNextPage
231
+ }
232
+ }
233
+ }
234
+ }
235
+ rateLimit {
236
+ limit
237
+ cost
238
+ remaining
239
+ resetAt
240
+ }
241
+ }
242
+ """
243
+ team_users, _ = fetch_all(
244
+ token,
245
+ api_url,
246
+ org,
247
+ team_users_gql,
248
+ 'team',
249
+ resource_inner_type='members',
250
+ team=team,
251
+ )
252
+ return team_users
253
+
254
+
145
255
  def transform_teams(
146
256
  team_paginated_data: PaginatedGraphqlData,
147
257
  org_data: Dict[str, Any],
148
258
  team_repo_data: dict[str, list[RepoPermission]],
259
+ team_user_data: dict[str, list[UserRole]],
149
260
  ) -> list[dict[str, Any]]:
150
261
  result = []
151
262
  for team in team_paginated_data.nodes:
@@ -155,19 +266,29 @@ def transform_teams(
155
266
  'url': team['url'],
156
267
  'description': team['description'],
157
268
  'repo_count': team['repositories']['totalCount'],
269
+ 'member_count': team['members']['totalCount'],
158
270
  'org_url': org_data['url'],
159
271
  'org_login': org_data['login'],
160
272
  }
161
273
  repo_permissions = team_repo_data[team_name]
162
- if not repo_permissions:
274
+ user_roles = team_user_data[team_name]
275
+
276
+ if not repo_permissions and not user_roles:
163
277
  result.append(repo_info)
164
278
  continue
165
279
 
166
- # `permission` can be one of ADMIN, READ, WRITE, TRIAGE, or MAINTAIN
167
- for repo_url, permission in repo_permissions:
168
- repo_info_copy = repo_info.copy()
169
- repo_info_copy[permission] = repo_url
170
- result.append(repo_info_copy)
280
+ if repo_permissions:
281
+ # `permission` can be one of ADMIN, READ, WRITE, TRIAGE, or MAINTAIN
282
+ for repo_url, permission in repo_permissions:
283
+ repo_info_copy = repo_info.copy()
284
+ repo_info_copy[permission] = repo_url
285
+ result.append(repo_info_copy)
286
+ if user_roles:
287
+ # `role` can be one of MAINTAINER, MEMBER
288
+ for user_url, role in user_roles:
289
+ repo_info_copy = repo_info.copy()
290
+ repo_info_copy[role] = user_url
291
+ result.append(repo_info_copy)
171
292
  return result
172
293
 
173
294
 
@@ -203,7 +324,8 @@ def sync_github_teams(
203
324
  ) -> None:
204
325
  teams_paginated, org_data = get_teams(organization, github_url, github_api_key)
205
326
  team_repos = _get_team_repos_for_multiple_teams(teams_paginated.nodes, organization, github_url, github_api_key)
206
- processed_data = transform_teams(teams_paginated, org_data, team_repos)
327
+ team_users = _get_team_users_for_multiple_teams(teams_paginated.nodes, organization, github_url, github_api_key)
328
+ processed_data = transform_teams(teams_paginated, org_data, team_repos, team_users)
207
329
  load_team_repos(neo4j_session, processed_data, common_job_parameters['UPDATE_TAG'], org_data['url'])
208
330
  common_job_parameters['org_url'] = org_data['url']
209
331
  cleanup(neo4j_session, common_job_parameters)