cartography-0.102.0rc1-py3-none-any.whl → cartography-0.103.0rc1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/__main__.py +1 -2
- cartography/_version.py +2 -2
- cartography/cli.py +302 -253
- cartography/client/core/tx.py +39 -18
- cartography/config.py +4 -0
- cartography/driftdetect/__main__.py +1 -2
- cartography/driftdetect/add_shortcut.py +10 -2
- cartography/driftdetect/cli.py +71 -75
- cartography/driftdetect/detect_deviations.py +7 -3
- cartography/driftdetect/get_states.py +20 -8
- cartography/driftdetect/model.py +5 -5
- cartography/driftdetect/serializers.py +8 -6
- cartography/driftdetect/storage.py +2 -2
- cartography/graph/cleanupbuilder.py +35 -15
- cartography/graph/job.py +46 -17
- cartography/graph/querybuilder.py +165 -80
- cartography/graph/statement.py +35 -26
- cartography/intel/analysis.py +4 -1
- cartography/intel/aws/__init__.py +114 -55
- cartography/intel/aws/apigateway.py +134 -63
- cartography/intel/aws/cloudtrail.py +127 -0
- cartography/intel/aws/config.py +56 -20
- cartography/intel/aws/dynamodb.py +108 -40
- cartography/intel/aws/ec2/__init__.py +2 -2
- cartography/intel/aws/ec2/auto_scaling_groups.py +181 -78
- cartography/intel/aws/ec2/elastic_ip_addresses.py +41 -13
- cartography/intel/aws/ec2/images.py +49 -20
- cartography/intel/aws/ec2/instances.py +234 -136
- cartography/intel/aws/ec2/internet_gateways.py +40 -11
- cartography/intel/aws/ec2/key_pairs.py +44 -20
- cartography/intel/aws/ec2/launch_templates.py +101 -59
- cartography/intel/aws/ec2/load_balancer_v2s.py +104 -39
- cartography/intel/aws/ec2/load_balancers.py +82 -42
- cartography/intel/aws/ec2/network_acls.py +89 -65
- cartography/intel/aws/ec2/network_interfaces.py +146 -87
- cartography/intel/aws/ec2/reserved_instances.py +45 -16
- cartography/intel/aws/ec2/route_tables.py +327 -0
- cartography/intel/aws/ec2/security_groups.py +71 -21
- cartography/intel/aws/ec2/snapshots.py +61 -22
- cartography/intel/aws/ec2/subnets.py +54 -18
- cartography/intel/aws/ec2/tgw.py +100 -34
- cartography/intel/aws/ec2/util.py +1 -1
- cartography/intel/aws/ec2/volumes.py +69 -41
- cartography/intel/aws/ec2/vpc.py +37 -12
- cartography/intel/aws/ec2/vpc_peerings.py +83 -24
- cartography/intel/aws/ecr.py +88 -32
- cartography/intel/aws/ecs.py +83 -47
- cartography/intel/aws/eks.py +55 -29
- cartography/intel/aws/elasticache.py +42 -18
- cartography/intel/aws/elasticsearch.py +57 -20
- cartography/intel/aws/emr.py +61 -23
- cartography/intel/aws/iam.py +401 -145
- cartography/intel/aws/iam_instance_profiles.py +22 -22
- cartography/intel/aws/identitycenter.py +71 -37
- cartography/intel/aws/inspector.py +159 -89
- cartography/intel/aws/kms.py +92 -38
- cartography/intel/aws/lambda_function.py +103 -34
- cartography/intel/aws/organizations.py +30 -10
- cartography/intel/aws/permission_relationships.py +133 -51
- cartography/intel/aws/rds.py +249 -85
- cartography/intel/aws/redshift.py +107 -46
- cartography/intel/aws/resourcegroupstaggingapi.py +120 -66
- cartography/intel/aws/resources.py +53 -44
- cartography/intel/aws/route53.py +108 -61
- cartography/intel/aws/s3.py +168 -83
- cartography/intel/aws/s3accountpublicaccessblock.py +157 -0
- cartography/intel/aws/secretsmanager.py +24 -12
- cartography/intel/aws/securityhub.py +20 -9
- cartography/intel/aws/sns.py +166 -0
- cartography/intel/aws/sqs.py +60 -28
- cartography/intel/aws/ssm.py +70 -30
- cartography/intel/aws/util/arns.py +7 -7
- cartography/intel/aws/util/common.py +31 -4
- cartography/intel/azure/__init__.py +78 -19
- cartography/intel/azure/compute.py +101 -27
- cartography/intel/azure/cosmosdb.py +496 -170
- cartography/intel/azure/sql.py +296 -105
- cartography/intel/azure/storage.py +322 -113
- cartography/intel/azure/subscription.py +39 -23
- cartography/intel/azure/tenant.py +13 -4
- cartography/intel/azure/util/credentials.py +95 -55
- cartography/intel/bigfix/__init__.py +2 -2
- cartography/intel/bigfix/computers.py +93 -65
- cartography/intel/create_indexes.py +3 -2
- cartography/intel/crowdstrike/__init__.py +11 -9
- cartography/intel/crowdstrike/endpoints.py +5 -1
- cartography/intel/crowdstrike/spotlight.py +8 -3
- cartography/intel/cve/__init__.py +46 -13
- cartography/intel/cve/feed.py +48 -12
- cartography/intel/digitalocean/__init__.py +22 -13
- cartography/intel/digitalocean/compute.py +75 -108
- cartography/intel/digitalocean/management.py +44 -80
- cartography/intel/digitalocean/platform.py +48 -43
- cartography/intel/dns.py +36 -10
- cartography/intel/duo/__init__.py +21 -16
- cartography/intel/duo/api_host.py +14 -9
- cartography/intel/duo/endpoints.py +50 -45
- cartography/intel/duo/groups.py +18 -14
- cartography/intel/duo/phones.py +37 -34
- cartography/intel/duo/tokens.py +26 -23
- cartography/intel/duo/users.py +54 -50
- cartography/intel/duo/web_authn_credentials.py +30 -25
- cartography/intel/entra/__init__.py +25 -7
- cartography/intel/entra/ou.py +112 -0
- cartography/intel/entra/users.py +69 -63
- cartography/intel/gcp/__init__.py +185 -49
- cartography/intel/gcp/compute.py +418 -231
- cartography/intel/gcp/crm.py +96 -43
- cartography/intel/gcp/dns.py +60 -19
- cartography/intel/gcp/gke.py +72 -38
- cartography/intel/gcp/iam.py +61 -41
- cartography/intel/gcp/storage.py +84 -55
- cartography/intel/github/__init__.py +13 -11
- cartography/intel/github/repos.py +270 -137
- cartography/intel/github/teams.py +170 -88
- cartography/intel/github/users.py +70 -39
- cartography/intel/github/util.py +36 -34
- cartography/intel/gsuite/__init__.py +47 -26
- cartography/intel/gsuite/api.py +73 -30
- cartography/intel/jamf/__init__.py +19 -1
- cartography/intel/jamf/computers.py +30 -7
- cartography/intel/jamf/util.py +7 -2
- cartography/intel/kandji/__init__.py +6 -3
- cartography/intel/kandji/devices.py +14 -8
- cartography/intel/kubernetes/namespaces.py +7 -4
- cartography/intel/kubernetes/pods.py +7 -4
- cartography/intel/kubernetes/services.py +8 -4
- cartography/intel/lastpass/__init__.py +2 -2
- cartography/intel/lastpass/users.py +23 -12
- cartography/intel/oci/__init__.py +44 -11
- cartography/intel/oci/iam.py +134 -38
- cartography/intel/oci/organizations.py +13 -6
- cartography/intel/oci/utils.py +43 -20
- cartography/intel/okta/__init__.py +66 -15
- cartography/intel/okta/applications.py +42 -20
- cartography/intel/okta/awssaml.py +93 -33
- cartography/intel/okta/factors.py +16 -4
- cartography/intel/okta/groups.py +56 -29
- cartography/intel/okta/organization.py +5 -1
- cartography/intel/okta/origins.py +6 -2
- cartography/intel/okta/roles.py +15 -5
- cartography/intel/okta/users.py +20 -8
- cartography/intel/okta/utils.py +6 -4
- cartography/intel/pagerduty/__init__.py +8 -7
- cartography/intel/pagerduty/escalation_policies.py +18 -6
- cartography/intel/pagerduty/schedules.py +12 -4
- cartography/intel/pagerduty/services.py +11 -4
- cartography/intel/pagerduty/teams.py +8 -3
- cartography/intel/pagerduty/users.py +3 -1
- cartography/intel/pagerduty/vendors.py +3 -1
- cartography/intel/semgrep/__init__.py +24 -6
- cartography/intel/semgrep/dependencies.py +50 -28
- cartography/intel/semgrep/deployment.py +3 -1
- cartography/intel/semgrep/findings.py +42 -18
- cartography/intel/snipeit/__init__.py +17 -3
- cartography/intel/snipeit/asset.py +12 -6
- cartography/intel/snipeit/user.py +8 -5
- cartography/intel/snipeit/util.py +9 -4
- cartography/models/aws/apigateway.py +21 -17
- cartography/models/aws/apigatewaycertificate.py +28 -22
- cartography/models/aws/apigatewayresource.py +28 -20
- cartography/models/aws/apigatewaystage.py +33 -25
- cartography/models/aws/cloudtrail/__init__.py +0 -0
- cartography/models/aws/cloudtrail/trail.py +61 -0
- cartography/models/aws/dynamodb/gsi.py +30 -22
- cartography/models/aws/dynamodb/tables.py +25 -17
- cartography/models/aws/ec2/auto_scaling_groups.py +102 -82
- cartography/models/aws/ec2/images.py +36 -34
- cartography/models/aws/ec2/instances.py +51 -45
- cartography/models/aws/ec2/keypair.py +21 -16
- cartography/models/aws/ec2/keypair_instance.py +28 -21
- cartography/models/aws/ec2/launch_configurations.py +30 -26
- cartography/models/aws/ec2/launch_template_versions.py +48 -38
- cartography/models/aws/ec2/launch_templates.py +21 -17
- cartography/models/aws/ec2/load_balancer_listeners.py +27 -23
- cartography/models/aws/ec2/load_balancers.py +47 -37
- cartography/models/aws/ec2/network_acl_rules.py +38 -30
- cartography/models/aws/ec2/network_acls.py +38 -29
- cartography/models/aws/ec2/networkinterface_instance.py +52 -39
- cartography/models/aws/ec2/networkinterfaces.py +53 -37
- cartography/models/aws/ec2/privateip_networkinterface.py +32 -22
- cartography/models/aws/ec2/reservations.py +18 -14
- cartography/models/aws/ec2/route_table_associations.py +97 -0
- cartography/models/aws/ec2/route_tables.py +128 -0
- cartography/models/aws/ec2/routes.py +85 -0
- cartography/models/aws/ec2/securitygroup_instance.py +29 -20
- cartography/models/aws/ec2/securitygroup_networkinterface.py +24 -15
- cartography/models/aws/ec2/subnet_instance.py +24 -19
- cartography/models/aws/ec2/subnet_networkinterface.py +40 -31
- cartography/models/aws/ec2/volumes.py +47 -40
- cartography/models/aws/eks/clusters.py +23 -21
- cartography/models/aws/emr.py +32 -30
- cartography/models/aws/iam/instanceprofile.py +33 -24
- cartography/models/aws/identitycenter/awsidentitycenter.py +18 -14
- cartography/models/aws/identitycenter/awspermissionset.py +37 -29
- cartography/models/aws/identitycenter/awsssouser.py +23 -21
- cartography/models/aws/inspector/findings.py +77 -65
- cartography/models/aws/inspector/packages.py +35 -29
- cartography/models/aws/s3/__init__.py +0 -0
- cartography/models/aws/s3/account_public_access_block.py +51 -0
- cartography/models/aws/sns/__init__.py +0 -0
- cartography/models/aws/sns/topic.py +50 -0
- cartography/models/aws/ssm/instance_information.py +51 -39
- cartography/models/aws/ssm/instance_patch.py +32 -26
- cartography/models/bigfix/bigfix_computer.py +42 -38
- cartography/models/bigfix/bigfix_root.py +3 -3
- cartography/models/core/common.py +12 -10
- cartography/models/core/nodes.py +5 -2
- cartography/models/core/relationships.py +14 -6
- cartography/models/crowdstrike/hosts.py +37 -35
- cartography/models/cve/cve.py +34 -32
- cartography/models/cve/cve_feed.py +6 -6
- cartography/models/digitalocean/__init__.py +0 -0
- cartography/models/digitalocean/account.py +21 -0
- cartography/models/digitalocean/droplet.py +56 -0
- cartography/models/digitalocean/project.py +48 -0
- cartography/models/duo/api_host.py +3 -3
- cartography/models/duo/endpoint.py +43 -41
- cartography/models/duo/group.py +14 -14
- cartography/models/duo/phone.py +27 -27
- cartography/models/duo/token.py +16 -16
- cartography/models/duo/user.py +46 -44
- cartography/models/duo/web_authn_credential.py +27 -19
- cartography/models/entra/ou.py +48 -0
- cartography/models/entra/tenant.py +24 -18
- cartography/models/entra/user.py +64 -48
- cartography/models/gcp/iam.py +23 -23
- cartography/models/github/orgs.py +5 -4
- cartography/models/github/teams.py +37 -31
- cartography/models/github/users.py +34 -23
- cartography/models/kandji/device.py +22 -16
- cartography/models/kandji/tenant.py +6 -4
- cartography/models/lastpass/tenant.py +3 -3
- cartography/models/lastpass/user.py +32 -28
- cartography/models/semgrep/dependencies.py +36 -24
- cartography/models/semgrep/deployment.py +5 -5
- cartography/models/semgrep/findings.py +58 -42
- cartography/models/semgrep/locations.py +27 -21
- cartography/models/snipeit/asset.py +30 -21
- cartography/models/snipeit/tenant.py +6 -4
- cartography/models/snipeit/user.py +19 -12
- cartography/stats.py +3 -3
- cartography/sync.py +107 -31
- cartography/util.py +84 -62
- {cartography-0.102.0rc1.dist-info → cartography-0.103.0rc1.dist-info}/METADATA +3 -14
- cartography-0.103.0rc1.dist-info/RECORD +396 -0
- {cartography-0.102.0rc1.dist-info → cartography-0.103.0rc1.dist-info}/WHEEL +1 -1
- cartography-0.102.0rc1.dist-info/RECORD +0 -377
- {cartography-0.102.0rc1.dist-info → cartography-0.103.0rc1.dist-info}/entry_points.txt +0 -0
- {cartography-0.102.0rc1.dist-info → cartography-0.103.0rc1.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.102.0rc1.dist-info → cartography-0.103.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -26,11 +26,11 @@ logger = logging.getLogger(__name__)
|
|
|
26
26
|
# - Permission: https://docs.github.com/en/graphql/reference/enums#repositorypermission
|
|
27
27
|
# - Affiliation: https://docs.github.com/en/graphql/reference/enums#collaboratoraffiliation
|
|
28
28
|
UserAffiliationAndRepoPermission = namedtuple(
|
|
29
|
-
|
|
29
|
+
"UserAffiliationAndRepoPermission",
|
|
30
30
|
[
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
31
|
+
"user", # Dict
|
|
32
|
+
"permission", # 'WRITE', 'MAINTAIN', 'ADMIN', etc
|
|
33
|
+
"affiliation", # 'OUTSIDE', 'DIRECT'
|
|
34
34
|
],
|
|
35
35
|
)
|
|
36
36
|
|
|
@@ -137,28 +137,37 @@ GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL = """
|
|
|
137
137
|
|
|
138
138
|
|
|
139
139
|
def _get_repo_collaborators_inner_func(
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
140
|
+
org: str,
|
|
141
|
+
api_url: str,
|
|
142
|
+
token: str,
|
|
143
|
+
repo_raw_data: list[dict[str, Any]],
|
|
144
|
+
affiliation: str,
|
|
145
|
+
collab_users: list[dict[str, Any]],
|
|
146
|
+
collab_permission: list[str],
|
|
147
147
|
) -> dict[str, list[UserAffiliationAndRepoPermission]]:
|
|
148
148
|
result: dict[str, list[UserAffiliationAndRepoPermission]] = {}
|
|
149
149
|
|
|
150
150
|
for repo in repo_raw_data:
|
|
151
|
-
repo_name = repo[
|
|
152
|
-
repo_url = repo[
|
|
153
|
-
|
|
154
|
-
if (
|
|
155
|
-
|
|
151
|
+
repo_name = repo["name"]
|
|
152
|
+
repo_url = repo["url"]
|
|
153
|
+
|
|
154
|
+
if (
|
|
155
|
+
affiliation == "OUTSIDE" and repo["outsideCollaborators"]["totalCount"] == 0
|
|
156
|
+
) or (
|
|
157
|
+
affiliation == "DIRECT" and repo["directCollaborators"]["totalCount"] == 0
|
|
158
|
+
):
|
|
156
159
|
# repo has no collabs of the affiliation type we're looking for, so don't waste time making an API call
|
|
157
160
|
result[repo_url] = []
|
|
158
161
|
continue
|
|
159
162
|
|
|
160
163
|
logger.info(f"Loading {affiliation} collaborators for repo {repo_name}.")
|
|
161
|
-
collaborators = _get_repo_collaborators(
|
|
164
|
+
collaborators = _get_repo_collaborators(
|
|
165
|
+
token,
|
|
166
|
+
api_url,
|
|
167
|
+
org,
|
|
168
|
+
repo_name,
|
|
169
|
+
affiliation,
|
|
170
|
+
)
|
|
162
171
|
|
|
163
172
|
# nodes and edges are expected to always be present given that we only call for them if totalCount is > 0
|
|
164
173
|
# however sometimes GitHub returns None, as in issue 1334 and 1404.
|
|
@@ -167,7 +176,7 @@ def _get_repo_collaborators_inner_func(
|
|
|
167
176
|
|
|
168
177
|
# The `or []` is because `.edges` can be None.
|
|
169
178
|
for perm in collaborators.edges or []:
|
|
170
|
-
collab_permission.append(perm[
|
|
179
|
+
collab_permission.append(perm["permission"])
|
|
171
180
|
|
|
172
181
|
result[repo_url] = [
|
|
173
182
|
UserAffiliationAndRepoPermission(user, permission, affiliation)
|
|
@@ -177,11 +186,11 @@ def _get_repo_collaborators_inner_func(
|
|
|
177
186
|
|
|
178
187
|
|
|
179
188
|
def _get_repo_collaborators_for_multiple_repos(
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
189
|
+
repo_raw_data: list[dict[str, Any]],
|
|
190
|
+
affiliation: str,
|
|
191
|
+
org: str,
|
|
192
|
+
api_url: str,
|
|
193
|
+
token: str,
|
|
185
194
|
) -> dict[str, list[UserAffiliationAndRepoPermission]]:
|
|
186
195
|
"""
|
|
187
196
|
For every repo in the given list, retrieve the collaborators.
|
|
@@ -193,7 +202,9 @@ def _get_repo_collaborators_for_multiple_repos(
|
|
|
193
202
|
:param token: The Github API token as string.
|
|
194
203
|
:return: A dictionary of repo URL to list of UserAffiliationAndRepoPermission
|
|
195
204
|
"""
|
|
196
|
-
logger.info(
|
|
205
|
+
logger.info(
|
|
206
|
+
f'Retrieving repo collaborators for affiliation "{affiliation}" on org "{org}".',
|
|
207
|
+
)
|
|
197
208
|
collab_users: List[dict[str, Any]] = []
|
|
198
209
|
collab_permission: List[str] = []
|
|
199
210
|
|
|
@@ -215,7 +226,11 @@ def _get_repo_collaborators_for_multiple_repos(
|
|
|
215
226
|
|
|
216
227
|
|
|
217
228
|
def _get_repo_collaborators(
|
|
218
|
-
|
|
229
|
+
token: str,
|
|
230
|
+
api_url: str,
|
|
231
|
+
organization: str,
|
|
232
|
+
repo: str,
|
|
233
|
+
affiliation: str,
|
|
219
234
|
) -> PaginatedGraphqlData:
|
|
220
235
|
"""
|
|
221
236
|
Retrieve a list of collaborators for a given repository, as described in
|
|
@@ -233,8 +248,8 @@ def _get_repo_collaborators(
|
|
|
233
248
|
api_url,
|
|
234
249
|
organization,
|
|
235
250
|
GITHUB_REPO_COLLABS_PAGINATED_GRAPHQL,
|
|
236
|
-
|
|
237
|
-
resource_inner_type=
|
|
251
|
+
"repository",
|
|
252
|
+
resource_inner_type="collaborators",
|
|
238
253
|
repo=repo,
|
|
239
254
|
affiliation=affiliation,
|
|
240
255
|
)
|
|
@@ -257,15 +272,15 @@ def get(token: str, api_url: str, organization: str) -> List[Dict]:
|
|
|
257
272
|
api_url,
|
|
258
273
|
organization,
|
|
259
274
|
GITHUB_ORG_REPOS_PAGINATED_GRAPHQL,
|
|
260
|
-
|
|
275
|
+
"repositories",
|
|
261
276
|
)
|
|
262
277
|
return repos.nodes
|
|
263
278
|
|
|
264
279
|
|
|
265
280
|
def transform(
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
281
|
+
repos_json: List[Dict],
|
|
282
|
+
direct_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
|
|
283
|
+
outside_collaborators: dict[str, List[UserAffiliationAndRepoPermission]],
|
|
269
284
|
) -> Dict:
|
|
270
285
|
"""
|
|
271
286
|
Parses the JSON returned from GitHub API to create data for graph ingestion
|
|
@@ -283,41 +298,65 @@ def transform(
|
|
|
283
298
|
transformed_repo_owners: List[Dict] = []
|
|
284
299
|
# See https://docs.github.com/en/graphql/reference/enums#repositorypermission
|
|
285
300
|
transformed_outside_collaborators: Dict[str, List[Any]] = {
|
|
286
|
-
|
|
301
|
+
"ADMIN": [],
|
|
302
|
+
"MAINTAIN": [],
|
|
303
|
+
"READ": [],
|
|
304
|
+
"TRIAGE": [],
|
|
305
|
+
"WRITE": [],
|
|
287
306
|
}
|
|
288
307
|
transformed_direct_collaborators: Dict[str, List[Any]] = {
|
|
289
|
-
|
|
308
|
+
"ADMIN": [],
|
|
309
|
+
"MAINTAIN": [],
|
|
310
|
+
"READ": [],
|
|
311
|
+
"TRIAGE": [],
|
|
312
|
+
"WRITE": [],
|
|
290
313
|
}
|
|
291
314
|
transformed_requirements_files: List[Dict] = []
|
|
292
315
|
for repo_object in repos_json:
|
|
293
|
-
_transform_repo_languages(
|
|
316
|
+
_transform_repo_languages(
|
|
317
|
+
repo_object["url"],
|
|
318
|
+
repo_object,
|
|
319
|
+
transformed_repo_languages,
|
|
320
|
+
)
|
|
294
321
|
_transform_repo_objects(repo_object, transformed_repo_list)
|
|
295
|
-
_transform_repo_owners(
|
|
322
|
+
_transform_repo_owners(
|
|
323
|
+
repo_object["owner"]["url"],
|
|
324
|
+
repo_object,
|
|
325
|
+
transformed_repo_owners,
|
|
326
|
+
)
|
|
296
327
|
|
|
297
328
|
# Allow sync to continue if we didn't have permissions to list collaborators
|
|
298
|
-
repo_url = repo_object[
|
|
329
|
+
repo_url = repo_object["url"]
|
|
299
330
|
if repo_url in outside_collaborators:
|
|
300
331
|
_transform_collaborators(
|
|
301
|
-
repo_object[
|
|
302
|
-
outside_collaborators[repo_object[
|
|
332
|
+
repo_object["url"],
|
|
333
|
+
outside_collaborators[repo_object["url"]],
|
|
303
334
|
transformed_outside_collaborators,
|
|
304
335
|
)
|
|
305
336
|
if repo_url in direct_collaborators:
|
|
306
337
|
_transform_collaborators(
|
|
307
|
-
repo_object[
|
|
308
|
-
direct_collaborators[repo_object[
|
|
338
|
+
repo_object["url"],
|
|
339
|
+
direct_collaborators[repo_object["url"]],
|
|
309
340
|
transformed_direct_collaborators,
|
|
310
341
|
)
|
|
311
342
|
|
|
312
|
-
_transform_requirements_txt(
|
|
313
|
-
|
|
343
|
+
_transform_requirements_txt(
|
|
344
|
+
repo_object["requirements"],
|
|
345
|
+
repo_url,
|
|
346
|
+
transformed_requirements_files,
|
|
347
|
+
)
|
|
348
|
+
_transform_setup_cfg_requirements(
|
|
349
|
+
repo_object["setupCfg"],
|
|
350
|
+
repo_url,
|
|
351
|
+
transformed_requirements_files,
|
|
352
|
+
)
|
|
314
353
|
results = {
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
354
|
+
"repos": transformed_repo_list,
|
|
355
|
+
"repo_languages": transformed_repo_languages,
|
|
356
|
+
"repo_owners": transformed_repo_owners,
|
|
357
|
+
"repo_outside_collaborators": transformed_outside_collaborators,
|
|
358
|
+
"repo_direct_collaborators": transformed_direct_collaborators,
|
|
359
|
+
"python_requirements": transformed_requirements_files,
|
|
321
360
|
}
|
|
322
361
|
return results
|
|
323
362
|
|
|
@@ -346,33 +385,37 @@ def _transform_repo_objects(input_repo_object: Dict, out_repo_list: List[Dict])
|
|
|
346
385
|
:return: Nothing
|
|
347
386
|
"""
|
|
348
387
|
# Create a unique ID for a GitHubBranch node representing the default branch of this repo object.
|
|
349
|
-
dbr = input_repo_object[
|
|
350
|
-
default_branch_name = dbr[
|
|
351
|
-
default_branch_id =
|
|
388
|
+
dbr = input_repo_object["defaultBranchRef"]
|
|
389
|
+
default_branch_name = dbr["name"] if dbr else None
|
|
390
|
+
default_branch_id = (
|
|
391
|
+
_create_default_branch_id(input_repo_object["url"], dbr["id"]) if dbr else None
|
|
392
|
+
)
|
|
352
393
|
|
|
353
394
|
# Create a git:// URL from the given SSH URL, if it exists.
|
|
354
|
-
ssh_url = input_repo_object.get(
|
|
395
|
+
ssh_url = input_repo_object.get("sshUrl")
|
|
355
396
|
git_url = _create_git_url_from_ssh_url(ssh_url) if ssh_url else None
|
|
356
397
|
|
|
357
|
-
out_repo_list.append(
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
398
|
+
out_repo_list.append(
|
|
399
|
+
{
|
|
400
|
+
"id": input_repo_object["url"],
|
|
401
|
+
"createdat": input_repo_object["createdAt"],
|
|
402
|
+
"name": input_repo_object["name"],
|
|
403
|
+
"fullname": input_repo_object["nameWithOwner"],
|
|
404
|
+
"description": input_repo_object["description"],
|
|
405
|
+
"primarylanguage": input_repo_object["primaryLanguage"],
|
|
406
|
+
"homepage": input_repo_object["homepageUrl"],
|
|
407
|
+
"defaultbranch": default_branch_name,
|
|
408
|
+
"defaultbranchid": default_branch_id,
|
|
409
|
+
"private": input_repo_object["isPrivate"],
|
|
410
|
+
"disabled": input_repo_object["isDisabled"],
|
|
411
|
+
"archived": input_repo_object["isArchived"],
|
|
412
|
+
"locked": input_repo_object["isLocked"],
|
|
413
|
+
"giturl": git_url,
|
|
414
|
+
"url": input_repo_object["url"],
|
|
415
|
+
"sshurl": ssh_url,
|
|
416
|
+
"updatedat": input_repo_object["updatedAt"],
|
|
417
|
+
},
|
|
418
|
+
)
|
|
376
419
|
|
|
377
420
|
|
|
378
421
|
def _transform_repo_owners(owner_id: str, repo: Dict, repo_owners: List[Dict]) -> None:
|
|
@@ -383,15 +426,21 @@ def _transform_repo_owners(owner_id: str, repo: Dict, repo_owners: List[Dict]) -
|
|
|
383
426
|
:param repo_owners: Output array to append transformed results to.
|
|
384
427
|
:return: Nothing.
|
|
385
428
|
"""
|
|
386
|
-
repo_owners.append(
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
429
|
+
repo_owners.append(
|
|
430
|
+
{
|
|
431
|
+
"repo_id": repo["url"],
|
|
432
|
+
"owner": repo["owner"]["login"],
|
|
433
|
+
"owner_id": owner_id,
|
|
434
|
+
"type": repo["owner"]["__typename"],
|
|
435
|
+
},
|
|
436
|
+
)
|
|
392
437
|
|
|
393
438
|
|
|
394
|
-
def _transform_repo_languages(
|
|
439
|
+
def _transform_repo_languages(
|
|
440
|
+
repo_url: str,
|
|
441
|
+
repo: Dict,
|
|
442
|
+
repo_languages: List[Dict],
|
|
443
|
+
) -> None:
|
|
395
444
|
"""
|
|
396
445
|
Helper function to transform the languages in a GitHub repo.
|
|
397
446
|
:param repo_url: The URL of the repo.
|
|
@@ -399,16 +448,20 @@ def _transform_repo_languages(repo_url: str, repo: Dict, repo_languages: List[Di
|
|
|
399
448
|
:param repo_languages: Output array to append transformed results to.
|
|
400
449
|
:return: Nothing.
|
|
401
450
|
"""
|
|
402
|
-
if repo[
|
|
403
|
-
for language in repo[
|
|
404
|
-
repo_languages.append(
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
451
|
+
if repo["languages"]["totalCount"] > 0:
|
|
452
|
+
for language in repo["languages"]["nodes"]:
|
|
453
|
+
repo_languages.append(
|
|
454
|
+
{
|
|
455
|
+
"repo_id": repo_url,
|
|
456
|
+
"language_name": language["name"],
|
|
457
|
+
},
|
|
458
|
+
)
|
|
408
459
|
|
|
409
460
|
|
|
410
461
|
def _transform_collaborators(
|
|
411
|
-
|
|
462
|
+
repo_url: str,
|
|
463
|
+
collaborators: List[UserAffiliationAndRepoPermission],
|
|
464
|
+
transformed_collaborators: Dict,
|
|
412
465
|
) -> None:
|
|
413
466
|
"""
|
|
414
467
|
Performs data adjustments for collaborators in a GitHub repo.
|
|
@@ -425,8 +478,8 @@ def _transform_collaborators(
|
|
|
425
478
|
if collaborators:
|
|
426
479
|
for collaborator in collaborators:
|
|
427
480
|
user = collaborator.user
|
|
428
|
-
user[
|
|
429
|
-
user[
|
|
481
|
+
user["repo_url"] = repo_url
|
|
482
|
+
user["affiliation"] = collaborator.affiliation
|
|
430
483
|
transformed_collaborators[collaborator.permission].append(user)
|
|
431
484
|
|
|
432
485
|
|
|
@@ -442,10 +495,14 @@ def _transform_requirements_txt(
|
|
|
442
495
|
:param out_requirements_files: Output array to append transformed results to.
|
|
443
496
|
:return: Nothing.
|
|
444
497
|
"""
|
|
445
|
-
if req_file_contents and req_file_contents.get(
|
|
446
|
-
text_contents = req_file_contents[
|
|
498
|
+
if req_file_contents and req_file_contents.get("text"):
|
|
499
|
+
text_contents = req_file_contents["text"]
|
|
447
500
|
requirements_list = text_contents.split("\n")
|
|
448
|
-
_transform_python_requirements(
|
|
501
|
+
_transform_python_requirements(
|
|
502
|
+
requirements_list,
|
|
503
|
+
repo_url,
|
|
504
|
+
out_requirements_files,
|
|
505
|
+
)
|
|
449
506
|
|
|
450
507
|
|
|
451
508
|
def _transform_setup_cfg_requirements(
|
|
@@ -460,9 +517,9 @@ def _transform_setup_cfg_requirements(
|
|
|
460
517
|
:param out_requirements_files: Output array to append transformed results to.
|
|
461
518
|
:return: Nothing.
|
|
462
519
|
"""
|
|
463
|
-
if not setup_cfg_contents or not setup_cfg_contents.get(
|
|
520
|
+
if not setup_cfg_contents or not setup_cfg_contents.get("text"):
|
|
464
521
|
return
|
|
465
|
-
text_contents = setup_cfg_contents[
|
|
522
|
+
text_contents = setup_cfg_contents["text"]
|
|
466
523
|
setup_cfg = configparser.ConfigParser()
|
|
467
524
|
try:
|
|
468
525
|
setup_cfg.read_string(text_contents)
|
|
@@ -490,8 +547,8 @@ def _transform_python_requirements(
|
|
|
490
547
|
"""
|
|
491
548
|
parsed_list = []
|
|
492
549
|
for line in requirements_list:
|
|
493
|
-
stripped_line = line.partition(
|
|
494
|
-
if stripped_line ==
|
|
550
|
+
stripped_line = line.partition("#")[0].strip()
|
|
551
|
+
if stripped_line == "":
|
|
495
552
|
continue
|
|
496
553
|
try:
|
|
497
554
|
req = Requirement(stripped_line)
|
|
@@ -499,7 +556,7 @@ def _transform_python_requirements(
|
|
|
499
556
|
except InvalidRequirement:
|
|
500
557
|
# INFO and not WARN/ERROR as we intentionally don't support all ways to specify Python requirements
|
|
501
558
|
logger.info(
|
|
502
|
-
f
|
|
559
|
+
f'Failed to parse line "{line}" in repo {repo_url}\'s requirements.txt; skipping line.',
|
|
503
560
|
exc_info=True,
|
|
504
561
|
)
|
|
505
562
|
|
|
@@ -507,32 +564,44 @@ def _transform_python_requirements(
|
|
|
507
564
|
pinned_version = None
|
|
508
565
|
if len(req.specifier) == 1:
|
|
509
566
|
specifier = next(iter(req.specifier))
|
|
510
|
-
if specifier.operator ==
|
|
567
|
+
if specifier.operator == "==":
|
|
511
568
|
pinned_version = specifier.version
|
|
512
569
|
|
|
513
570
|
# Set `spec` to a default value. Example values for str(req.specifier): "<4.0,>=3.0" or "==1.0.0".
|
|
514
571
|
spec: Optional[str] = str(req.specifier)
|
|
515
572
|
# Set spec to `None` instead of empty string so that the Neo4j driver will leave the library.specifier field
|
|
516
573
|
# undefined. As convention, we prefer undefined values over empty strings in the graph.
|
|
517
|
-
if spec ==
|
|
574
|
+
if spec == "":
|
|
518
575
|
spec = None
|
|
519
576
|
|
|
520
577
|
canon_name = canonicalize_name(req.name)
|
|
521
|
-
requirement_id =
|
|
578
|
+
requirement_id = (
|
|
579
|
+
f"{canon_name}|{pinned_version}" if pinned_version else canon_name
|
|
580
|
+
)
|
|
522
581
|
|
|
523
|
-
out_requirements_files.append(
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
582
|
+
out_requirements_files.append(
|
|
583
|
+
{
|
|
584
|
+
"id": requirement_id,
|
|
585
|
+
"name": canon_name,
|
|
586
|
+
"specifier": spec,
|
|
587
|
+
"version": pinned_version,
|
|
588
|
+
"repo_url": repo_url,
|
|
589
|
+
},
|
|
590
|
+
)
|
|
530
591
|
|
|
531
592
|
|
|
532
593
|
def parse_setup_cfg(config: configparser.ConfigParser) -> List[str]:
|
|
533
594
|
reqs: List[str] = []
|
|
534
|
-
reqs.extend(
|
|
535
|
-
|
|
595
|
+
reqs.extend(
|
|
596
|
+
_parse_setup_cfg_requirements(
|
|
597
|
+
config.get("options", "install_requires", fallback=""),
|
|
598
|
+
),
|
|
599
|
+
)
|
|
600
|
+
reqs.extend(
|
|
601
|
+
_parse_setup_cfg_requirements(
|
|
602
|
+
config.get("options", "setup_requires", fallback=""),
|
|
603
|
+
),
|
|
604
|
+
)
|
|
536
605
|
if config.has_section("options.extras_require"):
|
|
537
606
|
for _, val in config.items("options.extras_require"):
|
|
538
607
|
reqs.extend(_parse_setup_cfg_requirements(val))
|
|
@@ -551,7 +620,11 @@ def _parse_setup_cfg_requirements(reqs: str, separator: str = ";") -> List[str]:
|
|
|
551
620
|
|
|
552
621
|
|
|
553
622
|
@timeit
|
|
554
|
-
def load_github_repos(
|
|
623
|
+
def load_github_repos(
|
|
624
|
+
neo4j_session: neo4j.Session,
|
|
625
|
+
update_tag: int,
|
|
626
|
+
repo_data: List[Dict],
|
|
627
|
+
) -> None:
|
|
555
628
|
"""
|
|
556
629
|
Ingest the GitHub repository information
|
|
557
630
|
:param neo4j_session: Neo4J session object for server communication
|
|
@@ -602,7 +675,11 @@ def load_github_repos(neo4j_session: neo4j.Session, update_tag: int, repo_data:
|
|
|
602
675
|
|
|
603
676
|
|
|
604
677
|
@timeit
|
|
605
|
-
def load_github_languages(
|
|
678
|
+
def load_github_languages(
|
|
679
|
+
neo4j_session: neo4j.Session,
|
|
680
|
+
update_tag: int,
|
|
681
|
+
repo_languages: List[Dict],
|
|
682
|
+
) -> None:
|
|
606
683
|
"""
|
|
607
684
|
Ingest the relationships for repo languages
|
|
608
685
|
:param neo4j_session: Neo4J session object for server communication
|
|
@@ -632,7 +709,11 @@ def load_github_languages(neo4j_session: neo4j.Session, update_tag: int, repo_la
|
|
|
632
709
|
|
|
633
710
|
|
|
634
711
|
@timeit
|
|
635
|
-
def load_github_owners(
|
|
712
|
+
def load_github_owners(
|
|
713
|
+
neo4j_session: neo4j.Session,
|
|
714
|
+
update_tag: int,
|
|
715
|
+
repo_owners: List[Dict],
|
|
716
|
+
) -> None:
|
|
636
717
|
"""
|
|
637
718
|
Ingest the relationships for repo owners
|
|
638
719
|
:param neo4j_session: Neo4J session object for server communication
|
|
@@ -641,7 +722,8 @@ def load_github_owners(neo4j_session: neo4j.Session, update_tag: int, repo_owner
|
|
|
641
722
|
:return: Nothing
|
|
642
723
|
"""
|
|
643
724
|
for owner in repo_owners:
|
|
644
|
-
ingest_owner_template = Template(
|
|
725
|
+
ingest_owner_template = Template(
|
|
726
|
+
"""
|
|
645
727
|
MERGE (user:$account_type{id: $Id})
|
|
646
728
|
ON CREATE SET user.firstseen = timestamp()
|
|
647
729
|
SET user.username = $UserName,
|
|
@@ -651,22 +733,31 @@ def load_github_owners(neo4j_session: neo4j.Session, update_tag: int, repo_owner
|
|
|
651
733
|
MATCH (repo:GitHubRepository{id: $RepoId})
|
|
652
734
|
MERGE (user)<-[r:OWNER]-(repo)
|
|
653
735
|
ON CREATE SET r.firstseen = timestamp()
|
|
654
|
-
SET r.lastupdated = $UpdateTag"""
|
|
736
|
+
SET r.lastupdated = $UpdateTag""",
|
|
737
|
+
)
|
|
655
738
|
|
|
656
|
-
account_type = {
|
|
739
|
+
account_type = {"User": "GitHubUser", "Organization": "GitHubOrganization"}
|
|
657
740
|
|
|
658
741
|
neo4j_session.run(
|
|
659
|
-
ingest_owner_template.safe_substitute(
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
742
|
+
ingest_owner_template.safe_substitute(
|
|
743
|
+
account_type=account_type[owner["type"]],
|
|
744
|
+
),
|
|
745
|
+
Id=owner["owner_id"],
|
|
746
|
+
UserName=owner["owner"],
|
|
747
|
+
RepoId=owner["repo_id"],
|
|
663
748
|
UpdateTag=update_tag,
|
|
664
749
|
)
|
|
665
750
|
|
|
666
751
|
|
|
667
752
|
@timeit
|
|
668
|
-
def load_collaborators(
|
|
669
|
-
|
|
753
|
+
def load_collaborators(
|
|
754
|
+
neo4j_session: neo4j.Session,
|
|
755
|
+
update_tag: int,
|
|
756
|
+
collaborators: Dict,
|
|
757
|
+
affiliation: str,
|
|
758
|
+
) -> None:
|
|
759
|
+
query = Template(
|
|
760
|
+
"""
|
|
670
761
|
UNWIND $UserData as user
|
|
671
762
|
|
|
672
763
|
MERGE (u:GitHubUser{id: user.url})
|
|
@@ -683,7 +774,8 @@ def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborat
|
|
|
683
774
|
MERGE (repo)<-[o:$rel_label]-(u)
|
|
684
775
|
ON CREATE SET o.firstseen = timestamp()
|
|
685
776
|
SET o.lastupdated = $UpdateTag
|
|
686
|
-
"""
|
|
777
|
+
""",
|
|
778
|
+
)
|
|
687
779
|
for collab_type in collaborators.keys():
|
|
688
780
|
relationship_label = f"{affiliation}_COLLAB_{collab_type}"
|
|
689
781
|
neo4j_session.run(
|
|
@@ -694,21 +786,51 @@ def load_collaborators(neo4j_session: neo4j.Session, update_tag: int, collaborat
|
|
|
694
786
|
|
|
695
787
|
|
|
696
788
|
@timeit
|
|
697
|
-
def load(
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
789
|
+
def load(
|
|
790
|
+
neo4j_session: neo4j.Session,
|
|
791
|
+
common_job_parameters: Dict,
|
|
792
|
+
repo_data: Dict,
|
|
793
|
+
) -> None:
|
|
794
|
+
load_github_repos(
|
|
795
|
+
neo4j_session,
|
|
796
|
+
common_job_parameters["UPDATE_TAG"],
|
|
797
|
+
repo_data["repos"],
|
|
798
|
+
)
|
|
799
|
+
load_github_owners(
|
|
800
|
+
neo4j_session,
|
|
801
|
+
common_job_parameters["UPDATE_TAG"],
|
|
802
|
+
repo_data["repo_owners"],
|
|
803
|
+
)
|
|
804
|
+
load_github_languages(
|
|
805
|
+
neo4j_session,
|
|
806
|
+
common_job_parameters["UPDATE_TAG"],
|
|
807
|
+
repo_data["repo_languages"],
|
|
808
|
+
)
|
|
701
809
|
load_collaborators(
|
|
702
|
-
neo4j_session,
|
|
810
|
+
neo4j_session,
|
|
811
|
+
common_job_parameters["UPDATE_TAG"],
|
|
812
|
+
repo_data["repo_direct_collaborators"],
|
|
813
|
+
"DIRECT",
|
|
703
814
|
)
|
|
704
815
|
load_collaborators(
|
|
705
|
-
neo4j_session,
|
|
816
|
+
neo4j_session,
|
|
817
|
+
common_job_parameters["UPDATE_TAG"],
|
|
818
|
+
repo_data["repo_outside_collaborators"],
|
|
819
|
+
"OUTSIDE",
|
|
820
|
+
)
|
|
821
|
+
load_python_requirements(
|
|
822
|
+
neo4j_session,
|
|
823
|
+
common_job_parameters["UPDATE_TAG"],
|
|
824
|
+
repo_data["python_requirements"],
|
|
706
825
|
)
|
|
707
|
-
load_python_requirements(neo4j_session, common_job_parameters['UPDATE_TAG'], repo_data['python_requirements'])
|
|
708
826
|
|
|
709
827
|
|
|
710
828
|
@timeit
|
|
711
|
-
def load_python_requirements(
|
|
829
|
+
def load_python_requirements(
|
|
830
|
+
neo4j_session: neo4j.Session,
|
|
831
|
+
update_tag: int,
|
|
832
|
+
requirements_objects: List[Dict],
|
|
833
|
+
) -> None:
|
|
712
834
|
query = """
|
|
713
835
|
UNWIND $Requirements AS req
|
|
714
836
|
MERGE (lib:PythonLibrary:Dependency{id: req.id})
|
|
@@ -732,11 +854,11 @@ def load_python_requirements(neo4j_session: neo4j.Session, update_tag: int, requ
|
|
|
732
854
|
|
|
733
855
|
|
|
734
856
|
def sync(
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
857
|
+
neo4j_session: neo4j.Session,
|
|
858
|
+
common_job_parameters: Dict[str, Any],
|
|
859
|
+
github_api_key: str,
|
|
860
|
+
github_url: str,
|
|
861
|
+
organization: str,
|
|
740
862
|
) -> None:
|
|
741
863
|
"""
|
|
742
864
|
Performs the sequential tasks to collect, transform, and sync github data
|
|
@@ -753,14 +875,25 @@ def sync(
|
|
|
753
875
|
outside_collabs: dict[str, list[UserAffiliationAndRepoPermission]] = {}
|
|
754
876
|
try:
|
|
755
877
|
direct_collabs = _get_repo_collaborators_for_multiple_repos(
|
|
756
|
-
repos_json,
|
|
878
|
+
repos_json,
|
|
879
|
+
"DIRECT",
|
|
880
|
+
organization,
|
|
881
|
+
github_url,
|
|
882
|
+
github_api_key,
|
|
757
883
|
)
|
|
758
884
|
outside_collabs = _get_repo_collaborators_for_multiple_repos(
|
|
759
|
-
repos_json,
|
|
885
|
+
repos_json,
|
|
886
|
+
"OUTSIDE",
|
|
887
|
+
organization,
|
|
888
|
+
github_url,
|
|
889
|
+
github_api_key,
|
|
760
890
|
)
|
|
761
891
|
except TypeError:
|
|
762
892
|
# due to permission errors or transient network error or some other nonsense
|
|
763
|
-
logger.warning(
|
|
893
|
+
logger.warning(
|
|
894
|
+
"Unable to list repo collaborators due to permission errors; continuing on.",
|
|
895
|
+
exc_info=True,
|
|
896
|
+
)
|
|
764
897
|
repo_data = transform(repos_json, direct_collabs, outside_collabs)
|
|
765
898
|
load(neo4j_session, common_job_parameters, repo_data)
|
|
766
|
-
run_cleanup_job(
|
|
899
|
+
run_cleanup_job("github_repos_cleanup.json", neo4j_session, common_job_parameters)
|