cartography 0.103.0__py3-none-any.whl → 0.104.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. More details are available via the link on the original diff page.

Files changed (42)
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +21 -3
  3. cartography/config.py +4 -0
  4. cartography/graph/cleanupbuilder.py +151 -41
  5. cartography/intel/anthropic/__init__.py +62 -0
  6. cartography/intel/anthropic/apikeys.py +72 -0
  7. cartography/intel/anthropic/users.py +75 -0
  8. cartography/intel/anthropic/util.py +51 -0
  9. cartography/intel/anthropic/workspaces.py +95 -0
  10. cartography/intel/aws/cloudtrail.py +3 -38
  11. cartography/intel/aws/cloudwatch.py +1 -1
  12. cartography/intel/aws/ec2/load_balancer_v2s.py +4 -1
  13. cartography/intel/aws/resources.py +0 -2
  14. cartography/intel/aws/secretsmanager.py +150 -3
  15. cartography/intel/aws/ssm.py +71 -0
  16. cartography/intel/entra/ou.py +21 -5
  17. cartography/intel/openai/adminapikeys.py +1 -2
  18. cartography/intel/openai/apikeys.py +1 -1
  19. cartography/intel/openai/projects.py +4 -1
  20. cartography/intel/openai/serviceaccounts.py +1 -1
  21. cartography/intel/openai/users.py +0 -3
  22. cartography/intel/openai/util.py +17 -1
  23. cartography/models/anthropic/apikey.py +90 -0
  24. cartography/models/anthropic/organization.py +19 -0
  25. cartography/models/anthropic/user.py +48 -0
  26. cartography/models/anthropic/workspace.py +90 -0
  27. cartography/models/aws/cloudtrail/trail.py +24 -0
  28. cartography/models/aws/secretsmanager/__init__.py +0 -0
  29. cartography/models/aws/secretsmanager/secret_version.py +116 -0
  30. cartography/models/aws/ssm/parameters.py +84 -0
  31. cartography/models/core/nodes.py +15 -2
  32. cartography/models/openai/project.py +20 -1
  33. cartography/sync.py +2 -0
  34. {cartography-0.103.0.dist-info → cartography-0.104.0.dist-info}/METADATA +4 -4
  35. {cartography-0.103.0.dist-info → cartography-0.104.0.dist-info}/RECORD +40 -30
  36. {cartography-0.103.0.dist-info → cartography-0.104.0.dist-info}/WHEEL +1 -1
  37. cartography/intel/aws/efs.py +0 -93
  38. cartography/models/aws/efs/mount_target.py +0 -52
  39. /cartography/models/{aws/efs → anthropic}/__init__.py +0 -0
  40. {cartography-0.103.0.dist-info → cartography-0.104.0.dist-info}/entry_points.txt +0 -0
  41. {cartography-0.103.0.dist-info → cartography-0.104.0.dist-info}/licenses/LICENSE +0 -0
  42. {cartography-0.103.0.dist-info → cartography-0.104.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,95 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Tuple
4
+
5
+ import neo4j
6
+ import requests
7
+
8
+ from cartography.client.core.tx import load
9
+ from cartography.graph.job import GraphJob
10
+ from cartography.intel.anthropic.util import paginated_get
11
+ from cartography.models.anthropic.workspace import AnthropicWorkspaceSchema
12
+ from cartography.util import timeit
13
+
14
+ logger = logging.getLogger(__name__)
15
+ # Connect and read timeouts of 60 seconds each; see https://requests.readthedocs.io/en/master/user/advanced/#timeouts
16
+ _TIMEOUT = (60, 60)
17
+
18
+
19
+ @timeit
20
+ def sync(
21
+ neo4j_session: neo4j.Session,
22
+ api_session: requests.Session,
23
+ common_job_parameters: dict[str, Any],
24
+ ) -> list[dict]:
25
+ org_id, workspaces = get(
26
+ api_session,
27
+ common_job_parameters["BASE_URL"],
28
+ )
29
+ common_job_parameters["ORG_ID"] = org_id
30
+ for workspace in workspaces:
31
+ workspace["users"] = []
32
+ workspace["admins"] = []
33
+ for user in get_workspace_users(
34
+ api_session,
35
+ common_job_parameters["BASE_URL"],
36
+ workspace["id"],
37
+ ):
38
+ workspace["users"].append(user["user_id"])
39
+ if user["workspace_role"] == "workspace_admin":
40
+ workspace["admins"].append(user["user_id"])
41
+ load_workspaces(
42
+ neo4j_session, workspaces, org_id, common_job_parameters["UPDATE_TAG"]
43
+ )
44
+ cleanup(neo4j_session, common_job_parameters)
45
+ return workspaces
46
+
47
+
48
+ @timeit
49
+ def get(
50
+ api_session: requests.Session,
51
+ base_url: str,
52
+ ) -> Tuple[str, list[dict[str, Any]]]:
53
+ return paginated_get(
54
+ api_session, f"{base_url}/organizations/workspaces", timeout=_TIMEOUT
55
+ )
56
+
57
+
58
+ @timeit
59
+ def get_workspace_users(
60
+ api_session: requests.Session,
61
+ base_url: str,
62
+ workspace_id: str,
63
+ ) -> list[dict[str, Any]]:
64
+ _, result = paginated_get(
65
+ api_session,
66
+ f"{base_url}/organizations/workspaces/{workspace_id}/members",
67
+ timeout=_TIMEOUT,
68
+ )
69
+ return result
70
+
71
+
72
+ @timeit
73
+ def load_workspaces(
74
+ neo4j_session: neo4j.Session,
75
+ data: list[dict[str, Any]],
76
+ ORG_ID: str,
77
+ update_tag: int,
78
+ ) -> None:
79
+ logger.info("Loading %d Anthropic workspaces into Neo4j.", len(data))
80
+ load(
81
+ neo4j_session,
82
+ AnthropicWorkspaceSchema(),
83
+ data,
84
+ lastupdated=update_tag,
85
+ ORG_ID=ORG_ID,
86
+ )
87
+
88
+
89
+ @timeit
90
+ def cleanup(
91
+ neo4j_session: neo4j.Session, common_job_parameters: dict[str, Any]
92
+ ) -> None:
93
+ GraphJob.from_node_schema(AnthropicWorkspaceSchema(), common_job_parameters).run(
94
+ neo4j_session
95
+ )
@@ -4,7 +4,6 @@ from typing import Dict
4
4
  from typing import List
5
5
 
6
6
  import boto3
7
- import botocore.exceptions
8
7
  import neo4j
9
8
 
10
9
  from cartography.client.core.tx import load
@@ -25,10 +24,8 @@ def get_cloudtrail_trails(
25
24
  client = boto3_session.client(
26
25
  "cloudtrail", region_name=region, config=get_botocore_config()
27
26
  )
28
- paginator = client.get_paginator("list_trails")
29
- trails = []
30
- for page in paginator.paginate():
31
- trails.extend(page["Trails"])
27
+
28
+ trails = client.describe_trails()["trailList"]
32
29
 
33
30
  # CloudTrail multi-region trails are shown in list_trails,
34
31
  # but the get_trail call only works in the home region
@@ -36,28 +33,6 @@ def get_cloudtrail_trails(
36
33
  return trails_filtered
37
34
 
38
35
 
39
- @timeit
40
- def get_cloudtrail_trail(
41
- boto3_session: boto3.Session,
42
- region: str,
43
- trail_name: str,
44
- ) -> Dict[str, Any]:
45
- client = boto3_session.client(
46
- "cloudtrail", region_name=region, config=get_botocore_config()
47
- )
48
- trail_details: Dict[str, Any] = {}
49
- try:
50
- response = client.get_trail(Name=trail_name)
51
- trail_details = response["Trail"]
52
- except botocore.exceptions.ClientError as e:
53
- code = e.response["Error"]["Code"]
54
- msg = e.response["Error"]["Message"]
55
- logger.warning(
56
- f"Could not run CloudTrail get_trail due to boto3 error {code}: {msg}. Skipping.",
57
- )
58
- return trail_details
59
-
60
-
61
36
  @timeit
62
37
  def load_cloudtrail_trails(
63
38
  neo4j_session: neo4j.Session,
@@ -105,20 +80,10 @@ def sync(
105
80
  f"Syncing CloudTrail for region '{region}' in account '{current_aws_account_id}'.",
106
81
  )
107
82
  trails = get_cloudtrail_trails(boto3_session, region)
108
- trail_data: List[Dict[str, Any]] = []
109
- for trail in trails:
110
- trail_name = trail["Name"]
111
- trail_details = get_cloudtrail_trail(
112
- boto3_session,
113
- region,
114
- trail_name,
115
- )
116
- if trail_details:
117
- trail_data.append(trail_details)
118
83
 
119
84
  load_cloudtrail_trails(
120
85
  neo4j_session,
121
- trail_data,
86
+ trails,
122
87
  region,
123
88
  current_aws_account_id,
124
89
  update_tag,
@@ -22,7 +22,7 @@ def get_cloudwatch_log_groups(
22
22
  boto3_session: boto3.Session, region: str
23
23
  ) -> List[Dict[str, Any]]:
24
24
  client = boto3_session.client(
25
- "cloudwatch", region_name=region, config=get_botocore_config()
25
+ "logs", region_name=region, config=get_botocore_config()
26
26
  )
27
27
  paginator = client.get_paginator("describe_log_groups")
28
28
  logGroups = []
@@ -99,7 +99,10 @@ def load_load_balancer_v2s(
99
99
  SET r.lastupdated = $update_tag
100
100
  """
101
101
  for lb in data:
102
- load_balancer_id = lb["DNSName"]
102
+ load_balancer_id = lb.get("DNSName")
103
+ if not load_balancer_id:
104
+ logger.warning("Skipping load balancer entry with missing DNSName: %r", lb)
105
+ continue
103
106
 
104
107
  neo4j_session.run(
105
108
  ingest_load_balancer_v2,
@@ -10,7 +10,6 @@ from . import config
10
10
  from . import dynamodb
11
11
  from . import ecr
12
12
  from . import ecs
13
- from . import efs
14
13
  from . import eks
15
14
  from . import elasticache
16
15
  from . import elasticsearch
@@ -105,5 +104,4 @@ RESOURCE_FUNCTIONS: Dict[str, Callable[..., None]] = {
105
104
  "identitycenter": identitycenter.sync_identity_center_instances,
106
105
  "cloudtrail": cloudtrail.sync,
107
106
  "cloudwatch": cloudwatch.sync,
108
- "efs": efs.sync,
109
107
  }
@@ -5,12 +5,20 @@ from typing import List
5
5
  import boto3
6
6
  import neo4j
7
7
 
8
+ from cartography.client.core.tx import load
9
+ from cartography.graph.job import GraphJob
10
+ from cartography.models.aws.secretsmanager.secret_version import (
11
+ SecretsManagerSecretVersionSchema,
12
+ )
13
+ from cartography.stats import get_stats_client
8
14
  from cartography.util import aws_handle_regions
9
15
  from cartography.util import dict_date_to_epoch
16
+ from cartography.util import merge_module_sync_metadata
10
17
  from cartography.util import run_cleanup_job
11
18
  from cartography.util import timeit
12
19
 
13
20
  logger = logging.getLogger(__name__)
21
+ stat_handler = get_stats_client(__name__)
14
22
 
15
23
 
16
24
  @timeit
@@ -76,6 +84,107 @@ def cleanup_secrets(neo4j_session: neo4j.Session, common_job_parameters: Dict) -
76
84
  )
77
85
 
78
86
 
87
+ @timeit
88
+ @aws_handle_regions
89
+ def get_secret_versions(
90
+ boto3_session: boto3.session.Session, region: str, secret_arn: str
91
+ ) -> List[Dict]:
92
+ """
93
+ Get all versions of a secret from AWS Secrets Manager.
94
+
95
+ Note: list_secret_version_ids is not paginatable through boto3's paginator,
96
+ so we implement manual pagination.
97
+ """
98
+ client = boto3_session.client("secretsmanager", region_name=region)
99
+ next_token = None
100
+ versions = []
101
+
102
+ while True:
103
+ params = {"SecretId": secret_arn, "IncludeDeprecated": True}
104
+ if next_token:
105
+ params["NextToken"] = next_token
106
+
107
+ response = client.list_secret_version_ids(**params)
108
+
109
+ for version in response.get("Versions", []):
110
+ version["SecretId"] = secret_arn
111
+ version["ARN"] = f"{secret_arn}:version:{version['VersionId']}"
112
+
113
+ versions.extend(response.get("Versions", []))
114
+
115
+ next_token = response.get("NextToken")
116
+ if not next_token:
117
+ break
118
+
119
+ return versions
120
+
121
+
122
+ def transform_secret_versions(
123
+ versions: List[Dict],
124
+ region: str,
125
+ aws_account_id: str,
126
+ ) -> List[Dict]:
127
+ """
128
+ Transform AWS Secrets Manager Secret Versions to match the data model.
129
+ """
130
+ transformed_data = []
131
+ for version in versions:
132
+ transformed = {
133
+ "ARN": version["ARN"],
134
+ "SecretId": version["SecretId"],
135
+ "VersionId": version["VersionId"],
136
+ "VersionStages": version.get("VersionStages"),
137
+ "CreatedDate": dict_date_to_epoch(version, "CreatedDate"),
138
+ }
139
+
140
+ if "KmsKeyId" in version and version["KmsKeyId"]:
141
+ transformed["KmsKeyId"] = version["KmsKeyId"]
142
+
143
+ if "Tags" in version and version["Tags"]:
144
+ transformed["Tags"] = version["Tags"]
145
+
146
+ transformed_data.append(transformed)
147
+
148
+ return transformed_data
149
+
150
+
151
+ @timeit
152
+ def load_secret_versions(
153
+ neo4j_session: neo4j.Session,
154
+ data: List[Dict],
155
+ region: str,
156
+ aws_account_id: str,
157
+ update_tag: int,
158
+ ) -> None:
159
+ """
160
+ Load secret versions into Neo4j using the data model.
161
+ """
162
+ logger.info(f"Loading {len(data)} Secret Versions for region {region} into graph.")
163
+
164
+ load(
165
+ neo4j_session,
166
+ SecretsManagerSecretVersionSchema(),
167
+ data,
168
+ lastupdated=update_tag,
169
+ Region=region,
170
+ AWS_ID=aws_account_id,
171
+ )
172
+
173
+
174
+ @timeit
175
+ def cleanup_secret_versions(
176
+ neo4j_session: neo4j.Session, common_job_parameters: Dict
177
+ ) -> None:
178
+ """
179
+ Run Secret Versions cleanup job.
180
+ """
181
+ logger.debug("Running Secret Versions cleanup job.")
182
+ cleanup_job = GraphJob.from_node_schema(
183
+ SecretsManagerSecretVersionSchema(), common_job_parameters
184
+ )
185
+ cleanup_job.run(neo4j_session)
186
+
187
+
79
188
  @timeit
80
189
  def sync(
81
190
  neo4j_session: neo4j.Session,
@@ -85,12 +194,50 @@ def sync(
85
194
  update_tag: int,
86
195
  common_job_parameters: Dict,
87
196
  ) -> None:
197
+ """
198
+ Sync AWS Secrets Manager resources.
199
+ """
88
200
  for region in regions:
89
201
  logger.info(
90
- "Syncing Secrets Manager for region '%s' in account '%s'.",
91
- region,
92
- current_aws_account_id,
202
+ f"Syncing Secrets Manager for region '{region}' in account '{current_aws_account_id}'."
93
203
  )
94
204
  secrets = get_secret_list(boto3_session, region)
205
+
95
206
  load_secrets(neo4j_session, secrets, region, current_aws_account_id, update_tag)
207
+
208
+ all_versions = []
209
+ for secret in secrets:
210
+ logger.info(
211
+ f"Getting versions for secret {secret.get('Name', 'unnamed')} ({secret['ARN']})"
212
+ )
213
+ versions = get_secret_versions(boto3_session, region, secret["ARN"])
214
+ logger.info(
215
+ f"Found {len(versions)} versions for secret {secret.get('Name', 'unnamed')}"
216
+ )
217
+ all_versions.extend(versions)
218
+
219
+ transformed_data = transform_secret_versions(
220
+ all_versions,
221
+ region,
222
+ current_aws_account_id,
223
+ )
224
+
225
+ load_secret_versions(
226
+ neo4j_session,
227
+ transformed_data,
228
+ region,
229
+ current_aws_account_id,
230
+ update_tag,
231
+ )
232
+
96
233
  cleanup_secrets(neo4j_session, common_job_parameters)
234
+ cleanup_secret_versions(neo4j_session, common_job_parameters)
235
+
236
+ merge_module_sync_metadata(
237
+ neo4j_session,
238
+ group_type="AWSAccount",
239
+ group_id=current_aws_account_id,
240
+ synced_type="SecretsManagerSecretVersion",
241
+ update_tag=update_tag,
242
+ stat_handler=stat_handler,
243
+ )
@@ -1,4 +1,6 @@
1
+ import json
1
2
  import logging
3
+ import re
2
4
  from typing import Any
3
5
  from typing import Dict
4
6
  from typing import List
@@ -10,6 +12,7 @@ from cartography.client.core.tx import load
10
12
  from cartography.graph.job import GraphJob
11
13
  from cartography.models.aws.ssm.instance_information import SSMInstanceInformationSchema
12
14
  from cartography.models.aws.ssm.instance_patch import SSMInstancePatchSchema
15
+ from cartography.models.aws.ssm.parameters import SSMParameterSchema
13
16
  from cartography.util import aws_handle_regions
14
17
  from cartography.util import dict_date_to_epoch
15
18
  from cartography.util import timeit
@@ -107,6 +110,42 @@ def transform_instance_patches(data_list: List[Dict[str, Any]]) -> List[Dict[str
107
110
  return data_list
108
111
 
109
112
 
113
+ @timeit
114
+ @aws_handle_regions
115
+ def get_ssm_parameters(
116
+ boto3_session: boto3.session.Session,
117
+ region: str,
118
+ ) -> List[Dict[str, Any]]:
119
+ client = boto3_session.client("ssm", region_name=region)
120
+ paginator = client.get_paginator("describe_parameters")
121
+ ssm_parameters_data: List[Dict[str, Any]] = []
122
+ for page in paginator.paginate(PaginationConfig={"PageSize": 50}):
123
+ ssm_parameters_data.extend(page.get("Parameters", []))
124
+ return ssm_parameters_data
125
+
126
+
127
+ def transform_ssm_parameters(
128
+ raw_parameters_data: List[Dict[str, Any]],
129
+ ) -> List[Dict[str, Any]]:
130
+ transformed_list: List[Dict[str, Any]] = []
131
+ for param in raw_parameters_data:
132
+ param["LastModifiedDate"] = dict_date_to_epoch(param, "LastModifiedDate")
133
+ param["PoliciesJson"] = json.dumps(param.get("Policies", []))
134
+ # KMSKey uses shorter UUID as their primary id
135
+ # SSM Parameters, when encrypted, reference KMS keys using their full ARNs in the KeyId field
136
+ # Adding a param to match on the id property of the target node
137
+ if param.get("Type") == "SecureString" and param.get("KeyId") is not None:
138
+ match = re.match(r".*key/(.*)$", param["KeyId"])
139
+ if match:
140
+ param["KMSKeyIdShort"] = match.group(1)
141
+ else:
142
+ param["KMSKeyIdShort"] = None
143
+ else:
144
+ param["KMSKeyIdShort"] = None
145
+ transformed_list.append(param)
146
+ return transformed_list
147
+
148
+
110
149
  @timeit
111
150
  def load_instance_information(
112
151
  neo4j_session: neo4j.Session,
@@ -143,6 +182,24 @@ def load_instance_patches(
143
182
  )
144
183
 
145
184
 
185
+ @timeit
186
+ def load_ssm_parameters(
187
+ neo4j_session: neo4j.Session,
188
+ data: List[Dict[str, Any]],
189
+ region: str,
190
+ current_aws_account_id: str,
191
+ aws_update_tag: int,
192
+ ) -> None:
193
+ load(
194
+ neo4j_session,
195
+ SSMParameterSchema(),
196
+ data,
197
+ lastupdated=aws_update_tag,
198
+ Region=region,
199
+ AWS_ID=current_aws_account_id,
200
+ )
201
+
202
+
146
203
  @timeit
147
204
  def cleanup_ssm(
148
205
  neo4j_session: neo4j.Session,
@@ -156,6 +213,9 @@ def cleanup_ssm(
156
213
  GraphJob.from_node_schema(SSMInstancePatchSchema(), common_job_parameters).run(
157
214
  neo4j_session,
158
215
  )
216
+ GraphJob.from_node_schema(SSMParameterSchema(), common_job_parameters).run(
217
+ neo4j_session,
218
+ )
159
219
 
160
220
 
161
221
  @timeit
@@ -193,4 +253,15 @@ def sync(
193
253
  current_aws_account_id,
194
254
  update_tag,
195
255
  )
256
+
257
+ data = get_ssm_parameters(boto3_session, region)
258
+ data = transform_ssm_parameters(data)
259
+ load_ssm_parameters(
260
+ neo4j_session,
261
+ data,
262
+ region,
263
+ current_aws_account_id,
264
+ update_tag,
265
+ )
266
+
196
267
  cleanup_ssm(neo4j_session, common_job_parameters)
@@ -22,12 +22,28 @@ async def get_entra_ous(client: GraphServiceClient) -> list[AdministrativeUnit]:
22
22
  Get all OUs from Microsoft Graph API with pagination support
23
23
  """
24
24
  all_units: list[AdministrativeUnit] = []
25
- request = client.directory.administrative_units.request()
26
25
 
27
- while request:
28
- response = await request.get()
29
- all_units.extend(response.value)
30
- request = response.odata_next_link if response.odata_next_link else None
26
+ # Initialize first page request
27
+ current_request = client.directory.administrative_units
28
+
29
+ while current_request:
30
+ try:
31
+ response = await current_request.get()
32
+ if response and response.value:
33
+ all_units.extend(response.value)
34
+
35
+ # Handle next page using OData link
36
+ if response.odata_next_link:
37
+ current_request = client.directory.administrative_units.with_url(
38
+ response.odata_next_link
39
+ )
40
+ else:
41
+ current_request = None
42
+ else:
43
+ current_request = None
44
+ except Exception as e:
45
+ logger.error(f"Failed to retrieve administrative units: {str(e)}")
46
+ current_request = None
31
47
 
32
48
  return all_units
33
49
 
@@ -23,7 +23,7 @@ def sync(
23
23
  api_session: requests.Session,
24
24
  common_job_parameters: Dict[str, Any],
25
25
  ORG_ID: str,
26
- ) -> List[Dict]:
26
+ ) -> None:
27
27
  adminapikeys = get(
28
28
  api_session,
29
29
  common_job_parameters["BASE_URL"],
@@ -36,7 +36,6 @@ def sync(
36
36
  common_job_parameters["UPDATE_TAG"],
37
37
  )
38
38
  cleanup(neo4j_session, common_job_parameters)
39
- return adminapikeys
40
39
 
41
40
 
42
41
  @timeit
@@ -77,7 +77,7 @@ def load_apikeys(
77
77
  project_id: str,
78
78
  update_tag: int,
79
79
  ) -> None:
80
- logger.info("Loading %d OpenAI Project APIKey into Neo4j.", len(data))
80
+ logger.info("Loading %d OpenAI APIKey into Neo4j.", len(data))
81
81
  load(
82
82
  neo4j_session,
83
83
  OpenAIApiKeySchema(),
@@ -30,12 +30,15 @@ def sync(
30
30
  )
31
31
  for project in projects:
32
32
  project["users"] = []
33
+ project["admins"] = []
33
34
  for user in get_project_users(
34
35
  api_session,
35
36
  common_job_parameters["BASE_URL"],
36
37
  project["id"],
37
38
  ):
38
39
  project["users"].append(user["id"])
40
+ if user["role"] == "owner":
41
+ project["admins"].append(user["id"])
39
42
  load_projects(neo4j_session, projects, ORG_ID, common_job_parameters["UPDATE_TAG"])
40
43
  cleanup(neo4j_session, common_job_parameters)
41
44
  return projects
@@ -75,7 +78,7 @@ def load_projects(
75
78
  ORG_ID: str,
76
79
  update_tag: int,
77
80
  ) -> None:
78
- logger.info("Loading %d OpenAIProjectSchema into Neo4j.", len(data))
81
+ logger.info("Loading %d OpenAI Projects into Neo4j.", len(data))
79
82
  load(
80
83
  neo4j_session,
81
84
  OpenAIProjectSchema(),
@@ -63,7 +63,7 @@ def load_serviceaccounts(
63
63
  project_id: str,
64
64
  update_tag: int,
65
65
  ) -> None:
66
- logger.info("Loading %d OpenAI ProjectServiceAccount into Neo4j.", len(data))
66
+ logger.info("Loading %d OpenAI ServiceAccount into Neo4j.", len(data))
67
67
  load(
68
68
  neo4j_session,
69
69
  OpenAIServiceAccountSchema(),
@@ -70,9 +70,6 @@ def load_users(
70
70
  def cleanup(
71
71
  neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]
72
72
  ) -> None:
73
- GraphJob.from_node_schema(OpenAIOrganizationSchema(), common_job_parameters).run(
74
- neo4j_session
75
- )
76
73
  GraphJob.from_node_schema(OpenAIUserSchema(), common_job_parameters).run(
77
74
  neo4j_session
78
75
  )
@@ -10,7 +10,23 @@ def paginated_get(
10
10
  timeout: tuple[int, int],
11
11
  after: str | None = None,
12
12
  ) -> Generator[dict[str, Any], None, None]:
13
- # DOC
13
+ """Helper function to get paginated data from the OpenAI API.
14
+
15
+ This function handles the pagination of the API requests and returns
16
+ the results as a generator. It will continue to make requests until
17
+ all pages of data have been retrieved. The results are returned as a
18
+ list of dictionaries, where each dictionary represents a single
19
+ entity.
20
+
21
+ Args:
22
+ api_session (requests.Session): The requests session to use for making API calls.
23
+ url (str): The URL to make the API call to.
24
+ timeout (tuple[int, int]): The timeout for the API call.
25
+ after (str | None): The ID of the last item retrieved in the previous request.
26
+ If None, the first page of results will be retrieved.
27
+ Returns:
28
+ Generator[dict[str, Any], None, None]: A generator yielding dictionaries representing the results.
29
+ """
14
30
  params = {"after": after} if after else {}
15
31
  req = api_session.get(
16
32
  url,