cartography 0.106.0rc2__py3-none-any.whl → 0.107.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (81) hide show
  1. cartography/_version.py +2 -2
  2. cartography/cli.py +131 -2
  3. cartography/config.py +42 -0
  4. cartography/driftdetect/cli.py +3 -2
  5. cartography/intel/airbyte/__init__.py +105 -0
  6. cartography/intel/airbyte/connections.py +120 -0
  7. cartography/intel/airbyte/destinations.py +81 -0
  8. cartography/intel/airbyte/organizations.py +59 -0
  9. cartography/intel/airbyte/sources.py +78 -0
  10. cartography/intel/airbyte/tags.py +64 -0
  11. cartography/intel/airbyte/users.py +106 -0
  12. cartography/intel/airbyte/util.py +122 -0
  13. cartography/intel/airbyte/workspaces.py +63 -0
  14. cartography/intel/aws/__init__.py +1 -0
  15. cartography/intel/aws/cloudtrail_management_events.py +364 -0
  16. cartography/intel/aws/codebuild.py +132 -0
  17. cartography/intel/aws/inspector.py +77 -48
  18. cartography/intel/aws/resources.py +4 -0
  19. cartography/intel/aws/sns.py +62 -2
  20. cartography/intel/entra/users.py +84 -42
  21. cartography/intel/scaleway/__init__.py +127 -0
  22. cartography/intel/scaleway/iam/__init__.py +0 -0
  23. cartography/intel/scaleway/iam/apikeys.py +71 -0
  24. cartography/intel/scaleway/iam/applications.py +71 -0
  25. cartography/intel/scaleway/iam/groups.py +71 -0
  26. cartography/intel/scaleway/iam/users.py +71 -0
  27. cartography/intel/scaleway/instances/__init__.py +0 -0
  28. cartography/intel/scaleway/instances/flexibleips.py +86 -0
  29. cartography/intel/scaleway/instances/instances.py +92 -0
  30. cartography/intel/scaleway/projects.py +79 -0
  31. cartography/intel/scaleway/storage/__init__.py +0 -0
  32. cartography/intel/scaleway/storage/snapshots.py +86 -0
  33. cartography/intel/scaleway/storage/volumes.py +84 -0
  34. cartography/intel/scaleway/utils.py +37 -0
  35. cartography/intel/sentinelone/__init__.py +63 -0
  36. cartography/intel/sentinelone/account.py +140 -0
  37. cartography/intel/sentinelone/agent.py +139 -0
  38. cartography/intel/sentinelone/api.py +113 -0
  39. cartography/intel/sentinelone/utils.py +9 -0
  40. cartography/models/airbyte/__init__.py +0 -0
  41. cartography/models/airbyte/connection.py +138 -0
  42. cartography/models/airbyte/destination.py +75 -0
  43. cartography/models/airbyte/organization.py +19 -0
  44. cartography/models/airbyte/source.py +75 -0
  45. cartography/models/airbyte/stream.py +74 -0
  46. cartography/models/airbyte/tag.py +69 -0
  47. cartography/models/airbyte/user.py +111 -0
  48. cartography/models/airbyte/workspace.py +46 -0
  49. cartography/models/aws/cloudtrail/management_events.py +64 -0
  50. cartography/models/aws/codebuild/__init__.py +0 -0
  51. cartography/models/aws/codebuild/project.py +49 -0
  52. cartography/models/aws/ecs/containers.py +19 -0
  53. cartography/models/aws/ecs/task_definitions.py +38 -0
  54. cartography/models/aws/inspector/findings.py +37 -0
  55. cartography/models/aws/inspector/packages.py +1 -31
  56. cartography/models/aws/sns/topic_subscription.py +74 -0
  57. cartography/models/entra/user.py +17 -51
  58. cartography/models/scaleway/__init__.py +0 -0
  59. cartography/models/scaleway/iam/__init__.py +0 -0
  60. cartography/models/scaleway/iam/apikey.py +96 -0
  61. cartography/models/scaleway/iam/application.py +52 -0
  62. cartography/models/scaleway/iam/group.py +95 -0
  63. cartography/models/scaleway/iam/user.py +60 -0
  64. cartography/models/scaleway/instance/__init__.py +0 -0
  65. cartography/models/scaleway/instance/flexibleip.py +52 -0
  66. cartography/models/scaleway/instance/instance.py +118 -0
  67. cartography/models/scaleway/organization.py +19 -0
  68. cartography/models/scaleway/project.py +48 -0
  69. cartography/models/scaleway/storage/__init__.py +0 -0
  70. cartography/models/scaleway/storage/snapshot.py +78 -0
  71. cartography/models/scaleway/storage/volume.py +51 -0
  72. cartography/models/sentinelone/__init__.py +1 -0
  73. cartography/models/sentinelone/account.py +40 -0
  74. cartography/models/sentinelone/agent.py +50 -0
  75. cartography/sync.py +11 -4
  76. {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/METADATA +20 -16
  77. {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/RECORD +81 -21
  78. {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/WHEEL +0 -0
  79. {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/entry_points.txt +0 -0
  80. {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/licenses/LICENSE +0 -0
  81. {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/top_level.txt +0 -0
cartography/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.106.0rc2'
21
- __version_tuple__ = version_tuple = (0, 106, 0, 'rc2')
20
+ __version__ = version = '0.107.0rc2'
21
+ __version_tuple__ = version_tuple = (0, 107, 0, 'rc2')
cartography/cli.py CHANGED
@@ -71,8 +71,8 @@ class CLI:
71
71
  default="bolt://localhost:7687",
72
72
  help=(
73
73
  "A valid Neo4j URI to sync against. See "
74
- "https://neo4j.com/docs/api/python-driver/current/driver.html#uri for complete documentation on the "
75
- "structure of a Neo4j URI."
74
+ "https://neo4j.com/docs/browser-manual/current/operations/dbms-connection/#uri-scheme for complete "
75
+ "documentation on the structure of a Neo4j URI."
76
76
  ),
77
77
  )
78
78
  parser.add_argument(
@@ -182,6 +182,14 @@ class CLI:
182
182
  "syncing other accounts and delay raising an exception until the very end."
183
183
  ),
184
184
  )
185
+ parser.add_argument(
186
+ "--aws-cloudtrail-management-events-lookback-hours",
187
+ type=int,
188
+ default=None,
189
+ help=(
190
+ "Number of hours back to retrieve CloudTrail management events from. If not specified, CloudTrail management events will not be retrieved."
191
+ ),
192
+ )
185
193
  parser.add_argument(
186
194
  "--oci-sync-all-profiles",
187
195
  action="store_true",
@@ -637,6 +645,33 @@ class CLI:
637
645
  "Required if you are using the Anthropic intel module. Ignored otherwise."
638
646
  ),
639
647
  )
648
+ parser.add_argument(
649
+ "--airbyte-client-id",
650
+ type=str,
651
+ default=None,
652
+ help=(
653
+ "The Airbyte client ID to use for authentication. "
654
+ "Required if you are using the Airbyte intel module. Ignored otherwise."
655
+ ),
656
+ )
657
+ parser.add_argument(
658
+ "--airbyte-client-secret-env-var",
659
+ type=str,
660
+ default=None,
661
+ help=(
662
+ "The name of an environment variable containing the Airbyte client secret for authentication. "
663
+ "Required if you are using the Airbyte intel module. Ignored otherwise."
664
+ ),
665
+ )
666
+ parser.add_argument(
667
+ "--airbyte-api-url",
668
+ type=str,
669
+ default="https://api.airbyte.com/v1",
670
+ help=(
671
+ "The base URL for the Airbyte API (default is the public Airbyte Cloud API). "
672
+ "Required if you are using the Airbyte intel module. Ignored otherwise."
673
+ ),
674
+ )
640
675
  parser.add_argument(
641
676
  "--trivy-s3-bucket",
642
677
  type=str,
@@ -655,6 +690,59 @@ class CLI:
655
690
  "Required if you are using the Trivy module. Ignored otherwise."
656
691
  ),
657
692
  )
693
+ parser.add_argument(
694
+ "--scaleway-org",
695
+ type=str,
696
+ default=None,
697
+ help=(
698
+ "The Scaleway organization ID to sync. "
699
+ "Required if you are using the Scaleway intel module. Ignored otherwise."
700
+ ),
701
+ )
702
+ parser.add_argument(
703
+ "--scaleway-access-key",
704
+ type=str,
705
+ default=None,
706
+ help=(
707
+ "The Scaleway access key to use for authentication. "
708
+ "Required if you are using the Scaleway intel module. Ignored otherwise."
709
+ ),
710
+ )
711
+ parser.add_argument(
712
+ "--scaleway-secret-key-env-var",
713
+ type=str,
714
+ default=None,
715
+ help=(
716
+ "The name of an environment variable containing the Scaleway secret key for authentication. "
717
+ "Required if you are using the Scaleway intel module. Ignored otherwise."
718
+ ),
719
+ )
720
+ parser.add_argument(
721
+ "--sentinelone-account-ids",
722
+ type=str,
723
+ default=None,
724
+ help=(
725
+ "Comma-separated list of SentinelOne account IDs to sync. "
726
+ "If not specified, all accessible accounts will be synced."
727
+ ),
728
+ )
729
+ parser.add_argument(
730
+ "--sentinelone-api-url",
731
+ type=str,
732
+ default=None,
733
+ help=(
734
+ "SentinelOne API URL. Required if you are using the SentinelOne intel module. Ignored otherwise."
735
+ ),
736
+ )
737
+ parser.add_argument(
738
+ "--sentinelone-api-token-env-var",
739
+ type=str,
740
+ default="SENTINELONE_API_TOKEN",
741
+ help=(
742
+ "The name of an environment variable containing the SentinelOne API token. "
743
+ "Required if you are using the SentinelOne intel module. Ignored otherwise."
744
+ ),
745
+ )
658
746
 
659
747
  return parser
660
748
 
@@ -973,6 +1061,17 @@ class CLI:
973
1061
  else:
974
1062
  config.anthropic_apikey = None
975
1063
 
1064
+ # Airbyte config
1065
+ if config.airbyte_client_id and config.airbyte_client_secret_env_var:
1066
+ logger.debug(
1067
+ f"Reading Airbyte client secret from environment variable {config.airbyte_client_secret_env_var}",
1068
+ )
1069
+ config.airbyte_client_secret = os.environ.get(
1070
+ config.airbyte_client_secret_env_var,
1071
+ )
1072
+ else:
1073
+ config.airbyte_client_secret = None
1074
+
976
1075
  # Trivy config
977
1076
  if config.trivy_s3_bucket:
978
1077
  logger.debug(f"Trivy S3 bucket: {config.trivy_s3_bucket}")
@@ -980,6 +1079,36 @@ class CLI:
980
1079
  if config.trivy_s3_prefix:
981
1080
  logger.debug(f"Trivy S3 prefix: {config.trivy_s3_prefix}")
982
1081
 
1082
+ # Scaleway config
1083
+ if config.scaleway_secret_key_env_var:
1084
+ logger.debug(
1085
+ f"Reading Scaleway secret key from environment variable {config.scaleway_secret_key_env_var}",
1086
+ )
1087
+ config.scaleway_secret_key = os.environ.get(
1088
+ config.scaleway_secret_key_env_var,
1089
+ )
1090
+ else:
1091
+ config.scaleway_secret_key = None
1092
+
1093
+ # SentinelOne config
1094
+ if config.sentinelone_account_ids:
1095
+ config.sentinelone_account_ids = [
1096
+ id.strip() for id in config.sentinelone_account_ids.split(",")
1097
+ ]
1098
+ logger.debug(
1099
+ f"Parsed {len(config.sentinelone_account_ids)} SentinelOne account IDs to sync"
1100
+ )
1101
+ else:
1102
+ config.sentinelone_account_ids = None
1103
+
1104
+ if config.sentinelone_api_url and config.sentinelone_api_token_env_var:
1105
+ logger.debug(
1106
+ f"Reading API token for SentinelOne from environment variable {config.sentinelone_api_token_env_var}",
1107
+ )
1108
+ config.sentinelone_api_token = os.environ.get(
1109
+ config.sentinelone_api_token_env_var
1110
+ )
1111
+
983
1112
  # Run cartography
984
1113
  try:
985
1114
  return cartography.sync.run_with_config(self.sync, config)
cartography/config.py CHANGED
@@ -31,6 +31,8 @@ class Config:
31
31
  :type aws_best_effort_mode: bool
32
32
  :param aws_best_effort_mode: If True, AWS sync will not raise any exceptions, just log. If False (default),
33
33
  exceptions will be raised.
34
+ :type aws_cloudtrail_management_events_lookback_hours: int
35
+ :param aws_cloudtrail_management_events_lookback_hours: Number of hours back to retrieve CloudTrail management events from. Optional.
34
36
  :type azure_sync_all_subscriptions: bool
35
37
  :param azure_sync_all_subscriptions: If True, Azure sync will run for all profiles in azureProfile.json. If
36
38
  False (default), Azure sync will run using current user session via CLI credentials. Optional.
@@ -137,10 +139,28 @@ class Config:
137
139
  :param openai_org_id: OpenAI organization id. Optional.
138
140
  :type anthropic_apikey: string
139
141
  :param anthropic_apikey: Anthropic API key. Optional.
142
+ :type airbyte_client_id: str
143
+ :param airbyte_client_id: Airbyte client ID for API authentication. Optional.
144
+ :type airbyte_client_secret: str
145
+ :param airbyte_client_secret: Airbyte client secret for API authentication. Optional.
146
+ :type airbyte_api_url: str
147
+ :param airbyte_api_url: Airbyte API base URL, e.g. https://api.airbyte.com/v1. Optional.
140
148
  :type trivy_s3_bucket: str
141
149
  :param trivy_s3_bucket: The S3 bucket name containing Trivy scan results. Optional.
142
150
  :type trivy_s3_prefix: str
143
151
  :param trivy_s3_prefix: The S3 prefix path containing Trivy scan results. Optional.
152
+ :type scaleway_access_key: str
153
+ :param scaleway_access_key: Scaleway access key. Optional.
154
+ :type scaleway_secret_key: str
155
+ :param scaleway_secret_key: Scaleway secret key. Optional.
156
+ :type scaleway_org: str
157
+ :param scaleway_org: Scaleway organization id. Optional.
158
+ :type sentinelone_api_url: string
159
+ :param sentinelone_api_url: SentinelOne API URL. Optional.
160
+ :type sentinelone_api_token: string
161
+ :param sentinelone_api_token: SentinelOne API token for authentication. Optional.
162
+ :type sentinelone_account_ids: list[str]
163
+ :param sentinelone_account_ids: List of SentinelOne account IDs to sync. Optional.
144
164
  """
145
165
 
146
166
  def __init__(
@@ -155,6 +175,7 @@ class Config:
155
175
  aws_sync_all_profiles=False,
156
176
  aws_regions=None,
157
177
  aws_best_effort_mode=False,
178
+ aws_cloudtrail_management_events_lookback_hours=None,
158
179
  azure_sync_all_subscriptions=False,
159
180
  azure_sp_auth=None,
160
181
  azure_tenant_id=None,
@@ -213,8 +234,17 @@ class Config:
213
234
  openai_apikey=None,
214
235
  openai_org_id=None,
215
236
  anthropic_apikey=None,
237
+ airbyte_client_id=None,
238
+ airbyte_client_secret=None,
239
+ airbyte_api_url=None,
216
240
  trivy_s3_bucket=None,
217
241
  trivy_s3_prefix=None,
242
+ scaleway_access_key=None,
243
+ scaleway_secret_key=None,
244
+ scaleway_org=None,
245
+ sentinelone_api_url=None,
246
+ sentinelone_api_token=None,
247
+ sentinelone_account_ids=None,
218
248
  ):
219
249
  self.neo4j_uri = neo4j_uri
220
250
  self.neo4j_user = neo4j_user
@@ -226,6 +256,9 @@ class Config:
226
256
  self.aws_sync_all_profiles = aws_sync_all_profiles
227
257
  self.aws_regions = aws_regions
228
258
  self.aws_best_effort_mode = aws_best_effort_mode
259
+ self.aws_cloudtrail_management_events_lookback_hours = (
260
+ aws_cloudtrail_management_events_lookback_hours
261
+ )
229
262
  self.azure_sync_all_subscriptions = azure_sync_all_subscriptions
230
263
  self.azure_sp_auth = azure_sp_auth
231
264
  self.azure_tenant_id = azure_tenant_id
@@ -284,5 +317,14 @@ class Config:
284
317
  self.openai_apikey = openai_apikey
285
318
  self.openai_org_id = openai_org_id
286
319
  self.anthropic_apikey = anthropic_apikey
320
+ self.airbyte_client_id = airbyte_client_id
321
+ self.airbyte_client_secret = airbyte_client_secret
322
+ self.airbyte_api_url = airbyte_api_url
287
323
  self.trivy_s3_bucket = trivy_s3_bucket
288
324
  self.trivy_s3_prefix = trivy_s3_prefix
325
+ self.scaleway_access_key = scaleway_access_key
326
+ self.scaleway_secret_key = scaleway_secret_key
327
+ self.scaleway_org = scaleway_org
328
+ self.sentinelone_api_url = sentinelone_api_url
329
+ self.sentinelone_api_token = sentinelone_api_token
330
+ self.sentinelone_account_ids = sentinelone_account_ids
@@ -63,8 +63,9 @@ class CLI:
63
63
  default="bolt://localhost:7687",
64
64
  help=(
65
65
  "A valid Neo4j URI to sync against. See "
66
- "https://neo4j.com/docs/api/python-driver/current/driver.html#uri for complete documentation on the "
67
- "structure of a Neo4j URI."
66
+ "https://neo4j.com/docs/browser-manual/current/operations/dbms-connection/#uri-scheme for "
67
+ "documentation on the structure of a Neo4j URI, and "
68
+ "https://neo4j.com/docs/api/python-driver/current/ for complete documentation on the Python driver."
68
69
  ),
69
70
  )
70
71
  parser_get_state.add_argument(
@@ -0,0 +1,105 @@
1
+ import logging
2
+
3
+ import neo4j
4
+
5
+ import cartography.intel.airbyte.connections
6
+ import cartography.intel.airbyte.destinations
7
+ import cartography.intel.airbyte.organizations
8
+ import cartography.intel.airbyte.sources
9
+ import cartography.intel.airbyte.tags
10
+ import cartography.intel.airbyte.users
11
+ import cartography.intel.airbyte.workspaces
12
+ from cartography.config import Config
13
+ from cartography.intel.airbyte.util import AirbyteClient
14
+ from cartography.util import timeit
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
@timeit
def start_airbyte_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
    """
    Run the Airbyte sync when the module is configured; otherwise log and return.

    Organizations are synced first. Then, for every organization returned, its
    workspaces, users, sources, destinations, tags and connections are synced
    in that order.

    :param neo4j_session: Neo4j session handed to every sub-sync
    :param config: A cartography.config object carrying the Airbyte settings
    :return: None
    """
    is_configured = (
        config.airbyte_api_url
        and config.airbyte_client_id
        and config.airbyte_client_secret
    )
    if not is_configured:
        logger.info(
            "Airbyte import is not configured - skipping this module. "
            "See docs to configure.",
        )
        return

    # One API client is shared by all sub-syncs.
    api_client = AirbyteClient(
        base_url=config.airbyte_api_url,
        client_id=config.airbyte_client_id,
        client_secret=config.airbyte_client_secret,
    )

    common_job_parameters = {"UPDATE_TAG": config.update_tag}
    organizations = cartography.intel.airbyte.organizations.sync(
        neo4j_session,
        api_client,
        common_job_parameters,
    )

    for organization in organizations:
        # Every per-organization sync gets the org id alongside the update tag.
        org_common_job_parameters = {
            "UPDATE_TAG": config.update_tag,
            "ORG_ID": organization["organizationId"],
        }
        org_id = organization["organizationId"]

        workspaces = cartography.intel.airbyte.workspaces.sync(
            neo4j_session, api_client, org_id, org_common_job_parameters
        )
        workspace_ids = [entry["workspaceId"] for entry in workspaces]

        cartography.intel.airbyte.users.sync(
            neo4j_session, api_client, org_id, org_common_job_parameters
        )

        # The remaining syncs are additionally given the organization's workspace ids.
        cartography.intel.airbyte.sources.sync(
            neo4j_session, api_client, org_id, workspace_ids, org_common_job_parameters
        )
        cartography.intel.airbyte.destinations.sync(
            neo4j_session, api_client, org_id, workspace_ids, org_common_job_parameters
        )
        cartography.intel.airbyte.tags.sync(
            neo4j_session, api_client, org_id, workspace_ids, org_common_job_parameters
        )
        cartography.intel.airbyte.connections.sync(
            neo4j_session, api_client, org_id, workspace_ids, org_common_job_parameters
        )
@@ -0,0 +1,120 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+ from typing import Tuple
6
+
7
+ import neo4j
8
+
9
+ from cartography.client.core.tx import load
10
+ from cartography.graph.job import GraphJob
11
+ from cartography.intel.airbyte.util import AirbyteClient
12
+ from cartography.intel.airbyte.util import list_to_string
13
+ from cartography.models.airbyte.connection import AirbyteConnectionSchema
14
+ from cartography.models.airbyte.stream import AirbyteStreamSchema
15
+ from cartography.util import timeit
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
@timeit
def sync(
    neo4j_session: neo4j.Session,
    api_session: AirbyteClient,
    org_id: str,
    workspace_ids: List[str],
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Fetch, transform, load and clean up Airbyte connections and their streams.

    :param neo4j_session: Neo4j session used for loading and cleanup
    :param api_session: Airbyte API client
    :param org_id: Organization id passed to the load steps
    :param workspace_ids: Workspace ids passed to ``get``
    :param common_job_parameters: Must contain ``UPDATE_TAG``; also passed to cleanup
    """
    raw_connections = get(api_session, workspace_ids)
    connection_rows, stream_rows = transform(raw_connections)

    # Read after the fetch so a missing key surfaces at the same point as before.
    update_tag = common_job_parameters["UPDATE_TAG"]
    load_connections(neo4j_session, connection_rows, org_id, update_tag)
    load_streams(neo4j_session, stream_rows, org_id, update_tag)

    cleanup(neo4j_session, common_job_parameters)
40
+
41
+
42
@timeit
def get(
    api_session: AirbyteClient,
    workspace_ids: List[str],
) -> List[Dict[str, Any]]:
    """
    Request ``/connections`` from the Airbyte API.

    The ids are sent comma-joined as the ``workspaceIds`` query parameter.
    When ``workspace_ids`` is empty, no query parameters are sent.

    :param api_session: Airbyte API client
    :param workspace_ids: Workspace ids to send as the ``workspaceIds`` parameter
    :return: Whatever ``api_session.get`` returns for ``/connections``
    """
    query = {"workspaceIds": ",".join(workspace_ids)} if workspace_ids else None
    # Passed by keyword, matching the sibling ``/destinations`` getter.
    return api_session.get("/connections", params=query)
51
+
52
+
53
def transform(
    connections: List[Dict[str, Any]],
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    Split raw connections into connection rows and per-stream rows.

    Each connection dict is updated in place with a ``tags_ids`` list (the
    ``tagId`` of every attached tag) and returned in the first list. Every
    entry under ``configurations.streams`` becomes one flat dict in the second
    list, with ``streamId`` built as ``<connectionId>_<stream name>``.

    ``tags``, ``configurations`` and ``streams`` are treated as empty when
    they are missing or ``None``.

    :param connections: Connection dicts as returned by ``get``
    :return: ``(transformed_connections, transformed_streams)``
    """
    transformed_connections = []
    transformed_streams = []
    for connection in connections:
        # `or` rather than a .get() default, so an explicit null behaves
        # like a missing key instead of raising.
        tags = connection.get("tags") or []
        connection["tags_ids"] = [tag["tagId"] for tag in tags]
        transformed_connections.append(connection)

        streams = (connection.get("configurations") or {}).get("streams") or []
        for stream in streams:
            formatted_stream = {
                "connectionId": connection["connectionId"],
                "streamId": f"{connection['connectionId']}_{stream['name']}",
                "name": stream["name"],
                "syncMode": stream["syncMode"],
                "cursorField": list_to_string(stream.get("cursorField", [])),
                "primaryKey": list_to_string(stream.get("primaryKey", [])),
                "includeFiles": stream.get("includeFiles", False),
                "selectedFields": list_to_string(stream.get("selectedFields", [])),
                "mappers": list_to_string(stream.get("mappers", [])),
            }
            transformed_streams.append(formatted_stream)
    return transformed_connections, transformed_streams
75
+
76
+
77
@timeit
def load_connections(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    org_id: str,
    update_tag: int,
) -> None:
    """
    Load connection rows into Neo4j using ``AirbyteConnectionSchema``.

    :param neo4j_session: Neo4j session to load into
    :param data: Connection dicts to load
    :param org_id: Passed to ``load`` as ``ORG_ID``
    :param update_tag: Passed to ``load`` as ``lastupdated``
    """
    row_count = len(data)
    logger.info("Loading %d Airbyte Connections into Neo4j.", row_count)
    schema = AirbyteConnectionSchema()
    load(neo4j_session, schema, data, lastupdated=update_tag, ORG_ID=org_id)
92
+
93
+
94
@timeit
def load_streams(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    org_id: str,
    update_tag: int,
) -> None:
    """
    Load stream rows into Neo4j using ``AirbyteStreamSchema``.

    :param neo4j_session: Neo4j session to load into
    :param data: Stream dicts to load
    :param org_id: Passed to ``load`` as ``ORG_ID``
    :param update_tag: Passed to ``load`` as ``lastupdated``
    """
    row_count = len(data)
    logger.info("Loading %d Airbyte Streams into Neo4j.", row_count)
    schema = AirbyteStreamSchema()
    load(neo4j_session, schema, data, lastupdated=update_tag, ORG_ID=org_id)
109
+
110
+
111
@timeit
def cleanup(
    neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]
) -> None:
    """
    Run the schema-derived GraphJobs for streams, then for connections.

    :param neo4j_session: Neo4j session the jobs run against
    :param common_job_parameters: Parameters handed to ``GraphJob.from_node_schema``
    """
    # Streams first, then connections - same order as the original calls.
    stream_job = GraphJob.from_node_schema(
        AirbyteStreamSchema(), common_job_parameters
    )
    stream_job.run(neo4j_session)

    connection_job = GraphJob.from_node_schema(
        AirbyteConnectionSchema(), common_job_parameters
    )
    connection_job.run(neo4j_session)
@@ -0,0 +1,81 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import neo4j
7
+
8
+ from cartography.client.core.tx import load
9
+ from cartography.graph.job import GraphJob
10
+ from cartography.intel.airbyte.util import AirbyteClient
11
+ from cartography.intel.airbyte.util import normalize_airbyte_config
12
+ from cartography.models.airbyte.destination import AirbyteDestinationSchema
13
+ from cartography.util import timeit
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
@timeit
def sync(
    neo4j_session: neo4j.Session,
    api_session: AirbyteClient,
    org_id: str,
    workspace_ids: List[str],
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Fetch, transform, load and clean up Airbyte destinations.

    :param neo4j_session: Neo4j session used for loading and cleanup
    :param api_session: Airbyte API client
    :param org_id: Organization id passed to the load step
    :param workspace_ids: Workspace ids passed to ``get``
    :param common_job_parameters: Must contain ``UPDATE_TAG``; also passed to cleanup
    """
    raw_destinations = get(api_session, workspace_ids)
    destination_rows = transform(raw_destinations)

    # Read after the fetch so a missing key surfaces at the same point as before.
    update_tag = common_job_parameters["UPDATE_TAG"]
    load_destinations(neo4j_session, destination_rows, org_id, update_tag)

    cleanup(neo4j_session, common_job_parameters)
35
+
36
+
37
@timeit
def get(
    api_session: AirbyteClient,
    workspace_ids: List[str],
) -> List[Dict[str, Any]]:
    """
    Request ``/destinations`` from the Airbyte API.

    The ids are sent comma-joined as the ``workspaceIds`` query parameter.
    When ``workspace_ids`` is empty, no query parameters are sent.

    :param api_session: Airbyte API client
    :param workspace_ids: Workspace ids to send as the ``workspaceIds`` parameter
    :return: Whatever ``api_session.get`` returns for ``/destinations``
    """
    if workspace_ids:
        query = {"workspaceIds": ",".join(workspace_ids)}
    else:
        query = None
    return api_session.get("/destinations", params=query)
46
+
47
+
48
def transform(destinations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Replace each destination's ``configuration`` with its normalized form.

    Destination dicts are updated in place and collected, in order, into a
    new list. A missing ``configuration`` key is normalized from ``{}``.

    :param destinations: Destination dicts as returned by ``get``
    :return: New list holding the same (mutated) destination dicts
    """
    result = []
    for entry in destinations:
        raw_config = entry.get("configuration", {})
        entry["configuration"] = normalize_airbyte_config(raw_config)
        result.append(entry)
    return result
56
+
57
+
58
@timeit
def load_destinations(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    org_id: str,
    update_tag: int,
) -> None:
    """
    Load destination rows into Neo4j using ``AirbyteDestinationSchema``.

    :param neo4j_session: Neo4j session to load into
    :param data: Destination dicts to load
    :param org_id: Passed to ``load`` as ``ORG_ID``
    :param update_tag: Passed to ``load`` as ``lastupdated``
    """
    row_count = len(data)
    logger.info("Loading %d Airbyte Destinations into Neo4j.", row_count)
    schema = AirbyteDestinationSchema()
    load(neo4j_session, schema, data, lastupdated=update_tag, ORG_ID=org_id)
73
+
74
+
75
@timeit
def cleanup(
    neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]
) -> None:
    """
    Run the GraphJob derived from ``AirbyteDestinationSchema``.

    :param neo4j_session: Neo4j session the job runs against
    :param common_job_parameters: Parameters handed to ``GraphJob.from_node_schema``
    """
    destination_job = GraphJob.from_node_schema(
        AirbyteDestinationSchema(), common_job_parameters
    )
    destination_job.run(neo4j_session)
@@ -0,0 +1,59 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+ from typing import List
5
+
6
+ import neo4j
7
+
8
+ from cartography.client.core.tx import load
9
+ from cartography.graph.job import GraphJob
10
+ from cartography.intel.airbyte.util import AirbyteClient
11
+ from cartography.models.airbyte.organization import AirbyteOrganizationSchema
12
+ from cartography.util import timeit
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
@timeit
def sync(
    neo4j_session: neo4j.Session,
    api_session: AirbyteClient,
    common_job_parameters: Dict[str, Any],
) -> List[Dict]:
    """
    Fetch, load and clean up Airbyte organizations.

    :param neo4j_session: Neo4j session used for loading and cleanup
    :param api_session: Airbyte API client
    :param common_job_parameters: Must contain ``UPDATE_TAG``; also passed to cleanup
    :return: The organization dicts exactly as fetched
    """
    fetched = get(api_session)
    update_tag = common_job_parameters["UPDATE_TAG"]
    load_organizations(neo4j_session, fetched, update_tag)
    cleanup(neo4j_session, common_job_parameters)
    return fetched
29
+
30
+
31
@timeit
def get(
    api_session: AirbyteClient,
) -> List[Dict[str, Any]]:
    """
    Request ``/organizations`` from the Airbyte API.

    :param api_session: Airbyte API client
    :return: Whatever ``api_session.get`` returns for ``/organizations``
    """
    organizations = api_session.get("/organizations")
    return organizations
36
+
37
+
38
@timeit
def load_organizations(
    neo4j_session: neo4j.Session,
    data: List[Dict[str, Any]],
    update_tag: int,
) -> None:
    """
    Load organization rows into Neo4j using ``AirbyteOrganizationSchema``.

    :param neo4j_session: Neo4j session to load into
    :param data: Organization dicts to load
    :param update_tag: Passed to ``load`` as ``lastupdated``
    """
    row_count = len(data)
    logger.info("Loading %d Airbyte Organizations into Neo4j.", row_count)
    schema = AirbyteOrganizationSchema()
    load(neo4j_session, schema, data, lastupdated=update_tag)
51
+
52
+
53
@timeit
def cleanup(
    neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]
) -> None:
    """
    Run the GraphJob derived from ``AirbyteOrganizationSchema``.

    :param neo4j_session: Neo4j session the job runs against
    :param common_job_parameters: Parameters handed to ``GraphJob.from_node_schema``
    """
    organization_job = GraphJob.from_node_schema(
        AirbyteOrganizationSchema(), common_job_parameters
    )
    organization_job.run(neo4j_session)