cartography 0.106.0rc2__py3-none-any.whl → 0.107.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +131 -2
- cartography/config.py +42 -0
- cartography/driftdetect/cli.py +3 -2
- cartography/intel/airbyte/__init__.py +105 -0
- cartography/intel/airbyte/connections.py +120 -0
- cartography/intel/airbyte/destinations.py +81 -0
- cartography/intel/airbyte/organizations.py +59 -0
- cartography/intel/airbyte/sources.py +78 -0
- cartography/intel/airbyte/tags.py +64 -0
- cartography/intel/airbyte/users.py +106 -0
- cartography/intel/airbyte/util.py +122 -0
- cartography/intel/airbyte/workspaces.py +63 -0
- cartography/intel/aws/__init__.py +1 -0
- cartography/intel/aws/cloudtrail_management_events.py +364 -0
- cartography/intel/aws/codebuild.py +132 -0
- cartography/intel/aws/inspector.py +77 -48
- cartography/intel/aws/resources.py +4 -0
- cartography/intel/aws/sns.py +62 -2
- cartography/intel/entra/users.py +84 -42
- cartography/intel/scaleway/__init__.py +127 -0
- cartography/intel/scaleway/iam/__init__.py +0 -0
- cartography/intel/scaleway/iam/apikeys.py +71 -0
- cartography/intel/scaleway/iam/applications.py +71 -0
- cartography/intel/scaleway/iam/groups.py +71 -0
- cartography/intel/scaleway/iam/users.py +71 -0
- cartography/intel/scaleway/instances/__init__.py +0 -0
- cartography/intel/scaleway/instances/flexibleips.py +86 -0
- cartography/intel/scaleway/instances/instances.py +92 -0
- cartography/intel/scaleway/projects.py +79 -0
- cartography/intel/scaleway/storage/__init__.py +0 -0
- cartography/intel/scaleway/storage/snapshots.py +86 -0
- cartography/intel/scaleway/storage/volumes.py +84 -0
- cartography/intel/scaleway/utils.py +37 -0
- cartography/intel/sentinelone/__init__.py +63 -0
- cartography/intel/sentinelone/account.py +140 -0
- cartography/intel/sentinelone/agent.py +139 -0
- cartography/intel/sentinelone/api.py +113 -0
- cartography/intel/sentinelone/utils.py +9 -0
- cartography/models/airbyte/__init__.py +0 -0
- cartography/models/airbyte/connection.py +138 -0
- cartography/models/airbyte/destination.py +75 -0
- cartography/models/airbyte/organization.py +19 -0
- cartography/models/airbyte/source.py +75 -0
- cartography/models/airbyte/stream.py +74 -0
- cartography/models/airbyte/tag.py +69 -0
- cartography/models/airbyte/user.py +111 -0
- cartography/models/airbyte/workspace.py +46 -0
- cartography/models/aws/cloudtrail/management_events.py +64 -0
- cartography/models/aws/codebuild/__init__.py +0 -0
- cartography/models/aws/codebuild/project.py +49 -0
- cartography/models/aws/ecs/containers.py +19 -0
- cartography/models/aws/ecs/task_definitions.py +38 -0
- cartography/models/aws/inspector/findings.py +37 -0
- cartography/models/aws/inspector/packages.py +1 -31
- cartography/models/aws/sns/topic_subscription.py +74 -0
- cartography/models/entra/user.py +17 -51
- cartography/models/scaleway/__init__.py +0 -0
- cartography/models/scaleway/iam/__init__.py +0 -0
- cartography/models/scaleway/iam/apikey.py +96 -0
- cartography/models/scaleway/iam/application.py +52 -0
- cartography/models/scaleway/iam/group.py +95 -0
- cartography/models/scaleway/iam/user.py +60 -0
- cartography/models/scaleway/instance/__init__.py +0 -0
- cartography/models/scaleway/instance/flexibleip.py +52 -0
- cartography/models/scaleway/instance/instance.py +118 -0
- cartography/models/scaleway/organization.py +19 -0
- cartography/models/scaleway/project.py +48 -0
- cartography/models/scaleway/storage/__init__.py +0 -0
- cartography/models/scaleway/storage/snapshot.py +78 -0
- cartography/models/scaleway/storage/volume.py +51 -0
- cartography/models/sentinelone/__init__.py +1 -0
- cartography/models/sentinelone/account.py +40 -0
- cartography/models/sentinelone/agent.py +50 -0
- cartography/sync.py +11 -4
- {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/METADATA +20 -16
- {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/RECORD +81 -21
- {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/WHEEL +0 -0
- {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/entry_points.txt +0 -0
- {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.106.0rc2.dist-info → cartography-0.107.0rc2.dist-info}/top_level.txt +0 -0
cartography/_version.py
CHANGED
|
@@ -17,5 +17,5 @@ __version__: str
|
|
|
17
17
|
__version_tuple__: VERSION_TUPLE
|
|
18
18
|
version_tuple: VERSION_TUPLE
|
|
19
19
|
|
|
20
|
-
__version__ = version = '0.106.0rc2'
|
|
21
|
-
__version_tuple__ = version_tuple = (0, 106, 0, 'rc2')
|
|
20
|
+
__version__ = version = '0.107.0rc2'
|
|
21
|
+
__version_tuple__ = version_tuple = (0, 107, 0, 'rc2')
|
cartography/cli.py
CHANGED
|
@@ -71,8 +71,8 @@ class CLI:
|
|
|
71
71
|
default="bolt://localhost:7687",
|
|
72
72
|
help=(
|
|
73
73
|
"A valid Neo4j URI to sync against. See "
|
|
74
|
-
"https://neo4j.com/docs/
|
|
75
|
-
"structure of a Neo4j URI."
|
|
74
|
+
"https://neo4j.com/docs/browser-manual/current/operations/dbms-connection/#uri-scheme for complete "
|
|
75
|
+
"documentation on the structure of a Neo4j URI."
|
|
76
76
|
),
|
|
77
77
|
)
|
|
78
78
|
parser.add_argument(
|
|
@@ -182,6 +182,14 @@ class CLI:
|
|
|
182
182
|
"syncing other accounts and delay raising an exception until the very end."
|
|
183
183
|
),
|
|
184
184
|
)
|
|
185
|
+
parser.add_argument(
|
|
186
|
+
"--aws-cloudtrail-management-events-lookback-hours",
|
|
187
|
+
type=int,
|
|
188
|
+
default=None,
|
|
189
|
+
help=(
|
|
190
|
+
"Number of hours back to retrieve CloudTrail management events from. If not specified, CloudTrail management events will not be retrieved."
|
|
191
|
+
),
|
|
192
|
+
)
|
|
185
193
|
parser.add_argument(
|
|
186
194
|
"--oci-sync-all-profiles",
|
|
187
195
|
action="store_true",
|
|
@@ -637,6 +645,33 @@ class CLI:
|
|
|
637
645
|
"Required if you are using the Anthropic intel module. Ignored otherwise."
|
|
638
646
|
),
|
|
639
647
|
)
|
|
648
|
+
parser.add_argument(
|
|
649
|
+
"--airbyte-client-id",
|
|
650
|
+
type=str,
|
|
651
|
+
default=None,
|
|
652
|
+
help=(
|
|
653
|
+
"The Airbyte client ID to use for authentication. "
|
|
654
|
+
"Required if you are using the Airbyte intel module. Ignored otherwise."
|
|
655
|
+
),
|
|
656
|
+
)
|
|
657
|
+
parser.add_argument(
|
|
658
|
+
"--airbyte-client-secret-env-var",
|
|
659
|
+
type=str,
|
|
660
|
+
default=None,
|
|
661
|
+
help=(
|
|
662
|
+
"The name of an environment variable containing the Airbyte client secret for authentication. "
|
|
663
|
+
"Required if you are using the Airbyte intel module. Ignored otherwise."
|
|
664
|
+
),
|
|
665
|
+
)
|
|
666
|
+
parser.add_argument(
|
|
667
|
+
"--airbyte-api-url",
|
|
668
|
+
type=str,
|
|
669
|
+
default="https://api.airbyte.com/v1",
|
|
670
|
+
help=(
|
|
671
|
+
"The base URL for the Airbyte API (default is the public Airbyte Cloud API). "
|
|
672
|
+
"Required if you are using the Airbyte intel module. Ignored otherwise."
|
|
673
|
+
),
|
|
674
|
+
)
|
|
640
675
|
parser.add_argument(
|
|
641
676
|
"--trivy-s3-bucket",
|
|
642
677
|
type=str,
|
|
@@ -655,6 +690,59 @@ class CLI:
|
|
|
655
690
|
"Required if you are using the Trivy module. Ignored otherwise."
|
|
656
691
|
),
|
|
657
692
|
)
|
|
693
|
+
parser.add_argument(
|
|
694
|
+
"--scaleway-org",
|
|
695
|
+
type=str,
|
|
696
|
+
default=None,
|
|
697
|
+
help=(
|
|
698
|
+
"The Scaleway organization ID to sync. "
|
|
699
|
+
"Required if you are using the Scaleway intel module. Ignored otherwise."
|
|
700
|
+
),
|
|
701
|
+
)
|
|
702
|
+
parser.add_argument(
|
|
703
|
+
"--scaleway-access-key",
|
|
704
|
+
type=str,
|
|
705
|
+
default=None,
|
|
706
|
+
help=(
|
|
707
|
+
"The Scaleway access key to use for authentication. "
|
|
708
|
+
"Required if you are using the Scaleway intel module. Ignored otherwise."
|
|
709
|
+
),
|
|
710
|
+
)
|
|
711
|
+
parser.add_argument(
|
|
712
|
+
"--scaleway-secret-key-env-var",
|
|
713
|
+
type=str,
|
|
714
|
+
default=None,
|
|
715
|
+
help=(
|
|
716
|
+
"The name of an environment variable containing the Scaleway secret key for authentication. "
|
|
717
|
+
"Required if you are using the Scaleway intel module. Ignored otherwise."
|
|
718
|
+
),
|
|
719
|
+
)
|
|
720
|
+
parser.add_argument(
|
|
721
|
+
"--sentinelone-account-ids",
|
|
722
|
+
type=str,
|
|
723
|
+
default=None,
|
|
724
|
+
help=(
|
|
725
|
+
"Comma-separated list of SentinelOne account IDs to sync. "
|
|
726
|
+
"If not specified, all accessible accounts will be synced."
|
|
727
|
+
),
|
|
728
|
+
)
|
|
729
|
+
parser.add_argument(
|
|
730
|
+
"--sentinelone-api-url",
|
|
731
|
+
type=str,
|
|
732
|
+
default=None,
|
|
733
|
+
help=(
|
|
734
|
+
"SentinelOne API URL. Required if you are using the SentinelOne intel module. Ignored otherwise."
|
|
735
|
+
),
|
|
736
|
+
)
|
|
737
|
+
parser.add_argument(
|
|
738
|
+
"--sentinelone-api-token-env-var",
|
|
739
|
+
type=str,
|
|
740
|
+
default="SENTINELONE_API_TOKEN",
|
|
741
|
+
help=(
|
|
742
|
+
"The name of an environment variable containing the SentinelOne API token. "
|
|
743
|
+
"Required if you are using the SentinelOne intel module. Ignored otherwise."
|
|
744
|
+
),
|
|
745
|
+
)
|
|
658
746
|
|
|
659
747
|
return parser
|
|
660
748
|
|
|
@@ -973,6 +1061,17 @@ class CLI:
|
|
|
973
1061
|
else:
|
|
974
1062
|
config.anthropic_apikey = None
|
|
975
1063
|
|
|
1064
|
+
# Airbyte config
|
|
1065
|
+
if config.airbyte_client_id and config.airbyte_client_secret_env_var:
|
|
1066
|
+
logger.debug(
|
|
1067
|
+
f"Reading Airbyte client secret from environment variable {config.airbyte_client_secret_env_var}",
|
|
1068
|
+
)
|
|
1069
|
+
config.airbyte_client_secret = os.environ.get(
|
|
1070
|
+
config.airbyte_client_secret_env_var,
|
|
1071
|
+
)
|
|
1072
|
+
else:
|
|
1073
|
+
config.airbyte_client_secret = None
|
|
1074
|
+
|
|
976
1075
|
# Trivy config
|
|
977
1076
|
if config.trivy_s3_bucket:
|
|
978
1077
|
logger.debug(f"Trivy S3 bucket: {config.trivy_s3_bucket}")
|
|
@@ -980,6 +1079,36 @@ class CLI:
|
|
|
980
1079
|
if config.trivy_s3_prefix:
|
|
981
1080
|
logger.debug(f"Trivy S3 prefix: {config.trivy_s3_prefix}")
|
|
982
1081
|
|
|
1082
|
+
# Scaleway config
|
|
1083
|
+
if config.scaleway_secret_key_env_var:
|
|
1084
|
+
logger.debug(
|
|
1085
|
+
f"Reading Scaleway secret key from environment variable {config.scaleway_secret_key_env_var}",
|
|
1086
|
+
)
|
|
1087
|
+
config.scaleway_secret_key = os.environ.get(
|
|
1088
|
+
config.scaleway_secret_key_env_var,
|
|
1089
|
+
)
|
|
1090
|
+
else:
|
|
1091
|
+
config.scaleway_secret_key = None
|
|
1092
|
+
|
|
1093
|
+
# SentinelOne config
|
|
1094
|
+
if config.sentinelone_account_ids:
|
|
1095
|
+
config.sentinelone_account_ids = [
|
|
1096
|
+
id.strip() for id in config.sentinelone_account_ids.split(",")
|
|
1097
|
+
]
|
|
1098
|
+
logger.debug(
|
|
1099
|
+
f"Parsed {len(config.sentinelone_account_ids)} SentinelOne account IDs to sync"
|
|
1100
|
+
)
|
|
1101
|
+
else:
|
|
1102
|
+
config.sentinelone_account_ids = None
|
|
1103
|
+
|
|
1104
|
+
if config.sentinelone_api_url and config.sentinelone_api_token_env_var:
|
|
1105
|
+
logger.debug(
|
|
1106
|
+
f"Reading API token for SentinelOne from environment variable {config.sentinelone_api_token_env_var}",
|
|
1107
|
+
)
|
|
1108
|
+
config.sentinelone_api_token = os.environ.get(
|
|
1109
|
+
config.sentinelone_api_token_env_var
|
|
1110
|
+
)
|
|
1111
|
+
|
|
983
1112
|
# Run cartography
|
|
984
1113
|
try:
|
|
985
1114
|
return cartography.sync.run_with_config(self.sync, config)
|
cartography/config.py
CHANGED
|
@@ -31,6 +31,8 @@ class Config:
|
|
|
31
31
|
:type aws_best_effort_mode: bool
|
|
32
32
|
:param aws_best_effort_mode: If True, AWS sync will not raise any exceptions, just log. If False (default),
|
|
33
33
|
exceptions will be raised.
|
|
34
|
+
:type aws_cloudtrail_management_events_lookback_hours: int
|
|
35
|
+
:param aws_cloudtrail_management_events_lookback_hours: Number of hours back to retrieve CloudTrail management events from. Optional.
|
|
34
36
|
:type azure_sync_all_subscriptions: bool
|
|
35
37
|
:param azure_sync_all_subscriptions: If True, Azure sync will run for all profiles in azureProfile.json. If
|
|
36
38
|
False (default), Azure sync will run using current user session via CLI credentials. Optional.
|
|
@@ -137,10 +139,28 @@ class Config:
|
|
|
137
139
|
:param openai_org_id: OpenAI organization id. Optional.
|
|
138
140
|
:type anthropic_apikey: string
|
|
139
141
|
:param anthropic_apikey: Anthropic API key. Optional.
|
|
142
|
+
:type airbyte_client_id: str
|
|
143
|
+
:param airbyte_client_id: Airbyte client ID for API authentication. Optional.
|
|
144
|
+
:type airbyte_client_secret: str
|
|
145
|
+
:param airbyte_client_secret: Airbyte client secret for API authentication. Optional.
|
|
146
|
+
:type airbyte_api_url: str
|
|
147
|
+
:param airbyte_api_url: Airbyte API base URL, e.g. https://api.airbyte.com/v1. Optional.
|
|
140
148
|
:type trivy_s3_bucket: str
|
|
141
149
|
:param trivy_s3_bucket: The S3 bucket name containing Trivy scan results. Optional.
|
|
142
150
|
:type trivy_s3_prefix: str
|
|
143
151
|
:param trivy_s3_prefix: The S3 prefix path containing Trivy scan results. Optional.
|
|
152
|
+
:type scaleway_access_key: str
|
|
153
|
+
:param scaleway_access_key: Scaleway access key. Optional.
|
|
154
|
+
:type scaleway_secret_key: str
|
|
155
|
+
:param scaleway_secret_key: Scaleway secret key. Optional.
|
|
156
|
+
:type scaleway_org: str
|
|
157
|
+
:param scaleway_org: Scaleway organization id. Optional.
|
|
158
|
+
:type sentinelone_api_url: string
|
|
159
|
+
:param sentinelone_api_url: SentinelOne API URL. Optional.
|
|
160
|
+
:type sentinelone_api_token: string
|
|
161
|
+
:param sentinelone_api_token: SentinelOne API token for authentication. Optional.
|
|
162
|
+
:type sentinelone_account_ids: list[str]
|
|
163
|
+
:param sentinelone_account_ids: List of SentinelOne account IDs to sync. Optional.
|
|
144
164
|
"""
|
|
145
165
|
|
|
146
166
|
def __init__(
|
|
@@ -155,6 +175,7 @@ class Config:
|
|
|
155
175
|
aws_sync_all_profiles=False,
|
|
156
176
|
aws_regions=None,
|
|
157
177
|
aws_best_effort_mode=False,
|
|
178
|
+
aws_cloudtrail_management_events_lookback_hours=None,
|
|
158
179
|
azure_sync_all_subscriptions=False,
|
|
159
180
|
azure_sp_auth=None,
|
|
160
181
|
azure_tenant_id=None,
|
|
@@ -213,8 +234,17 @@ class Config:
|
|
|
213
234
|
openai_apikey=None,
|
|
214
235
|
openai_org_id=None,
|
|
215
236
|
anthropic_apikey=None,
|
|
237
|
+
airbyte_client_id=None,
|
|
238
|
+
airbyte_client_secret=None,
|
|
239
|
+
airbyte_api_url=None,
|
|
216
240
|
trivy_s3_bucket=None,
|
|
217
241
|
trivy_s3_prefix=None,
|
|
242
|
+
scaleway_access_key=None,
|
|
243
|
+
scaleway_secret_key=None,
|
|
244
|
+
scaleway_org=None,
|
|
245
|
+
sentinelone_api_url=None,
|
|
246
|
+
sentinelone_api_token=None,
|
|
247
|
+
sentinelone_account_ids=None,
|
|
218
248
|
):
|
|
219
249
|
self.neo4j_uri = neo4j_uri
|
|
220
250
|
self.neo4j_user = neo4j_user
|
|
@@ -226,6 +256,9 @@ class Config:
|
|
|
226
256
|
self.aws_sync_all_profiles = aws_sync_all_profiles
|
|
227
257
|
self.aws_regions = aws_regions
|
|
228
258
|
self.aws_best_effort_mode = aws_best_effort_mode
|
|
259
|
+
self.aws_cloudtrail_management_events_lookback_hours = (
|
|
260
|
+
aws_cloudtrail_management_events_lookback_hours
|
|
261
|
+
)
|
|
229
262
|
self.azure_sync_all_subscriptions = azure_sync_all_subscriptions
|
|
230
263
|
self.azure_sp_auth = azure_sp_auth
|
|
231
264
|
self.azure_tenant_id = azure_tenant_id
|
|
@@ -284,5 +317,14 @@ class Config:
|
|
|
284
317
|
self.openai_apikey = openai_apikey
|
|
285
318
|
self.openai_org_id = openai_org_id
|
|
286
319
|
self.anthropic_apikey = anthropic_apikey
|
|
320
|
+
self.airbyte_client_id = airbyte_client_id
|
|
321
|
+
self.airbyte_client_secret = airbyte_client_secret
|
|
322
|
+
self.airbyte_api_url = airbyte_api_url
|
|
287
323
|
self.trivy_s3_bucket = trivy_s3_bucket
|
|
288
324
|
self.trivy_s3_prefix = trivy_s3_prefix
|
|
325
|
+
self.scaleway_access_key = scaleway_access_key
|
|
326
|
+
self.scaleway_secret_key = scaleway_secret_key
|
|
327
|
+
self.scaleway_org = scaleway_org
|
|
328
|
+
self.sentinelone_api_url = sentinelone_api_url
|
|
329
|
+
self.sentinelone_api_token = sentinelone_api_token
|
|
330
|
+
self.sentinelone_account_ids = sentinelone_account_ids
|
cartography/driftdetect/cli.py
CHANGED
|
@@ -63,8 +63,9 @@ class CLI:
|
|
|
63
63
|
default="bolt://localhost:7687",
|
|
64
64
|
help=(
|
|
65
65
|
"A valid Neo4j URI to sync against. See "
|
|
66
|
-
"https://neo4j.com/docs/
|
|
67
|
-
"structure of a Neo4j URI
|
|
66
|
+
"https://neo4j.com/docs/browser-manual/current/operations/dbms-connection/#uri-scheme for "
|
|
67
|
+
"documentation on the structure of a Neo4j URI, and "
|
|
68
|
+
"https://neo4j.com/docs/api/python-driver/current/ for complete documentation on the Python driver."
|
|
68
69
|
),
|
|
69
70
|
)
|
|
70
71
|
parser_get_state.add_argument(
|
|
cartography/intel/airbyte/__init__.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
import neo4j
|
|
4
|
+
|
|
5
|
+
import cartography.intel.airbyte.connections
|
|
6
|
+
import cartography.intel.airbyte.destinations
|
|
7
|
+
import cartography.intel.airbyte.organizations
|
|
8
|
+
import cartography.intel.airbyte.sources
|
|
9
|
+
import cartography.intel.airbyte.tags
|
|
10
|
+
import cartography.intel.airbyte.users
|
|
11
|
+
import cartography.intel.airbyte.workspaces
|
|
12
|
+
from cartography.config import Config
|
|
13
|
+
from cartography.intel.airbyte.util import AirbyteClient
|
|
14
|
+
from cartography.util import timeit
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@timeit
|
|
20
|
+
def start_airbyte_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
|
|
21
|
+
"""
|
|
22
|
+
If this module is configured, perform ingestion of Airbyte data. Otherwise warn and exit
|
|
23
|
+
:param neo4j_session: Neo4J session for database interface
|
|
24
|
+
:param config: A cartography.config object
|
|
25
|
+
:return: None
|
|
26
|
+
"""
|
|
27
|
+
if (
|
|
28
|
+
not config.airbyte_api_url
|
|
29
|
+
or not config.airbyte_client_id
|
|
30
|
+
or not config.airbyte_client_secret
|
|
31
|
+
):
|
|
32
|
+
logger.info(
|
|
33
|
+
"Airbyte import is not configured - skipping this module. "
|
|
34
|
+
"See docs to configure.",
|
|
35
|
+
)
|
|
36
|
+
return
|
|
37
|
+
|
|
38
|
+
# Create api session
|
|
39
|
+
api_client = AirbyteClient(
|
|
40
|
+
base_url=config.airbyte_api_url,
|
|
41
|
+
client_id=config.airbyte_client_id,
|
|
42
|
+
client_secret=config.airbyte_client_secret,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
common_job_parameters = {
|
|
46
|
+
"UPDATE_TAG": config.update_tag,
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
organizations = cartography.intel.airbyte.organizations.sync(
|
|
50
|
+
neo4j_session,
|
|
51
|
+
api_client,
|
|
52
|
+
common_job_parameters,
|
|
53
|
+
)
|
|
54
|
+
|
|
55
|
+
for organization in organizations:
|
|
56
|
+
org_common_job_parameters = {
|
|
57
|
+
"UPDATE_TAG": config.update_tag,
|
|
58
|
+
"ORG_ID": organization["organizationId"],
|
|
59
|
+
}
|
|
60
|
+
workspaces = cartography.intel.airbyte.workspaces.sync(
|
|
61
|
+
neo4j_session,
|
|
62
|
+
api_client,
|
|
63
|
+
organization["organizationId"],
|
|
64
|
+
org_common_job_parameters,
|
|
65
|
+
)
|
|
66
|
+
workspace_ids = [workspace["workspaceId"] for workspace in workspaces]
|
|
67
|
+
|
|
68
|
+
cartography.intel.airbyte.users.sync(
|
|
69
|
+
neo4j_session,
|
|
70
|
+
api_client,
|
|
71
|
+
organization["organizationId"],
|
|
72
|
+
org_common_job_parameters,
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
cartography.intel.airbyte.sources.sync(
|
|
76
|
+
neo4j_session,
|
|
77
|
+
api_client,
|
|
78
|
+
organization["organizationId"],
|
|
79
|
+
workspace_ids,
|
|
80
|
+
org_common_job_parameters,
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
cartography.intel.airbyte.destinations.sync(
|
|
84
|
+
neo4j_session,
|
|
85
|
+
api_client,
|
|
86
|
+
organization["organizationId"],
|
|
87
|
+
workspace_ids,
|
|
88
|
+
org_common_job_parameters,
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
cartography.intel.airbyte.tags.sync(
|
|
92
|
+
neo4j_session,
|
|
93
|
+
api_client,
|
|
94
|
+
organization["organizationId"],
|
|
95
|
+
workspace_ids,
|
|
96
|
+
org_common_job_parameters,
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
cartography.intel.airbyte.connections.sync(
|
|
100
|
+
neo4j_session,
|
|
101
|
+
api_client,
|
|
102
|
+
organization["organizationId"],
|
|
103
|
+
workspace_ids,
|
|
104
|
+
org_common_job_parameters,
|
|
105
|
+
)
|
|
cartography/intel/airbyte/connections.py
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
from typing import Dict
|
|
4
|
+
from typing import List
|
|
5
|
+
from typing import Tuple
|
|
6
|
+
|
|
7
|
+
import neo4j
|
|
8
|
+
|
|
9
|
+
from cartography.client.core.tx import load
|
|
10
|
+
from cartography.graph.job import GraphJob
|
|
11
|
+
from cartography.intel.airbyte.util import AirbyteClient
|
|
12
|
+
from cartography.intel.airbyte.util import list_to_string
|
|
13
|
+
from cartography.models.airbyte.connection import AirbyteConnectionSchema
|
|
14
|
+
from cartography.models.airbyte.stream import AirbyteStreamSchema
|
|
15
|
+
from cartography.util import timeit
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@timeit
|
|
21
|
+
def sync(
|
|
22
|
+
neo4j_session: neo4j.Session,
|
|
23
|
+
api_session: AirbyteClient,
|
|
24
|
+
org_id: str,
|
|
25
|
+
workspace_ids: List[str],
|
|
26
|
+
common_job_parameters: Dict[str, Any],
|
|
27
|
+
) -> None:
|
|
28
|
+
connections = get(api_session, workspace_ids)
|
|
29
|
+
transformed_connections, transformed_streams = transform(connections)
|
|
30
|
+
load_connections(
|
|
31
|
+
neo4j_session,
|
|
32
|
+
transformed_connections,
|
|
33
|
+
org_id,
|
|
34
|
+
common_job_parameters["UPDATE_TAG"],
|
|
35
|
+
)
|
|
36
|
+
load_streams(
|
|
37
|
+
neo4j_session, transformed_streams, org_id, common_job_parameters["UPDATE_TAG"]
|
|
38
|
+
)
|
|
39
|
+
cleanup(neo4j_session, common_job_parameters)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@timeit
|
|
43
|
+
def get(
|
|
44
|
+
api_session: AirbyteClient,
|
|
45
|
+
workspace_ids: List[str],
|
|
46
|
+
) -> List[Dict[str, Any]]:
|
|
47
|
+
return api_session.get(
|
|
48
|
+
"/connections",
|
|
49
|
+
{"workspaceIds": ",".join(workspace_ids)} if workspace_ids else None,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def transform(
|
|
54
|
+
connections: List[Dict[str, Any]],
|
|
55
|
+
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
|
|
56
|
+
transformed_connections = []
|
|
57
|
+
transformed_streams = []
|
|
58
|
+
for connection in connections:
|
|
59
|
+
connection["tags_ids"] = [tag["tagId"] for tag in connection.get("tags", [])]
|
|
60
|
+
transformed_connections.append(connection)
|
|
61
|
+
for stream in connection.get("configurations", {}).get("streams", []):
|
|
62
|
+
formated_stream = {
|
|
63
|
+
"connectionId": connection["connectionId"],
|
|
64
|
+
"streamId": f"{connection['connectionId']}_{stream['name']}",
|
|
65
|
+
"name": stream["name"],
|
|
66
|
+
"syncMode": stream["syncMode"],
|
|
67
|
+
"cursorField": list_to_string(stream.get("cursorField", [])),
|
|
68
|
+
"primaryKey": list_to_string(stream.get("primaryKey", [])),
|
|
69
|
+
"includeFiles": stream.get("includeFiles", False),
|
|
70
|
+
"selectedFields": list_to_string(stream.get("selectedFields", [])),
|
|
71
|
+
"mappers": list_to_string(stream.get("mappers", [])),
|
|
72
|
+
}
|
|
73
|
+
transformed_streams.append(formated_stream)
|
|
74
|
+
return transformed_connections, transformed_streams
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@timeit
|
|
78
|
+
def load_connections(
|
|
79
|
+
neo4j_session: neo4j.Session,
|
|
80
|
+
data: List[Dict[str, Any]],
|
|
81
|
+
org_id: str,
|
|
82
|
+
update_tag: int,
|
|
83
|
+
) -> None:
|
|
84
|
+
logger.info("Loading %d Airbyte Connections into Neo4j.", len(data))
|
|
85
|
+
load(
|
|
86
|
+
neo4j_session,
|
|
87
|
+
AirbyteConnectionSchema(),
|
|
88
|
+
data,
|
|
89
|
+
lastupdated=update_tag,
|
|
90
|
+
ORG_ID=org_id,
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@timeit
|
|
95
|
+
def load_streams(
|
|
96
|
+
neo4j_session: neo4j.Session,
|
|
97
|
+
data: List[Dict[str, Any]],
|
|
98
|
+
org_id: str,
|
|
99
|
+
update_tag: int,
|
|
100
|
+
) -> None:
|
|
101
|
+
logger.info("Loading %d Airbyte Streams into Neo4j.", len(data))
|
|
102
|
+
load(
|
|
103
|
+
neo4j_session,
|
|
104
|
+
AirbyteStreamSchema(),
|
|
105
|
+
data,
|
|
106
|
+
lastupdated=update_tag,
|
|
107
|
+
ORG_ID=org_id,
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@timeit
|
|
112
|
+
def cleanup(
|
|
113
|
+
neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]
|
|
114
|
+
) -> None:
|
|
115
|
+
GraphJob.from_node_schema(AirbyteStreamSchema(), common_job_parameters).run(
|
|
116
|
+
neo4j_session
|
|
117
|
+
)
|
|
118
|
+
GraphJob.from_node_schema(AirbyteConnectionSchema(), common_job_parameters).run(
|
|
119
|
+
neo4j_session
|
|
120
|
+
)
|
|
cartography/intel/airbyte/destinations.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
from typing import Dict
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
import neo4j
|
|
7
|
+
|
|
8
|
+
from cartography.client.core.tx import load
|
|
9
|
+
from cartography.graph.job import GraphJob
|
|
10
|
+
from cartography.intel.airbyte.util import AirbyteClient
|
|
11
|
+
from cartography.intel.airbyte.util import normalize_airbyte_config
|
|
12
|
+
from cartography.models.airbyte.destination import AirbyteDestinationSchema
|
|
13
|
+
from cartography.util import timeit
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@timeit
|
|
19
|
+
def sync(
|
|
20
|
+
neo4j_session: neo4j.Session,
|
|
21
|
+
api_session: AirbyteClient,
|
|
22
|
+
org_id: str,
|
|
23
|
+
workspace_ids: List[str],
|
|
24
|
+
common_job_parameters: Dict[str, Any],
|
|
25
|
+
) -> None:
|
|
26
|
+
destinations = get(api_session, workspace_ids)
|
|
27
|
+
transformed_destinations = transform(destinations)
|
|
28
|
+
load_destinations(
|
|
29
|
+
neo4j_session,
|
|
30
|
+
transformed_destinations,
|
|
31
|
+
org_id,
|
|
32
|
+
common_job_parameters["UPDATE_TAG"],
|
|
33
|
+
)
|
|
34
|
+
cleanup(neo4j_session, common_job_parameters)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@timeit
|
|
38
|
+
def get(
|
|
39
|
+
api_session: AirbyteClient,
|
|
40
|
+
workspace_ids: List[str],
|
|
41
|
+
) -> List[Dict[str, Any]]:
|
|
42
|
+
return api_session.get(
|
|
43
|
+
"/destinations",
|
|
44
|
+
params={"workspaceIds": ",".join(workspace_ids)} if workspace_ids else None,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def transform(destinations: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
49
|
+
transformed_destinations = []
|
|
50
|
+
for destination in destinations:
|
|
51
|
+
destination["configuration"] = normalize_airbyte_config(
|
|
52
|
+
destination.get("configuration", {})
|
|
53
|
+
)
|
|
54
|
+
transformed_destinations.append(destination)
|
|
55
|
+
return transformed_destinations
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@timeit
|
|
59
|
+
def load_destinations(
|
|
60
|
+
neo4j_session: neo4j.Session,
|
|
61
|
+
data: List[Dict[str, Any]],
|
|
62
|
+
org_id: str,
|
|
63
|
+
update_tag: int,
|
|
64
|
+
) -> None:
|
|
65
|
+
logger.info("Loading %d Airbyte Destinations into Neo4j.", len(data))
|
|
66
|
+
load(
|
|
67
|
+
neo4j_session,
|
|
68
|
+
AirbyteDestinationSchema(),
|
|
69
|
+
data,
|
|
70
|
+
lastupdated=update_tag,
|
|
71
|
+
ORG_ID=org_id,
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@timeit
|
|
76
|
+
def cleanup(
|
|
77
|
+
neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]
|
|
78
|
+
) -> None:
|
|
79
|
+
GraphJob.from_node_schema(AirbyteDestinationSchema(), common_job_parameters).run(
|
|
80
|
+
neo4j_session
|
|
81
|
+
)
|
|
cartography/intel/airbyte/organizations.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import Any
|
|
3
|
+
from typing import Dict
|
|
4
|
+
from typing import List
|
|
5
|
+
|
|
6
|
+
import neo4j
|
|
7
|
+
|
|
8
|
+
from cartography.client.core.tx import load
|
|
9
|
+
from cartography.graph.job import GraphJob
|
|
10
|
+
from cartography.intel.airbyte.util import AirbyteClient
|
|
11
|
+
from cartography.models.airbyte.organization import AirbyteOrganizationSchema
|
|
12
|
+
from cartography.util import timeit
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@timeit
|
|
18
|
+
def sync(
|
|
19
|
+
neo4j_session: neo4j.Session,
|
|
20
|
+
api_session: AirbyteClient,
|
|
21
|
+
common_job_parameters: Dict[str, Any],
|
|
22
|
+
) -> List[Dict]:
|
|
23
|
+
organizations = get(api_session)
|
|
24
|
+
load_organizations(
|
|
25
|
+
neo4j_session, organizations, common_job_parameters["UPDATE_TAG"]
|
|
26
|
+
)
|
|
27
|
+
cleanup(neo4j_session, common_job_parameters)
|
|
28
|
+
return organizations
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@timeit
|
|
32
|
+
def get(
|
|
33
|
+
api_session: AirbyteClient,
|
|
34
|
+
) -> List[Dict[str, Any]]:
|
|
35
|
+
return api_session.get("/organizations")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@timeit
|
|
39
|
+
def load_organizations(
|
|
40
|
+
neo4j_session: neo4j.Session,
|
|
41
|
+
data: List[Dict[str, Any]],
|
|
42
|
+
update_tag: int,
|
|
43
|
+
) -> None:
|
|
44
|
+
logger.info("Loading %d Airbyte Organizations into Neo4j.", len(data))
|
|
45
|
+
load(
|
|
46
|
+
neo4j_session,
|
|
47
|
+
AirbyteOrganizationSchema(),
|
|
48
|
+
data,
|
|
49
|
+
lastupdated=update_tag,
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@timeit
|
|
54
|
+
def cleanup(
|
|
55
|
+
neo4j_session: neo4j.Session, common_job_parameters: Dict[str, Any]
|
|
56
|
+
) -> None:
|
|
57
|
+
GraphJob.from_node_schema(AirbyteOrganizationSchema(), common_job_parameters).run(
|
|
58
|
+
neo4j_session
|
|
59
|
+
)
|