cartography 0.106.0rc1__py3-none-any.whl → 0.107.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +131 -2
- cartography/client/core/tx.py +62 -0
- cartography/config.py +42 -0
- cartography/driftdetect/cli.py +3 -2
- cartography/graph/cleanupbuilder.py +47 -0
- cartography/graph/job.py +42 -0
- cartography/graph/querybuilder.py +136 -2
- cartography/graph/statement.py +1 -1
- cartography/intel/airbyte/__init__.py +105 -0
- cartography/intel/airbyte/connections.py +120 -0
- cartography/intel/airbyte/destinations.py +81 -0
- cartography/intel/airbyte/organizations.py +59 -0
- cartography/intel/airbyte/sources.py +78 -0
- cartography/intel/airbyte/tags.py +64 -0
- cartography/intel/airbyte/users.py +106 -0
- cartography/intel/airbyte/util.py +122 -0
- cartography/intel/airbyte/workspaces.py +63 -0
- cartography/intel/aws/__init__.py +1 -0
- cartography/intel/aws/cloudtrail_management_events.py +364 -0
- cartography/intel/aws/codebuild.py +132 -0
- cartography/intel/aws/efs.py +80 -0
- cartography/intel/aws/inspector.py +3 -13
- cartography/intel/aws/resources.py +4 -0
- cartography/intel/aws/sns.py +62 -2
- cartography/intel/entra/users.py +84 -42
- cartography/intel/scaleway/__init__.py +127 -0
- cartography/intel/scaleway/iam/__init__.py +0 -0
- cartography/intel/scaleway/iam/apikeys.py +71 -0
- cartography/intel/scaleway/iam/applications.py +71 -0
- cartography/intel/scaleway/iam/groups.py +71 -0
- cartography/intel/scaleway/iam/users.py +71 -0
- cartography/intel/scaleway/instances/__init__.py +0 -0
- cartography/intel/scaleway/instances/flexibleips.py +86 -0
- cartography/intel/scaleway/instances/instances.py +92 -0
- cartography/intel/scaleway/projects.py +79 -0
- cartography/intel/scaleway/storage/__init__.py +0 -0
- cartography/intel/scaleway/storage/snapshots.py +86 -0
- cartography/intel/scaleway/storage/volumes.py +84 -0
- cartography/intel/scaleway/utils.py +37 -0
- cartography/intel/sentinelone/__init__.py +63 -0
- cartography/intel/sentinelone/account.py +140 -0
- cartography/intel/sentinelone/agent.py +139 -0
- cartography/intel/sentinelone/api.py +113 -0
- cartography/intel/sentinelone/utils.py +9 -0
- cartography/models/airbyte/__init__.py +0 -0
- cartography/models/airbyte/connection.py +138 -0
- cartography/models/airbyte/destination.py +75 -0
- cartography/models/airbyte/organization.py +19 -0
- cartography/models/airbyte/source.py +75 -0
- cartography/models/airbyte/stream.py +74 -0
- cartography/models/airbyte/tag.py +69 -0
- cartography/models/airbyte/user.py +111 -0
- cartography/models/airbyte/workspace.py +46 -0
- cartography/models/aws/cloudtrail/management_events.py +64 -0
- cartography/models/aws/codebuild/__init__.py +0 -0
- cartography/models/aws/codebuild/project.py +49 -0
- cartography/models/aws/ecs/containers.py +19 -0
- cartography/models/aws/ecs/task_definitions.py +38 -0
- cartography/models/aws/efs/access_point.py +77 -0
- cartography/models/aws/sns/topic_subscription.py +74 -0
- cartography/models/core/common.py +1 -0
- cartography/models/core/relationships.py +44 -0
- cartography/models/entra/user.py +17 -51
- cartography/models/scaleway/__init__.py +0 -0
- cartography/models/scaleway/iam/__init__.py +0 -0
- cartography/models/scaleway/iam/apikey.py +96 -0
- cartography/models/scaleway/iam/application.py +52 -0
- cartography/models/scaleway/iam/group.py +95 -0
- cartography/models/scaleway/iam/user.py +60 -0
- cartography/models/scaleway/instance/__init__.py +0 -0
- cartography/models/scaleway/instance/flexibleip.py +52 -0
- cartography/models/scaleway/instance/instance.py +118 -0
- cartography/models/scaleway/organization.py +19 -0
- cartography/models/scaleway/project.py +48 -0
- cartography/models/scaleway/storage/__init__.py +0 -0
- cartography/models/scaleway/storage/snapshot.py +78 -0
- cartography/models/scaleway/storage/volume.py +51 -0
- cartography/models/sentinelone/__init__.py +1 -0
- cartography/models/sentinelone/account.py +40 -0
- cartography/models/sentinelone/agent.py +50 -0
- cartography/sync.py +11 -4
- {cartography-0.106.0rc1.dist-info → cartography-0.107.0rc1.dist-info}/METADATA +20 -16
- {cartography-0.106.0rc1.dist-info → cartography-0.107.0rc1.dist-info}/RECORD +88 -27
- {cartography-0.106.0rc1.dist-info → cartography-0.107.0rc1.dist-info}/WHEEL +0 -0
- {cartography-0.106.0rc1.dist-info → cartography-0.107.0rc1.dist-info}/entry_points.txt +0 -0
- {cartography-0.106.0rc1.dist-info → cartography-0.107.0rc1.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.106.0rc1.dist-info → cartography-0.107.0rc1.dist-info}/top_level.txt +0 -0
cartography/_version.py
CHANGED
|
@@ -17,5 +17,5 @@ __version__: str
|
|
|
17
17
|
__version_tuple__: VERSION_TUPLE
|
|
18
18
|
version_tuple: VERSION_TUPLE
|
|
19
19
|
|
|
20
|
-
__version__ = version = '0.106.0rc1'
|
|
21
|
-
__version_tuple__ = version_tuple = (0, 106, 0, 'rc1')
|
|
20
|
+
__version__ = version = '0.107.0rc1'
|
|
21
|
+
__version_tuple__ = version_tuple = (0, 107, 0, 'rc1')
|
cartography/cli.py
CHANGED
|
@@ -71,8 +71,8 @@ class CLI:
|
|
|
71
71
|
default="bolt://localhost:7687",
|
|
72
72
|
help=(
|
|
73
73
|
"A valid Neo4j URI to sync against. See "
|
|
74
|
-
"https://neo4j.com/docs/
|
|
75
|
-
"structure of a Neo4j URI."
|
|
74
|
+
"https://neo4j.com/docs/browser-manual/current/operations/dbms-connection/#uri-scheme for complete "
|
|
75
|
+
"documentation on the structure of a Neo4j URI."
|
|
76
76
|
),
|
|
77
77
|
)
|
|
78
78
|
parser.add_argument(
|
|
@@ -182,6 +182,14 @@ class CLI:
|
|
|
182
182
|
"syncing other accounts and delay raising an exception until the very end."
|
|
183
183
|
),
|
|
184
184
|
)
|
|
185
|
+
parser.add_argument(
|
|
186
|
+
"--aws-cloudtrail-management-events-lookback-hours",
|
|
187
|
+
type=int,
|
|
188
|
+
default=None,
|
|
189
|
+
help=(
|
|
190
|
+
"Number of hours back to retrieve CloudTrail management events from. If not specified, CloudTrail management events will not be retrieved."
|
|
191
|
+
),
|
|
192
|
+
)
|
|
185
193
|
parser.add_argument(
|
|
186
194
|
"--oci-sync-all-profiles",
|
|
187
195
|
action="store_true",
|
|
@@ -637,6 +645,33 @@ class CLI:
|
|
|
637
645
|
"Required if you are using the Anthropic intel module. Ignored otherwise."
|
|
638
646
|
),
|
|
639
647
|
)
|
|
648
|
+
parser.add_argument(
|
|
649
|
+
"--airbyte-client-id",
|
|
650
|
+
type=str,
|
|
651
|
+
default=None,
|
|
652
|
+
help=(
|
|
653
|
+
"The Airbyte client ID to use for authentication. "
|
|
654
|
+
"Required if you are using the Airbyte intel module. Ignored otherwise."
|
|
655
|
+
),
|
|
656
|
+
)
|
|
657
|
+
parser.add_argument(
|
|
658
|
+
"--airbyte-client-secret-env-var",
|
|
659
|
+
type=str,
|
|
660
|
+
default=None,
|
|
661
|
+
help=(
|
|
662
|
+
"The name of an environment variable containing the Airbyte client secret for authentication. "
|
|
663
|
+
"Required if you are using the Airbyte intel module. Ignored otherwise."
|
|
664
|
+
),
|
|
665
|
+
)
|
|
666
|
+
parser.add_argument(
|
|
667
|
+
"--airbyte-api-url",
|
|
668
|
+
type=str,
|
|
669
|
+
default="https://api.airbyte.com/v1",
|
|
670
|
+
help=(
|
|
671
|
+
"The base URL for the Airbyte API (default is the public Airbyte Cloud API). "
|
|
672
|
+
"Required if you are using the Airbyte intel module. Ignored otherwise."
|
|
673
|
+
),
|
|
674
|
+
)
|
|
640
675
|
parser.add_argument(
|
|
641
676
|
"--trivy-s3-bucket",
|
|
642
677
|
type=str,
|
|
@@ -655,6 +690,59 @@ class CLI:
|
|
|
655
690
|
"Required if you are using the Trivy module. Ignored otherwise."
|
|
656
691
|
),
|
|
657
692
|
)
|
|
693
|
+
parser.add_argument(
|
|
694
|
+
"--scaleway-org",
|
|
695
|
+
type=str,
|
|
696
|
+
default=None,
|
|
697
|
+
help=(
|
|
698
|
+
"The Scaleway organization ID to sync. "
|
|
699
|
+
"Required if you are using the Scaleway intel module. Ignored otherwise."
|
|
700
|
+
),
|
|
701
|
+
)
|
|
702
|
+
parser.add_argument(
|
|
703
|
+
"--scaleway-access-key",
|
|
704
|
+
type=str,
|
|
705
|
+
default=None,
|
|
706
|
+
help=(
|
|
707
|
+
"The Scaleway access key to use for authentication. "
|
|
708
|
+
"Required if you are using the Scaleway intel module. Ignored otherwise."
|
|
709
|
+
),
|
|
710
|
+
)
|
|
711
|
+
parser.add_argument(
|
|
712
|
+
"--scaleway-secret-key-env-var",
|
|
713
|
+
type=str,
|
|
714
|
+
default=None,
|
|
715
|
+
help=(
|
|
716
|
+
"The name of an environment variable containing the Scaleway secret key for authentication. "
|
|
717
|
+
"Required if you are using the Scaleway intel module. Ignored otherwise."
|
|
718
|
+
),
|
|
719
|
+
)
|
|
720
|
+
parser.add_argument(
|
|
721
|
+
"--sentinelone-account-ids",
|
|
722
|
+
type=str,
|
|
723
|
+
default=None,
|
|
724
|
+
help=(
|
|
725
|
+
"Comma-separated list of SentinelOne account IDs to sync. "
|
|
726
|
+
"If not specified, all accessible accounts will be synced."
|
|
727
|
+
),
|
|
728
|
+
)
|
|
729
|
+
parser.add_argument(
|
|
730
|
+
"--sentinelone-api-url",
|
|
731
|
+
type=str,
|
|
732
|
+
default=None,
|
|
733
|
+
help=(
|
|
734
|
+
"SentinelOne API URL. Required if you are using the SentinelOne intel module. Ignored otherwise."
|
|
735
|
+
),
|
|
736
|
+
)
|
|
737
|
+
parser.add_argument(
|
|
738
|
+
"--sentinelone-api-token-env-var",
|
|
739
|
+
type=str,
|
|
740
|
+
default="SENTINELONE_API_TOKEN",
|
|
741
|
+
help=(
|
|
742
|
+
"The name of an environment variable containing the SentinelOne API token. "
|
|
743
|
+
"Required if you are using the SentinelOne intel module. Ignored otherwise."
|
|
744
|
+
),
|
|
745
|
+
)
|
|
658
746
|
|
|
659
747
|
return parser
|
|
660
748
|
|
|
@@ -973,6 +1061,17 @@ class CLI:
|
|
|
973
1061
|
else:
|
|
974
1062
|
config.anthropic_apikey = None
|
|
975
1063
|
|
|
1064
|
+
# Airbyte config
|
|
1065
|
+
if config.airbyte_client_id and config.airbyte_client_secret_env_var:
|
|
1066
|
+
logger.debug(
|
|
1067
|
+
f"Reading Airbyte client secret from environment variable {config.airbyte_client_secret_env_var}",
|
|
1068
|
+
)
|
|
1069
|
+
config.airbyte_client_secret = os.environ.get(
|
|
1070
|
+
config.airbyte_client_secret_env_var,
|
|
1071
|
+
)
|
|
1072
|
+
else:
|
|
1073
|
+
config.airbyte_client_secret = None
|
|
1074
|
+
|
|
976
1075
|
# Trivy config
|
|
977
1076
|
if config.trivy_s3_bucket:
|
|
978
1077
|
logger.debug(f"Trivy S3 bucket: {config.trivy_s3_bucket}")
|
|
@@ -980,6 +1079,36 @@ class CLI:
|
|
|
980
1079
|
if config.trivy_s3_prefix:
|
|
981
1080
|
logger.debug(f"Trivy S3 prefix: {config.trivy_s3_prefix}")
|
|
982
1081
|
|
|
1082
|
+
# Scaleway config
|
|
1083
|
+
if config.scaleway_secret_key_env_var:
|
|
1084
|
+
logger.debug(
|
|
1085
|
+
f"Reading Scaleway secret key from environment variable {config.scaleway_secret_key_env_var}",
|
|
1086
|
+
)
|
|
1087
|
+
config.scaleway_secret_key = os.environ.get(
|
|
1088
|
+
config.scaleway_secret_key_env_var,
|
|
1089
|
+
)
|
|
1090
|
+
else:
|
|
1091
|
+
config.scaleway_secret_key = None
|
|
1092
|
+
|
|
1093
|
+
# SentinelOne config
|
|
1094
|
+
if config.sentinelone_account_ids:
|
|
1095
|
+
config.sentinelone_account_ids = [
|
|
1096
|
+
id.strip() for id in config.sentinelone_account_ids.split(",")
|
|
1097
|
+
]
|
|
1098
|
+
logger.debug(
|
|
1099
|
+
f"Parsed {len(config.sentinelone_account_ids)} SentinelOne account IDs to sync"
|
|
1100
|
+
)
|
|
1101
|
+
else:
|
|
1102
|
+
config.sentinelone_account_ids = None
|
|
1103
|
+
|
|
1104
|
+
if config.sentinelone_api_url and config.sentinelone_api_token_env_var:
|
|
1105
|
+
logger.debug(
|
|
1106
|
+
f"Reading API token for SentinelOne from environment variable {config.sentinelone_api_token_env_var}",
|
|
1107
|
+
)
|
|
1108
|
+
config.sentinelone_api_token = os.environ.get(
|
|
1109
|
+
config.sentinelone_api_token_env_var
|
|
1110
|
+
)
|
|
1111
|
+
|
|
983
1112
|
# Run cartography
|
|
984
1113
|
try:
|
|
985
1114
|
return cartography.sync.run_with_config(self.sync, config)
|
cartography/client/core/tx.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from typing import Any
|
|
2
3
|
from typing import Dict
|
|
3
4
|
from typing import List
|
|
@@ -8,10 +9,15 @@ from typing import Union
|
|
|
8
9
|
import neo4j
|
|
9
10
|
|
|
10
11
|
from cartography.graph.querybuilder import build_create_index_queries
|
|
12
|
+
from cartography.graph.querybuilder import build_create_index_queries_for_matchlink
|
|
11
13
|
from cartography.graph.querybuilder import build_ingestion_query
|
|
14
|
+
from cartography.graph.querybuilder import build_matchlink_query
|
|
12
15
|
from cartography.models.core.nodes import CartographyNodeSchema
|
|
16
|
+
from cartography.models.core.relationships import CartographyRelSchema
|
|
13
17
|
from cartography.util import batch
|
|
14
18
|
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
15
21
|
|
|
16
22
|
def read_list_of_values_tx(
|
|
17
23
|
tx: neo4j.Transaction,
|
|
@@ -255,6 +261,25 @@ def ensure_indexes(
|
|
|
255
261
|
neo4j_session.run(query)
|
|
256
262
|
|
|
257
263
|
|
|
264
|
+
def ensure_indexes_for_matchlinks(
|
|
265
|
+
neo4j_session: neo4j.Session,
|
|
266
|
+
rel_schema: CartographyRelSchema,
|
|
267
|
+
) -> None:
|
|
268
|
+
"""
|
|
269
|
+
Creates indexes for node fields if they don't exist for the given CartographyRelSchema object.
|
|
270
|
+
This is only used for load_rels() where we match on and connect existing nodes.
|
|
271
|
+
This is not used for CartographyNodeSchema objects.
|
|
272
|
+
"""
|
|
273
|
+
queries = build_create_index_queries_for_matchlink(rel_schema)
|
|
274
|
+
logger.debug(f"CREATE INDEX queries for {rel_schema.rel_label}: {queries}")
|
|
275
|
+
for query in queries:
|
|
276
|
+
if not query.startswith("CREATE INDEX IF NOT EXISTS"):
|
|
277
|
+
raise ValueError(
|
|
278
|
+
'Query provided to `ensure_indexes_for_matchlinks()` does not start with "CREATE INDEX IF NOT EXISTS".',
|
|
279
|
+
)
|
|
280
|
+
neo4j_session.run(query)
|
|
281
|
+
|
|
282
|
+
|
|
258
283
|
def load(
|
|
259
284
|
neo4j_session: neo4j.Session,
|
|
260
285
|
node_schema: CartographyNodeSchema,
|
|
@@ -276,3 +301,40 @@ def load(
|
|
|
276
301
|
ensure_indexes(neo4j_session, node_schema)
|
|
277
302
|
ingestion_query = build_ingestion_query(node_schema)
|
|
278
303
|
load_graph_data(neo4j_session, ingestion_query, dict_list, **kwargs)
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def load_matchlinks(
|
|
307
|
+
neo4j_session: neo4j.Session,
|
|
308
|
+
rel_schema: CartographyRelSchema,
|
|
309
|
+
dict_list: list[dict[str, Any]],
|
|
310
|
+
**kwargs,
|
|
311
|
+
) -> None:
|
|
312
|
+
"""
|
|
313
|
+
Main entrypoint for intel modules to write relationships to the graph between two existing nodes.
|
|
314
|
+
:param neo4j_session: The Neo4j session
|
|
315
|
+
:param rel_schema: The CartographyRelSchema object to generate a query.
|
|
316
|
+
:param dict_list: The data to load to the graph represented as a list of dicts. The dicts must contain the source and
|
|
317
|
+
target node ids.
|
|
318
|
+
:param kwargs: Allows additional keyword args to be supplied to the Neo4j query.
|
|
319
|
+
:return: None
|
|
320
|
+
"""
|
|
321
|
+
if len(dict_list) == 0:
|
|
322
|
+
# If there is no data to load, save some time.
|
|
323
|
+
return
|
|
324
|
+
|
|
325
|
+
# Validate that required kwargs are provided for cleanup queries
|
|
326
|
+
if "_sub_resource_label" not in kwargs:
|
|
327
|
+
raise ValueError(
|
|
328
|
+
f"Required kwarg '_sub_resource_label' not provided for {rel_schema.rel_label}. "
|
|
329
|
+
"This is needed for cleanup queries."
|
|
330
|
+
)
|
|
331
|
+
if "_sub_resource_id" not in kwargs:
|
|
332
|
+
raise ValueError(
|
|
333
|
+
f"Required kwarg '_sub_resource_id' not provided for {rel_schema.rel_label}. "
|
|
334
|
+
"This is needed for cleanup queries."
|
|
335
|
+
)
|
|
336
|
+
|
|
337
|
+
ensure_indexes_for_matchlinks(neo4j_session, rel_schema)
|
|
338
|
+
matchlink_query = build_matchlink_query(rel_schema)
|
|
339
|
+
logger.debug(f"Matchlink query: {matchlink_query}")
|
|
340
|
+
load_graph_data(neo4j_session, matchlink_query, dict_list, **kwargs)
|
cartography/config.py
CHANGED
|
@@ -31,6 +31,8 @@ class Config:
|
|
|
31
31
|
:type aws_best_effort_mode: bool
|
|
32
32
|
:param aws_best_effort_mode: If True, AWS sync will not raise any exceptions, just log. If False (default),
|
|
33
33
|
exceptions will be raised.
|
|
34
|
+
:type aws_cloudtrail_management_events_lookback_hours: int
|
|
35
|
+
:param aws_cloudtrail_management_events_lookback_hours: Number of hours back to retrieve CloudTrail management events from. Optional.
|
|
34
36
|
:type azure_sync_all_subscriptions: bool
|
|
35
37
|
:param azure_sync_all_subscriptions: If True, Azure sync will run for all profiles in azureProfile.json. If
|
|
36
38
|
False (default), Azure sync will run using current user session via CLI credentials. Optional.
|
|
@@ -137,10 +139,28 @@ class Config:
|
|
|
137
139
|
:param openai_org_id: OpenAI organization id. Optional.
|
|
138
140
|
:type anthropic_apikey: string
|
|
139
141
|
:param anthropic_apikey: Anthropic API key. Optional.
|
|
142
|
+
:type airbyte_client_id: str
|
|
143
|
+
:param airbyte_client_id: Airbyte client ID for API authentication. Optional.
|
|
144
|
+
:type airbyte_client_secret: str
|
|
145
|
+
:param airbyte_client_secret: Airbyte client secret for API authentication. Optional.
|
|
146
|
+
:type airbyte_api_url: str
|
|
147
|
+
:param airbyte_api_url: Airbyte API base URL, e.g. https://api.airbyte.com/v1. Optional.
|
|
140
148
|
:type trivy_s3_bucket: str
|
|
141
149
|
:param trivy_s3_bucket: The S3 bucket name containing Trivy scan results. Optional.
|
|
142
150
|
:type trivy_s3_prefix: str
|
|
143
151
|
:param trivy_s3_prefix: The S3 prefix path containing Trivy scan results. Optional.
|
|
152
|
+
:type scaleway_access_key: str
|
|
153
|
+
:param scaleway_access_key: Scaleway access key. Optional.
|
|
154
|
+
:type scaleway_secret_key: str
|
|
155
|
+
:param scaleway_secret_key: Scaleway secret key. Optional.
|
|
156
|
+
:type scaleway_org: str
|
|
157
|
+
:param scaleway_org: Scaleway organization id. Optional.
|
|
158
|
+
:type sentinelone_api_url: string
|
|
159
|
+
:param sentinelone_api_url: SentinelOne API URL. Optional.
|
|
160
|
+
:type sentinelone_api_token: string
|
|
161
|
+
:param sentinelone_api_token: SentinelOne API token for authentication. Optional.
|
|
162
|
+
:type sentinelone_account_ids: list[str]
|
|
163
|
+
:param sentinelone_account_ids: List of SentinelOne account IDs to sync. Optional.
|
|
144
164
|
"""
|
|
145
165
|
|
|
146
166
|
def __init__(
|
|
@@ -155,6 +175,7 @@ class Config:
|
|
|
155
175
|
aws_sync_all_profiles=False,
|
|
156
176
|
aws_regions=None,
|
|
157
177
|
aws_best_effort_mode=False,
|
|
178
|
+
aws_cloudtrail_management_events_lookback_hours=None,
|
|
158
179
|
azure_sync_all_subscriptions=False,
|
|
159
180
|
azure_sp_auth=None,
|
|
160
181
|
azure_tenant_id=None,
|
|
@@ -213,8 +234,17 @@ class Config:
|
|
|
213
234
|
openai_apikey=None,
|
|
214
235
|
openai_org_id=None,
|
|
215
236
|
anthropic_apikey=None,
|
|
237
|
+
airbyte_client_id=None,
|
|
238
|
+
airbyte_client_secret=None,
|
|
239
|
+
airbyte_api_url=None,
|
|
216
240
|
trivy_s3_bucket=None,
|
|
217
241
|
trivy_s3_prefix=None,
|
|
242
|
+
scaleway_access_key=None,
|
|
243
|
+
scaleway_secret_key=None,
|
|
244
|
+
scaleway_org=None,
|
|
245
|
+
sentinelone_api_url=None,
|
|
246
|
+
sentinelone_api_token=None,
|
|
247
|
+
sentinelone_account_ids=None,
|
|
218
248
|
):
|
|
219
249
|
self.neo4j_uri = neo4j_uri
|
|
220
250
|
self.neo4j_user = neo4j_user
|
|
@@ -226,6 +256,9 @@ class Config:
|
|
|
226
256
|
self.aws_sync_all_profiles = aws_sync_all_profiles
|
|
227
257
|
self.aws_regions = aws_regions
|
|
228
258
|
self.aws_best_effort_mode = aws_best_effort_mode
|
|
259
|
+
self.aws_cloudtrail_management_events_lookback_hours = (
|
|
260
|
+
aws_cloudtrail_management_events_lookback_hours
|
|
261
|
+
)
|
|
229
262
|
self.azure_sync_all_subscriptions = azure_sync_all_subscriptions
|
|
230
263
|
self.azure_sp_auth = azure_sp_auth
|
|
231
264
|
self.azure_tenant_id = azure_tenant_id
|
|
@@ -284,5 +317,14 @@ class Config:
|
|
|
284
317
|
self.openai_apikey = openai_apikey
|
|
285
318
|
self.openai_org_id = openai_org_id
|
|
286
319
|
self.anthropic_apikey = anthropic_apikey
|
|
320
|
+
self.airbyte_client_id = airbyte_client_id
|
|
321
|
+
self.airbyte_client_secret = airbyte_client_secret
|
|
322
|
+
self.airbyte_api_url = airbyte_api_url
|
|
287
323
|
self.trivy_s3_bucket = trivy_s3_bucket
|
|
288
324
|
self.trivy_s3_prefix = trivy_s3_prefix
|
|
325
|
+
self.scaleway_access_key = scaleway_access_key
|
|
326
|
+
self.scaleway_secret_key = scaleway_secret_key
|
|
327
|
+
self.scaleway_org = scaleway_org
|
|
328
|
+
self.sentinelone_api_url = sentinelone_api_url
|
|
329
|
+
self.sentinelone_api_token = sentinelone_api_token
|
|
330
|
+
self.sentinelone_account_ids = sentinelone_account_ids
|
cartography/driftdetect/cli.py
CHANGED
|
@@ -63,8 +63,9 @@ class CLI:
|
|
|
63
63
|
default="bolt://localhost:7687",
|
|
64
64
|
help=(
|
|
65
65
|
"A valid Neo4j URI to sync against. See "
|
|
66
|
-
"https://neo4j.com/docs/
|
|
67
|
-
"structure of a Neo4j URI
|
|
66
|
+
"https://neo4j.com/docs/browser-manual/current/operations/dbms-connection/#uri-scheme for "
|
|
67
|
+
"documentation on the structure of a Neo4j URI, and "
|
|
68
|
+
"https://neo4j.com/docs/api/python-driver/current/ for complete documentation on the Python driver."
|
|
68
69
|
),
|
|
69
70
|
)
|
|
70
71
|
parser_get_state.add_argument(
|
cartography/graph/cleanupbuilder.py
CHANGED
|
@@ -3,6 +3,7 @@ from string import Template
|
|
|
3
3
|
from typing import Dict
|
|
4
4
|
from typing import List
|
|
5
5
|
|
|
6
|
+
from cartography.graph.querybuilder import _asdict_with_validate_relprops
|
|
6
7
|
from cartography.graph.querybuilder import _build_match_clause
|
|
7
8
|
from cartography.graph.querybuilder import rel_present_on_node_schema
|
|
8
9
|
from cartography.models.core.common import PropertyRef
|
|
@@ -334,3 +335,49 @@ def _validate_target_node_matcher_for_cleanup_job(tgm: TargetNodeMatcher):
|
|
|
334
335
|
f"{key} has set_in_kwargs=False, please check by reviewing the full stack trace to know which object"
|
|
335
336
|
f"this message was raised from. Debug information: PropertyRef name = {prop_ref.name}.",
|
|
336
337
|
)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def build_cleanup_query_for_matchlink(rel_schema: CartographyRelSchema) -> str:
|
|
341
|
+
"""
|
|
342
|
+
Generates a cleanup query for a matchlink relationship.
|
|
343
|
+
:param rel_schema: The CartographyRelSchema object to generate a query. This CartographyRelSchema object
|
|
344
|
+
- Must have a source_node_matcher and source_node_label defined
|
|
345
|
+
- Must have a CartographyRelProperties object where _sub_resource_label and _sub_resource_id are defined
|
|
346
|
+
:return: A Neo4j query used to clean up stale matchlink relationships.
|
|
347
|
+
"""
|
|
348
|
+
if not rel_schema.source_node_matcher:
|
|
349
|
+
raise ValueError(
|
|
350
|
+
f"No source node matcher found for {rel_schema.rel_label}; returning empty list."
|
|
351
|
+
)
|
|
352
|
+
|
|
353
|
+
query_template = Template(
|
|
354
|
+
"""
|
|
355
|
+
MATCH (from:$source_node_label)$rel_direction[r:$rel_label]$rel_direction_end(to:$target_node_label)
|
|
356
|
+
WHERE r.lastupdated <> $UPDATE_TAG
|
|
357
|
+
AND r._sub_resource_label = $sub_resource_label
|
|
358
|
+
AND r._sub_resource_id = $sub_resource_id
|
|
359
|
+
WITH r LIMIT $LIMIT_SIZE
|
|
360
|
+
DELETE r;
|
|
361
|
+
"""
|
|
362
|
+
)
|
|
363
|
+
|
|
364
|
+
# Determine which way to point the arrow. INWARD is toward the source, otherwise we go toward the target.
|
|
365
|
+
if rel_schema.direction == LinkDirection.INWARD:
|
|
366
|
+
rel_direction = "<-"
|
|
367
|
+
rel_direction_end = "-"
|
|
368
|
+
else:
|
|
369
|
+
rel_direction = "-"
|
|
370
|
+
rel_direction_end = "->"
|
|
371
|
+
|
|
372
|
+
# Small hack: avoid type-checking errors by converting the rel_schema to a dict.
|
|
373
|
+
rel_props_as_dict = _asdict_with_validate_relprops(rel_schema)
|
|
374
|
+
|
|
375
|
+
return query_template.safe_substitute(
|
|
376
|
+
source_node_label=rel_schema.source_node_label,
|
|
377
|
+
target_node_label=rel_schema.target_node_label,
|
|
378
|
+
rel_label=rel_schema.rel_label,
|
|
379
|
+
rel_direction=rel_direction,
|
|
380
|
+
rel_direction_end=rel_direction_end,
|
|
381
|
+
sub_resource_label=rel_props_as_dict["_sub_resource_label"],
|
|
382
|
+
sub_resource_id=rel_props_as_dict["_sub_resource_id"],
|
|
383
|
+
)
|
cartography/graph/job.py
CHANGED
|
@@ -13,9 +13,11 @@ from typing import Union
|
|
|
13
13
|
import neo4j
|
|
14
14
|
|
|
15
15
|
from cartography.graph.cleanupbuilder import build_cleanup_queries
|
|
16
|
+
from cartography.graph.cleanupbuilder import build_cleanup_query_for_matchlink
|
|
16
17
|
from cartography.graph.statement import get_job_shortname
|
|
17
18
|
from cartography.graph.statement import GraphStatement
|
|
18
19
|
from cartography.models.core.nodes import CartographyNodeSchema
|
|
20
|
+
from cartography.models.core.relationships import CartographyRelSchema
|
|
19
21
|
|
|
20
22
|
logger = logging.getLogger(__name__)
|
|
21
23
|
|
|
@@ -176,6 +178,46 @@ class GraphJob:
|
|
|
176
178
|
node_schema.label,
|
|
177
179
|
)
|
|
178
180
|
|
|
181
|
+
@classmethod
|
|
182
|
+
def from_matchlink(
|
|
183
|
+
cls,
|
|
184
|
+
rel_schema: CartographyRelSchema,
|
|
185
|
+
sub_resource_label: str,
|
|
186
|
+
sub_resource_id: str,
|
|
187
|
+
update_tag: int,
|
|
188
|
+
) -> "GraphJob":
|
|
189
|
+
"""
|
|
190
|
+
Create a cleanup job from a CartographyRelSchema object (specifically, a MatchLink).
|
|
191
|
+
This is used for cleaning up stale links between nodes created by load_rels(). Do not use for other purposes.
|
|
192
|
+
|
|
193
|
+
Other notes:
|
|
194
|
+
- For a given rel_schema, the fields used in the rel_schema.properties._sub_resource_label.name and
|
|
195
|
+
rel_schema.properties._sub_resource_id.name must be provided as keys and values in the params dict.
|
|
196
|
+
- The rel_schema must have a source_node_matcher and target_node_matcher.
|
|
197
|
+
"""
|
|
198
|
+
cleanup_link_query = build_cleanup_query_for_matchlink(rel_schema)
|
|
199
|
+
logger.debug(f"Cleanup query: {cleanup_link_query}")
|
|
200
|
+
|
|
201
|
+
parameters = {
|
|
202
|
+
"UPDATE_TAG": update_tag,
|
|
203
|
+
"_sub_resource_label": sub_resource_label,
|
|
204
|
+
"_sub_resource_id": sub_resource_id,
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
statement = GraphStatement(
|
|
208
|
+
cleanup_link_query,
|
|
209
|
+
parameters=parameters,
|
|
210
|
+
iterative=True,
|
|
211
|
+
iterationsize=100,
|
|
212
|
+
parent_job_name=rel_schema.rel_label,
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
return cls(
|
|
216
|
+
f"Cleanup {rel_schema.rel_label} between {rel_schema.source_node_label} and {rel_schema.target_node_label}",
|
|
217
|
+
[statement],
|
|
218
|
+
rel_schema.rel_label,
|
|
219
|
+
)
|
|
220
|
+
|
|
179
221
|
@classmethod
|
|
180
222
|
def from_json_file(cls, file_path: Union[str, Path]) -> "GraphJob":
|
|
181
223
|
"""
|
cartography/graph/querybuilder.py
CHANGED
|
@@ -14,6 +14,7 @@ from cartography.models.core.nodes import ExtraNodeLabels
|
|
|
14
14
|
from cartography.models.core.relationships import CartographyRelSchema
|
|
15
15
|
from cartography.models.core.relationships import LinkDirection
|
|
16
16
|
from cartography.models.core.relationships import OtherRelationships
|
|
17
|
+
from cartography.models.core.relationships import SourceNodeMatcher
|
|
17
18
|
from cartography.models.core.relationships import TargetNodeMatcher
|
|
18
19
|
|
|
19
20
|
logger = logging.getLogger(__name__)
|
|
@@ -109,10 +110,10 @@ def _build_rel_properties_statement(
|
|
|
109
110
|
return set_clause
|
|
110
111
|
|
|
111
112
|
|
|
112
|
-
def _build_match_clause(matcher: TargetNodeMatcher) -> str:
|
|
113
|
+
def _build_match_clause(matcher: TargetNodeMatcher | SourceNodeMatcher) -> str:
|
|
113
114
|
"""
|
|
114
115
|
Generate a Neo4j match statement on one or more keys and values for a given node.
|
|
115
|
-
:param matcher: A TargetNodeMatcher object
|
|
116
|
+
:param matcher: A TargetNodeMatcher or SourceNodeMatcher object
|
|
116
117
|
:return: a Neo4j match clause
|
|
117
118
|
"""
|
|
118
119
|
match = Template("$Key: $PropRef")
|
|
@@ -548,3 +549,136 @@ def build_create_index_queries(node_schema: CartographyNodeSchema) -> List[str]:
|
|
|
548
549
|
],
|
|
549
550
|
)
|
|
550
551
|
return result
|
|
552
|
+
|
|
553
|
+
|
|
554
|
+
def build_create_index_queries_for_matchlink(
|
|
555
|
+
rel_schema: CartographyRelSchema,
|
|
556
|
+
) -> list[str]:
|
|
557
|
+
"""
|
|
558
|
+
Generate queries to create indexes for the given CartographyRelSchema and all node types attached to it via its
|
|
559
|
+
relationships.
|
|
560
|
+
:param rel_schema: The CartographyRelSchema object
|
|
561
|
+
:return: A list of queries of the form `CREATE INDEX IF NOT EXISTS FOR (n:$TargetNodeLabel) ON (n.$TargetAttribute)`
|
|
562
|
+
"""
|
|
563
|
+
if not rel_schema.source_node_matcher:
|
|
564
|
+
logger.warning(
|
|
565
|
+
f"No source node matcher found for {rel_schema.rel_label}; returning empty list."
|
|
566
|
+
"Please note that build_create_index_queries_for_matchlink() is only used for load_matchlinks() where we match on "
|
|
567
|
+
"and connect existing nodes in the graph."
|
|
568
|
+
)
|
|
569
|
+
return []
|
|
570
|
+
|
|
571
|
+
index_template = Template(
|
|
572
|
+
"CREATE INDEX IF NOT EXISTS FOR (n:$NodeLabel) ON (n.$NodeAttribute);",
|
|
573
|
+
)
|
|
574
|
+
|
|
575
|
+
result = []
|
|
576
|
+
for source_key in asdict(rel_schema.source_node_matcher).keys():
|
|
577
|
+
result.append(
|
|
578
|
+
index_template.safe_substitute(
|
|
579
|
+
NodeLabel=rel_schema.source_node_label,
|
|
580
|
+
NodeAttribute=source_key,
|
|
581
|
+
),
|
|
582
|
+
)
|
|
583
|
+
for target_key in asdict(rel_schema.target_node_matcher).keys():
|
|
584
|
+
result.append(
|
|
585
|
+
index_template.safe_substitute(
|
|
586
|
+
NodeLabel=rel_schema.target_node_label,
|
|
587
|
+
NodeAttribute=target_key,
|
|
588
|
+
),
|
|
589
|
+
)
|
|
590
|
+
|
|
591
|
+
# Create a composite index for the relationship between the source and target nodes.
|
|
592
|
+
# https://neo4j.com/docs/cypher-manual/4.3/indexes-for-search-performance/#administration-indexes-create-a-composite-index-for-relationships
|
|
593
|
+
rel_index_template = Template(
|
|
594
|
+
"CREATE INDEX IF NOT EXISTS FOR ()$rel_direction[r:$RelLabel]$rel_direction_end() "
|
|
595
|
+
"ON (r.lastupdated, r._sub_resource_label, r._sub_resource_id);",
|
|
596
|
+
)
|
|
597
|
+
if rel_schema.direction == LinkDirection.INWARD:
|
|
598
|
+
result.append(
|
|
599
|
+
rel_index_template.safe_substitute(
|
|
600
|
+
RelLabel=rel_schema.rel_label,
|
|
601
|
+
rel_direction="<-",
|
|
602
|
+
rel_direction_end="-",
|
|
603
|
+
)
|
|
604
|
+
)
|
|
605
|
+
else:
|
|
606
|
+
result.append(
|
|
607
|
+
rel_index_template.safe_substitute(
|
|
608
|
+
RelLabel=rel_schema.rel_label,
|
|
609
|
+
rel_direction="-",
|
|
610
|
+
rel_direction_end="->",
|
|
611
|
+
)
|
|
612
|
+
)
|
|
613
|
+
return result
|
|
614
|
+
|
|
615
|
+
|
|
616
|
+
def build_matchlink_query(rel_schema: CartographyRelSchema) -> str:
|
|
617
|
+
"""
|
|
618
|
+
Generate a Neo4j query to link two existing nodes when given a CartographyRelSchema object.
|
|
619
|
+
This is only used for load_matchlinks().
|
|
620
|
+
:param rel_schema: The CartographyRelSchema object to generate a query. This CartographyRelSchema object
|
|
621
|
+
- Must have a source_node_matcher and source_node_label defined
|
|
622
|
+
- Must have a CartographyRelProperties object where _sub_resource_label and _sub_resource_id are defined
|
|
623
|
+
:return: A Neo4j query that can be used to link two existing nodes.
|
|
624
|
+
"""
|
|
625
|
+
if not rel_schema.source_node_matcher or not rel_schema.source_node_label:
|
|
626
|
+
raise ValueError(
|
|
627
|
+
f"No source node matcher or source node label found for {rel_schema.rel_label}. "
|
|
628
|
+
"MatchLink relationships require a source_node_matcher and source_node_label to be defined."
|
|
629
|
+
)
|
|
630
|
+
|
|
631
|
+
rel_props_as_dict = _asdict_with_validate_relprops(rel_schema)
|
|
632
|
+
|
|
633
|
+
# These are needed for the cleanup query
|
|
634
|
+
if "_sub_resource_label" not in rel_props_as_dict:
|
|
635
|
+
raise ValueError(
|
|
636
|
+
f"Expected _sub_resource_label to be defined on {rel_schema.properties.__class__.__name__}"
|
|
637
|
+
"Please include `_sub_resource_label: PropertyRef = PropertyRef('_sub_resource_label', set_in_kwargs=True)`"
|
|
638
|
+
)
|
|
639
|
+
if "_sub_resource_id" not in rel_props_as_dict:
|
|
640
|
+
raise ValueError(
|
|
641
|
+
f"Expected _sub_resource_id to be defined on {rel_schema.properties.__class__.__name__}"
|
|
642
|
+
"Please include `_sub_resource_id: PropertyRef = PropertyRef('_sub_resource_id', set_in_kwargs=True)`"
|
|
643
|
+
)
|
|
644
|
+
|
|
645
|
+
matchlink_query_template = Template(
|
|
646
|
+
"""
|
|
647
|
+
UNWIND $DictList as item
|
|
648
|
+
$source_match
|
|
649
|
+
$target_match
|
|
650
|
+
MERGE $rel
|
|
651
|
+
ON CREATE SET r.firstseen = timestamp()
|
|
652
|
+
SET
|
|
653
|
+
$set_rel_properties_statement;
|
|
654
|
+
"""
|
|
655
|
+
)
|
|
656
|
+
|
|
657
|
+
source_match = Template(
|
|
658
|
+
"MATCH (from:$source_node_label{$match_clause})"
|
|
659
|
+
).safe_substitute(
|
|
660
|
+
source_node_label=rel_schema.source_node_label,
|
|
661
|
+
match_clause=_build_match_clause(rel_schema.source_node_matcher),
|
|
662
|
+
)
|
|
663
|
+
|
|
664
|
+
target_match = Template(
|
|
665
|
+
"MATCH (to:$target_node_label{$match_clause})"
|
|
666
|
+
).safe_substitute(
|
|
667
|
+
target_node_label=rel_schema.target_node_label,
|
|
668
|
+
match_clause=_build_match_clause(rel_schema.target_node_matcher),
|
|
669
|
+
)
|
|
670
|
+
|
|
671
|
+
if rel_schema.direction == LinkDirection.INWARD:
|
|
672
|
+
rel = f"(from)<-[r:{rel_schema.rel_label}]-(to)"
|
|
673
|
+
else:
|
|
674
|
+
rel = f"(from)-[r:{rel_schema.rel_label}]->(to)"
|
|
675
|
+
|
|
676
|
+
return matchlink_query_template.safe_substitute(
|
|
677
|
+
source_match=source_match,
|
|
678
|
+
target_match=target_match,
|
|
679
|
+
rel=rel,
|
|
680
|
+
set_rel_properties_statement=_build_rel_properties_statement(
|
|
681
|
+
"r",
|
|
682
|
+
rel_props_as_dict,
|
|
683
|
+
),
|
|
684
|
+
)
|
cartography/graph/statement.py
CHANGED
|
@@ -56,7 +56,7 @@ class GraphStatement:
|
|
|
56
56
|
|
|
57
57
|
self.parent_job_name = parent_job_name if parent_job_name else None
|
|
58
58
|
self.parent_job_sequence_num = (
|
|
59
|
-
parent_job_sequence_num if parent_job_sequence_num else
|
|
59
|
+
parent_job_sequence_num if parent_job_sequence_num else 1
|
|
60
60
|
)
|
|
61
61
|
|
|
62
62
|
def merge_parameters(self, parameters: Dict) -> None:
|