cartography 0.107.0rc3__py3-none-any.whl → 0.108.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cartography might be problematic. Click here for more details.
- cartography/_version.py +2 -2
- cartography/cli.py +10 -0
- cartography/config.py +5 -0
- cartography/data/indexes.cypher +0 -8
- cartography/data/jobs/cleanup/github_repos_cleanup.json +2 -0
- cartography/intel/aws/__init__.py +1 -0
- cartography/intel/aws/ec2/security_groups.py +140 -122
- cartography/intel/aws/ec2/snapshots.py +47 -84
- cartography/intel/aws/guardduty.py +275 -0
- cartography/intel/aws/resources.py +2 -0
- cartography/intel/github/repos.py +370 -28
- cartography/models/aws/ec2/security_group_rules.py +109 -0
- cartography/models/aws/ec2/security_groups.py +90 -0
- cartography/models/aws/ec2/snapshots.py +58 -0
- cartography/models/aws/ec2/volumes.py +20 -0
- cartography/models/aws/guardduty/__init__.py +1 -0
- cartography/models/aws/guardduty/findings.py +102 -0
- cartography/models/github/dependencies.py +74 -0
- cartography/models/github/manifests.py +49 -0
- {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc1.dist-info}/METADATA +3 -3
- {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc1.dist-info}/RECORD +25 -19
- cartography/data/jobs/cleanup/aws_import_ec2_security_groupinfo_cleanup.json +0 -24
- cartography/data/jobs/cleanup/aws_import_snapshots_cleanup.json +0 -30
- {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc1.dist-info}/WHEEL +0 -0
- {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc1.dist-info}/entry_points.txt +0 -0
- {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc1.dist-info}/licenses/LICENSE +0 -0
- {cartography-0.107.0rc3.dist-info → cartography-0.108.0rc1.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import configparser
|
|
2
2
|
import logging
|
|
3
|
+
from collections import defaultdict
|
|
3
4
|
from collections import namedtuple
|
|
4
5
|
from string import Template
|
|
5
6
|
from typing import Any
|
|
@@ -12,8 +13,12 @@ from packaging.requirements import InvalidRequirement
|
|
|
12
13
|
from packaging.requirements import Requirement
|
|
13
14
|
from packaging.utils import canonicalize_name
|
|
14
15
|
|
|
16
|
+
from cartography.client.core.tx import load as load_data
|
|
17
|
+
from cartography.graph.job import GraphJob
|
|
15
18
|
from cartography.intel.github.util import fetch_all
|
|
16
19
|
from cartography.intel.github.util import PaginatedGraphqlData
|
|
20
|
+
from cartography.models.github.dependencies import GitHubDependencySchema
|
|
21
|
+
from cartography.models.github.manifests import DependencyGraphManifestSchema
|
|
17
22
|
from cartography.util import backoff_handler
|
|
18
23
|
from cartography.util import retries_with_backoff
|
|
19
24
|
from cartography.util import run_cleanup_job
|
|
@@ -93,6 +98,18 @@ GITHUB_ORG_REPOS_PAGINATED_GRAPHQL = """
|
|
|
93
98
|
text
|
|
94
99
|
}
|
|
95
100
|
}
|
|
101
|
+
dependencyGraphManifests(first: 20) {
|
|
102
|
+
nodes {
|
|
103
|
+
blobPath
|
|
104
|
+
dependencies(first: 100) {
|
|
105
|
+
nodes {
|
|
106
|
+
packageName
|
|
107
|
+
requirements
|
|
108
|
+
packageManager
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
}
|
|
96
113
|
}
|
|
97
114
|
}
|
|
98
115
|
}
|
|
@@ -291,8 +308,10 @@ def transform(
|
|
|
291
308
|
:param outside_collaborators: dict of repo URL to list of outside collaborators.
|
|
292
309
|
See tests.data.github.repos.OUTSIDE_COLLABORATORS for data shape.
|
|
293
310
|
:return: Dict containing the repos, repo->language mapping, owners->repo mapping, outside collaborators->repo
|
|
294
|
-
mapping,
|
|
311
|
+
mapping, Python requirements files (if any) in a repo, manifests from GitHub's dependency graph, and all
|
|
312
|
+
dependencies from GitHub's dependency graph.
|
|
295
313
|
"""
|
|
314
|
+
logger.info(f"Processing {len(repos_json)} GitHub repositories")
|
|
296
315
|
transformed_repo_list: List[Dict] = []
|
|
297
316
|
transformed_repo_languages: List[Dict] = []
|
|
298
317
|
transformed_repo_owners: List[Dict] = []
|
|
@@ -312,6 +331,8 @@ def transform(
|
|
|
312
331
|
"WRITE": [],
|
|
313
332
|
}
|
|
314
333
|
transformed_requirements_files: List[Dict] = []
|
|
334
|
+
transformed_dependencies: List[Dict] = []
|
|
335
|
+
transformed_manifests: List[Dict] = []
|
|
315
336
|
for repo_object in repos_json:
|
|
316
337
|
_transform_repo_languages(
|
|
317
338
|
repo_object["url"],
|
|
@@ -350,6 +371,16 @@ def transform(
|
|
|
350
371
|
repo_url,
|
|
351
372
|
transformed_requirements_files,
|
|
352
373
|
)
|
|
374
|
+
_transform_dependency_manifests(
|
|
375
|
+
repo_object.get("dependencyGraphManifests"),
|
|
376
|
+
repo_url,
|
|
377
|
+
transformed_manifests,
|
|
378
|
+
)
|
|
379
|
+
_transform_dependency_graph(
|
|
380
|
+
repo_object.get("dependencyGraphManifests"),
|
|
381
|
+
repo_url,
|
|
382
|
+
transformed_dependencies,
|
|
383
|
+
)
|
|
353
384
|
results = {
|
|
354
385
|
"repos": transformed_repo_list,
|
|
355
386
|
"repo_languages": transformed_repo_languages,
|
|
@@ -357,7 +388,10 @@ def transform(
|
|
|
357
388
|
"repo_outside_collaborators": transformed_outside_collaborators,
|
|
358
389
|
"repo_direct_collaborators": transformed_direct_collaborators,
|
|
359
390
|
"python_requirements": transformed_requirements_files,
|
|
391
|
+
"dependencies": transformed_dependencies,
|
|
392
|
+
"manifests": transformed_manifests,
|
|
360
393
|
}
|
|
394
|
+
|
|
361
395
|
return results
|
|
362
396
|
|
|
363
397
|
|
|
@@ -533,6 +567,185 @@ def _transform_setup_cfg_requirements(
|
|
|
533
567
|
_transform_python_requirements(requirements_list, repo_url, out_requirements_files)
|
|
534
568
|
|
|
535
569
|
|
|
570
|
+
def _transform_dependency_manifests(
|
|
571
|
+
dependency_manifests: Optional[Dict],
|
|
572
|
+
repo_url: str,
|
|
573
|
+
out_manifests_list: List[Dict],
|
|
574
|
+
) -> None:
|
|
575
|
+
"""
|
|
576
|
+
Transform GitHub dependency graph manifests into cartography manifest format.
|
|
577
|
+
:param dependency_manifests: dependencyGraphManifests from GitHub GraphQL API
|
|
578
|
+
:param repo_url: The URL of the GitHub repo
|
|
579
|
+
:param out_manifests_list: Output array to append transformed results to
|
|
580
|
+
:return: Nothing
|
|
581
|
+
"""
|
|
582
|
+
if not dependency_manifests or not dependency_manifests.get("nodes"):
|
|
583
|
+
return
|
|
584
|
+
|
|
585
|
+
manifests_added = 0
|
|
586
|
+
|
|
587
|
+
for manifest in dependency_manifests["nodes"]:
|
|
588
|
+
blob_path = manifest.get("blobPath", "")
|
|
589
|
+
if not blob_path:
|
|
590
|
+
continue
|
|
591
|
+
|
|
592
|
+
# Count dependencies in this manifest
|
|
593
|
+
dependencies = manifest.get("dependencies", {})
|
|
594
|
+
dependencies_count = len(dependencies.get("nodes", []) if dependencies else [])
|
|
595
|
+
|
|
596
|
+
# Create unique manifest ID by combining repo URL and blob path
|
|
597
|
+
manifest_id = f"{repo_url}#{blob_path}"
|
|
598
|
+
|
|
599
|
+
# Extract filename from blob path
|
|
600
|
+
filename = blob_path.split("/")[-1] if blob_path else "None"
|
|
601
|
+
|
|
602
|
+
out_manifests_list.append(
|
|
603
|
+
{
|
|
604
|
+
"id": manifest_id,
|
|
605
|
+
"blob_path": blob_path,
|
|
606
|
+
"filename": filename,
|
|
607
|
+
"dependencies_count": dependencies_count,
|
|
608
|
+
"repo_url": repo_url,
|
|
609
|
+
}
|
|
610
|
+
)
|
|
611
|
+
manifests_added += 1
|
|
612
|
+
|
|
613
|
+
if manifests_added > 0:
|
|
614
|
+
repo_name = repo_url.split("/")[-1] if repo_url else "repository"
|
|
615
|
+
logger.info(f"Found {manifests_added} dependency manifests in {repo_name}")
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
def _transform_dependency_graph(
|
|
619
|
+
dependency_manifests: Optional[Dict],
|
|
620
|
+
repo_url: str,
|
|
621
|
+
out_dependencies_list: List[Dict],
|
|
622
|
+
) -> None:
|
|
623
|
+
"""
|
|
624
|
+
Transform GitHub dependency graph manifests into cartography dependency format.
|
|
625
|
+
:param dependency_manifests: dependencyGraphManifests from GitHub GraphQL API
|
|
626
|
+
:param repo_url: The URL of the GitHub repo
|
|
627
|
+
:param out_dependencies_list: Output array to append transformed results to
|
|
628
|
+
:return: Nothing
|
|
629
|
+
"""
|
|
630
|
+
if not dependency_manifests or not dependency_manifests.get("nodes"):
|
|
631
|
+
return
|
|
632
|
+
|
|
633
|
+
dependencies_added = 0
|
|
634
|
+
|
|
635
|
+
for manifest in dependency_manifests["nodes"]:
|
|
636
|
+
dependencies = manifest.get("dependencies", {})
|
|
637
|
+
if not dependencies or not dependencies.get("nodes"):
|
|
638
|
+
continue
|
|
639
|
+
|
|
640
|
+
manifest_path = manifest.get("blobPath", "")
|
|
641
|
+
|
|
642
|
+
for dep in dependencies["nodes"]:
|
|
643
|
+
package_name = dep.get("packageName")
|
|
644
|
+
if not package_name:
|
|
645
|
+
continue
|
|
646
|
+
|
|
647
|
+
requirements = dep.get("requirements", "")
|
|
648
|
+
package_manager = dep.get("packageManager", "").upper()
|
|
649
|
+
|
|
650
|
+
# Extract version from requirements string if available
|
|
651
|
+
pinned_version = _extract_version_from_requirements(requirements)
|
|
652
|
+
|
|
653
|
+
# Create ecosystem-specific canonical name
|
|
654
|
+
canonical_name = _canonicalize_dependency_name(
|
|
655
|
+
package_name, package_manager
|
|
656
|
+
)
|
|
657
|
+
|
|
658
|
+
# Create ecosystem identifier
|
|
659
|
+
ecosystem = package_manager.lower() if package_manager else "unknown"
|
|
660
|
+
|
|
661
|
+
# Create simple dependency ID using canonical name and version
|
|
662
|
+
# This allows the same dependency to be shared across multiple repos
|
|
663
|
+
dependency_id = (
|
|
664
|
+
f"{canonical_name}|{pinned_version}"
|
|
665
|
+
if pinned_version
|
|
666
|
+
else canonical_name
|
|
667
|
+
)
|
|
668
|
+
|
|
669
|
+
# Normalize requirements field (prefer None over empty string)
|
|
670
|
+
normalized_requirements = requirements if requirements else None
|
|
671
|
+
|
|
672
|
+
# Create manifest ID for the HAS_DEP relationship
|
|
673
|
+
manifest_id = f"{repo_url}#{manifest_path}"
|
|
674
|
+
|
|
675
|
+
out_dependencies_list.append(
|
|
676
|
+
{
|
|
677
|
+
"id": dependency_id,
|
|
678
|
+
"name": canonical_name,
|
|
679
|
+
"original_name": package_name, # Keep original for reference
|
|
680
|
+
"version": pinned_version,
|
|
681
|
+
"requirements": normalized_requirements,
|
|
682
|
+
"ecosystem": ecosystem,
|
|
683
|
+
"package_manager": package_manager,
|
|
684
|
+
"manifest_path": manifest_path,
|
|
685
|
+
"manifest_id": manifest_id,
|
|
686
|
+
"repo_url": repo_url,
|
|
687
|
+
# Add separate fields for easier querying
|
|
688
|
+
"repo_name": repo_url.split("/")[-1] if repo_url else "",
|
|
689
|
+
"manifest_file": (
|
|
690
|
+
manifest_path.split("/")[-1] if manifest_path else ""
|
|
691
|
+
),
|
|
692
|
+
}
|
|
693
|
+
)
|
|
694
|
+
dependencies_added += 1
|
|
695
|
+
|
|
696
|
+
if dependencies_added > 0:
|
|
697
|
+
repo_name = repo_url.split("/")[-1] if repo_url else "repository"
|
|
698
|
+
logger.info(f"Found {dependencies_added} dependencies in {repo_name}")
|
|
699
|
+
|
|
700
|
+
|
|
701
|
+
def _extract_version_from_requirements(requirements: Optional[str]) -> Optional[str]:
|
|
702
|
+
"""
|
|
703
|
+
Extract a pinned version from a requirements string if it exists.
|
|
704
|
+
Examples: "1.2.3" -> "1.2.3", "^1.2.3" -> None, ">=1.0,<2.0" -> None
|
|
705
|
+
"""
|
|
706
|
+
if not requirements or not requirements.strip():
|
|
707
|
+
return None
|
|
708
|
+
|
|
709
|
+
# Handle exact version specifications (no operators)
|
|
710
|
+
if requirements and not any(
|
|
711
|
+
op in requirements for op in ["^", "~", ">", "<", "=", "*"]
|
|
712
|
+
):
|
|
713
|
+
stripped = requirements.strip()
|
|
714
|
+
return stripped if stripped else None
|
|
715
|
+
|
|
716
|
+
# Handle == specifications
|
|
717
|
+
if "==" in requirements:
|
|
718
|
+
parts = requirements.split("==")
|
|
719
|
+
if len(parts) == 2:
|
|
720
|
+
version = parts[1].strip()
|
|
721
|
+
# Remove any trailing constraints
|
|
722
|
+
version = version.split(",")[0].split(" ")[0]
|
|
723
|
+
return version if version else None
|
|
724
|
+
|
|
725
|
+
return None
|
|
726
|
+
|
|
727
|
+
|
|
728
|
+
def _canonicalize_dependency_name(name: str, package_manager: Optional[str]) -> str:
|
|
729
|
+
"""
|
|
730
|
+
Canonicalize dependency names based on ecosystem conventions.
|
|
731
|
+
"""
|
|
732
|
+
if not name:
|
|
733
|
+
return name
|
|
734
|
+
|
|
735
|
+
# For Python packages, use existing canonicalization
|
|
736
|
+
if package_manager in ["PIP", "CONDA"]:
|
|
737
|
+
try:
|
|
738
|
+
from packaging.utils import canonicalize_name
|
|
739
|
+
|
|
740
|
+
return str(canonicalize_name(name))
|
|
741
|
+
except ImportError:
|
|
742
|
+
# Fallback if packaging not available
|
|
743
|
+
return name.lower().replace("_", "-")
|
|
744
|
+
|
|
745
|
+
# For other ecosystems, use lowercase
|
|
746
|
+
return name.lower()
|
|
747
|
+
|
|
748
|
+
|
|
536
749
|
def _transform_python_requirements(
|
|
537
750
|
requirements_list: List[str],
|
|
538
751
|
repo_url: str,
|
|
@@ -785,6 +998,136 @@ def load_collaborators(
|
|
|
785
998
|
)
|
|
786
999
|
|
|
787
1000
|
|
|
1001
|
+
@timeit
def load_python_requirements(
    neo4j_session: neo4j.Session,
    update_tag: int,
    requirements_objects: List[Dict],
) -> None:
    """
    Write Python requirement records to the graph and link them to their repositories.

    Each record is merged as a ``PythonLibrary:Dependency`` node keyed on ``id``. A
    ``REQUIRES`` relationship is then merged from the ``GitHubRepository`` whose ``id``
    equals the record's ``repo_url``.

    :param neo4j_session: Neo4j session the query is run on
    :param update_tag: Value written to ``lastupdated`` on the nodes and relationships
    :param requirements_objects: Records read by the query for ``id``, ``name``,
        ``version``, ``specifier`` and ``repo_url``
    :return: Nothing
    """
    ingest_requirements_query = """
    UNWIND $Requirements AS req
        MERGE (lib:PythonLibrary:Dependency{id: req.id})
        ON CREATE SET lib.firstseen = timestamp(),
        lib.name = req.name
        SET lib.lastupdated = $UpdateTag,
        lib.version = req.version

        WITH lib, req
        MATCH (repo:GitHubRepository{id: req.repo_url})
        MERGE (repo)-[r:REQUIRES]->(lib)
        ON CREATE SET r.firstseen = timestamp()
        SET r.lastupdated = $UpdateTag,
        r.specifier = req.specifier
    """
    neo4j_session.run(
        ingest_requirements_query,
        UpdateTag=update_tag,
        Requirements=requirements_objects,
    )
|
|
1027
|
+
|
|
1028
|
+
|
|
1029
|
+
@timeit
def load_github_dependencies(
    neo4j_session: neo4j.Session,
    update_tag: int,
    dependencies: List[Dict],
) -> None:
    """
    Ingest GitHub dependency data into Neo4j using the new data model.

    Dependencies are bucketed by their ``(repo_url, manifest_id)`` pair and each bucket
    is loaded with its own ``load_data`` call. The two grouping values are passed as
    keyword arguments and removed from the individual records.

    :param neo4j_session: Neo4J session object for server communication
    :param update_tag: Timestamp used to determine data freshness
    :param dependencies: List of dependency objects from GitHub's dependency graph
    :return: Nothing
    """
    kwarg_fields = ("repo_url", "manifest_id")
    batches = defaultdict(list)

    for dependency in dependencies:
        batch_key = (dependency["repo_url"], dependency["manifest_id"])
        record = {
            field: value
            for field, value in dependency.items()
            if field not in kwarg_fields
        }
        batches[batch_key].append(record)

    # One load per repository/manifest combination.
    for (repo_url, manifest_id), batch in batches.items():
        load_data(
            neo4j_session,
            GitHubDependencySchema(),
            batch,
            lastupdated=update_tag,
            repo_url=repo_url,
            manifest_id=manifest_id,
        )
|
|
1069
|
+
|
|
1070
|
+
|
|
1071
|
+
@timeit
def load_github_dependency_manifests(
    neo4j_session: neo4j.Session,
    update_tag: int,
    manifests: List[Dict],
) -> None:
    """
    Ingest GitHub dependency manifests into Neo4j.

    Manifests are bucketed by ``repo_url`` and each bucket is loaded with its own
    ``load_data`` call, with the repository URL passed as a keyword argument.

    :param neo4j_session: Neo4j session the load runs on
    :param update_tag: Value passed to the load as ``lastupdated``
    :param manifests: Manifest dicts; each must contain ``repo_url``
    :return: Nothing
    """
    batches = defaultdict(list)
    for entry in manifests:
        batches[entry["repo_url"]].append(entry)

    # One load per repository.
    for url, batch in batches.items():
        load_data(
            neo4j_session,
            DependencyGraphManifestSchema(),
            batch,
            lastupdated=update_tag,
            repo_url=url,
        )
|
|
1095
|
+
|
|
1096
|
+
|
|
1097
|
+
@timeit
def cleanup_github_dependencies(
    neo4j_session: neo4j.Session,
    common_job_parameters: Dict[str, Any],
    repo_urls: List[str],
) -> None:
    """
    Run the GitHubDependencySchema cleanup job once per repository URL.

    :param neo4j_session: Neo4j session the cleanup jobs run on
    :param common_job_parameters: Common job parameters; copied for each job with
        ``repo_url`` added
    :param repo_urls: List of repository URLs to clean up dependencies for
    :return: Nothing
    """
    for url in repo_urls:
        job_parameters = dict(common_job_parameters, repo_url=url)
        cleanup_job = GraphJob.from_node_schema(
            GitHubDependencySchema(), job_parameters
        )
        cleanup_job.run(neo4j_session)
|
|
1109
|
+
|
|
1110
|
+
|
|
1111
|
+
@timeit
def cleanup_github_manifests(
    neo4j_session: neo4j.Session,
    common_job_parameters: Dict[str, Any],
    repo_urls: List[str],
) -> None:
    """
    Delete GitHub dependency manifests and their relationships from the graph if they
    were not updated in the last sync.

    A separate DependencyGraphManifestSchema cleanup job is run for each repository URL.

    :param neo4j_session: Neo4j session
    :param common_job_parameters: Common job parameters containing UPDATE_TAG; copied
        for each job with ``repo_url`` added
    :param repo_urls: List of repository URLs to clean up manifests for
    :return: Nothing
    """
    for url in repo_urls:
        job_parameters = dict(common_job_parameters, repo_url=url)
        cleanup_job = GraphJob.from_node_schema(
            DependencyGraphManifestSchema(), job_parameters
        )
        cleanup_job.run(neo4j_session)
|
|
1129
|
+
|
|
1130
|
+
|
|
788
1131
|
@timeit
|
|
789
1132
|
def load(
|
|
790
1133
|
neo4j_session: neo4j.Session,
|
|
@@ -823,33 +1166,15 @@ def load(
|
|
|
823
1166
|
common_job_parameters["UPDATE_TAG"],
|
|
824
1167
|
repo_data["python_requirements"],
|
|
825
1168
|
)
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
UNWIND $Requirements AS req
|
|
836
|
-
MERGE (lib:PythonLibrary:Dependency{id: req.id})
|
|
837
|
-
ON CREATE SET lib.firstseen = timestamp(),
|
|
838
|
-
lib.name = req.name
|
|
839
|
-
SET lib.lastupdated = $UpdateTag,
|
|
840
|
-
lib.version = req.version
|
|
841
|
-
|
|
842
|
-
WITH lib, req
|
|
843
|
-
MATCH (repo:GitHubRepository{id: req.repo_url})
|
|
844
|
-
MERGE (repo)-[r:REQUIRES]->(lib)
|
|
845
|
-
ON CREATE SET r.firstseen = timestamp()
|
|
846
|
-
SET r.lastupdated = $UpdateTag,
|
|
847
|
-
r.specifier = req.specifier
|
|
848
|
-
"""
|
|
849
|
-
neo4j_session.run(
|
|
850
|
-
query,
|
|
851
|
-
Requirements=requirements_objects,
|
|
852
|
-
UpdateTag=update_tag,
|
|
1169
|
+
load_github_dependency_manifests(
|
|
1170
|
+
neo4j_session,
|
|
1171
|
+
common_job_parameters["UPDATE_TAG"],
|
|
1172
|
+
repo_data["manifests"],
|
|
1173
|
+
)
|
|
1174
|
+
load_github_dependencies(
|
|
1175
|
+
neo4j_session,
|
|
1176
|
+
common_job_parameters["UPDATE_TAG"],
|
|
1177
|
+
repo_data["dependencies"],
|
|
853
1178
|
)
|
|
854
1179
|
|
|
855
1180
|
|
|
@@ -896,4 +1221,21 @@ def sync(
|
|
|
896
1221
|
)
|
|
897
1222
|
repo_data = transform(repos_json, direct_collabs, outside_collabs)
|
|
898
1223
|
load(neo4j_session, common_job_parameters, repo_data)
|
|
1224
|
+
|
|
1225
|
+
# Collect repository URLs that have dependencies for cleanup
|
|
1226
|
+
repo_urls_with_dependencies = list(
|
|
1227
|
+
{dep["repo_url"] for dep in repo_data["dependencies"]}
|
|
1228
|
+
)
|
|
1229
|
+
cleanup_github_dependencies(
|
|
1230
|
+
neo4j_session, common_job_parameters, repo_urls_with_dependencies
|
|
1231
|
+
)
|
|
1232
|
+
|
|
1233
|
+
# Collect repository URLs that have manifests for cleanup
|
|
1234
|
+
repo_urls_with_manifests = list(
|
|
1235
|
+
{manifest["repo_url"] for manifest in repo_data["manifests"]}
|
|
1236
|
+
)
|
|
1237
|
+
cleanup_github_manifests(
|
|
1238
|
+
neo4j_session, common_job_parameters, repo_urls_with_manifests
|
|
1239
|
+
)
|
|
1240
|
+
|
|
899
1241
|
run_cleanup_job("github_repos_cleanup.json", neo4j_session, common_job_parameters)
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from cartography.models.core.common import PropertyRef
|
|
4
|
+
from cartography.models.core.nodes import CartographyNodeProperties
|
|
5
|
+
from cartography.models.core.nodes import CartographyNodeSchema
|
|
6
|
+
from cartography.models.core.nodes import ExtraNodeLabels
|
|
7
|
+
from cartography.models.core.relationships import CartographyRelProperties
|
|
8
|
+
from cartography.models.core.relationships import CartographyRelSchema
|
|
9
|
+
from cartography.models.core.relationships import LinkDirection
|
|
10
|
+
from cartography.models.core.relationships import make_target_node_matcher
|
|
11
|
+
from cartography.models.core.relationships import OtherRelationships
|
|
12
|
+
from cartography.models.core.relationships import TargetNodeMatcher
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class IpRuleNodeProperties(CartographyNodeProperties):
    """
    Property declarations shared by the ``IpRule``-labelled schemas in this module
    (IpRuleSchema and IpPermissionInboundSchema).
    """

    # `id` and `ruleid` are both declared against the same "RuleId" field.
    id: PropertyRef = PropertyRef("RuleId")
    # NOTE(review): extra_index presumably requests an additional index on this
    # property; confirm against PropertyRef.
    ruleid: PropertyRef = PropertyRef("RuleId", extra_index=True)
    groupid: PropertyRef = PropertyRef("GroupId", extra_index=True)
    protocol: PropertyRef = PropertyRef("Protocol")
    fromport: PropertyRef = PropertyRef("FromPort")
    toport: PropertyRef = PropertyRef("ToPort")
    # NOTE(review): set_in_kwargs presumably means the value comes from a load-time
    # keyword argument rather than from each record; confirm against PropertyRef.
    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
class IpRuleToAWSAccountRelProperties(CartographyRelProperties):
    """Property declarations for IpRuleToAWSAccountRel; only ``lastupdated`` is set."""

    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass(frozen=True)
class IpRuleToAWSAccountRel(CartographyRelSchema):
    """
    ``RESOURCE`` relationship to the ``AWSAccount`` node whose ``id`` matches ``AWS_ID``.

    Used as the sub-resource relationship by all three node schemas in this module.
    """

    target_node_label: str = "AWSAccount"
    # AWS_ID is declared with set_in_kwargs=True, unlike the per-record matchers
    # used by the other relationships in this module.
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {"id": PropertyRef("AWS_ID", set_in_kwargs=True)}
    )
    direction: LinkDirection = LinkDirection.INWARD
    rel_label: str = "RESOURCE"
    properties: IpRuleToAWSAccountRelProperties = IpRuleToAWSAccountRelProperties()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass(frozen=True)
class IpRuleToSecurityGroupRelProperties(CartographyRelProperties):
    """Property declarations for IpRuleToSecurityGroupRel; only ``lastupdated`` is set."""

    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass(frozen=True)
class IpRuleToSecurityGroupRel(CartographyRelSchema):
    """
    ``MEMBER_OF_EC2_SECURITY_GROUP`` relationship to the ``EC2SecurityGroup`` node whose
    ``groupid`` matches the record's ``GroupId``.
    """

    target_node_label: str = "EC2SecurityGroup"
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {"groupid": PropertyRef("GroupId")}
    )
    direction: LinkDirection = LinkDirection.OUTWARD
    rel_label: str = "MEMBER_OF_EC2_SECURITY_GROUP"
    properties: IpRuleToSecurityGroupRelProperties = (
        IpRuleToSecurityGroupRelProperties()
    )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass(frozen=True)
class IpRangeNodeProperties(CartographyNodeProperties):
    """Property declarations for the ``IpRange`` node schema."""

    # `id` and `range` are both declared against the same "RangeId" field.
    id: PropertyRef = PropertyRef("RangeId")
    range: PropertyRef = PropertyRef("RangeId")
    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dataclass(frozen=True)
class IpRangeToIpRuleRelProperties(CartographyRelProperties):
    """Property declarations for IpRangeToIpRuleRel; only ``lastupdated`` is set."""

    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass(frozen=True)
class IpRangeToIpRuleRel(CartographyRelSchema):
    """
    ``MEMBER_OF_IP_RULE`` relationship to the ``IpRule`` node whose ``ruleid`` matches the
    record's ``RuleId``.
    """

    target_node_label: str = "IpRule"
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {"ruleid": PropertyRef("RuleId")}
    )
    direction: LinkDirection = LinkDirection.OUTWARD
    rel_label: str = "MEMBER_OF_IP_RULE"
    properties: IpRangeToIpRuleRelProperties = IpRangeToIpRuleRelProperties()
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass(frozen=True)
class IpRuleSchema(CartographyNodeSchema):
    """
    Node schema for ``IpRule`` nodes: sub-resource of an AWSAccount and related to an
    EC2SecurityGroup.
    """

    label: str = "IpRule"
    properties: IpRuleNodeProperties = IpRuleNodeProperties()
    sub_resource_relationship: IpRuleToAWSAccountRel = IpRuleToAWSAccountRel()
    other_relationships: OtherRelationships = OtherRelationships(
        [IpRuleToSecurityGroupRel()]
    )
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@dataclass(frozen=True)
class IpPermissionInboundSchema(CartographyNodeSchema):
    """
    Same declarations as IpRuleSchema (label ``IpRule``, same properties and
    relationships) plus the extra node label ``IpPermissionInbound``.
    """

    label: str = "IpRule"
    extra_node_labels: ExtraNodeLabels = ExtraNodeLabels(["IpPermissionInbound"])
    properties: IpRuleNodeProperties = IpRuleNodeProperties()
    sub_resource_relationship: IpRuleToAWSAccountRel = IpRuleToAWSAccountRel()
    other_relationships: OtherRelationships = OtherRelationships(
        [IpRuleToSecurityGroupRel()]
    )
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@dataclass(frozen=True)
class IpRangeSchema(CartographyNodeSchema):
    """
    Node schema for ``IpRange`` nodes: sub-resource of an AWSAccount and related to an
    IpRule.
    """

    label: str = "IpRange"
    properties: IpRangeNodeProperties = IpRangeNodeProperties()
    # Reuses the IpRule -> AWSAccount relationship class; this module defines no
    # IpRange-specific one.
    sub_resource_relationship: IpRuleToAWSAccountRel = IpRuleToAWSAccountRel()
    other_relationships: OtherRelationships = OtherRelationships([IpRangeToIpRuleRel()])
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
from cartography.models.core.common import PropertyRef
|
|
4
|
+
from cartography.models.core.nodes import CartographyNodeProperties
|
|
5
|
+
from cartography.models.core.nodes import CartographyNodeSchema
|
|
6
|
+
from cartography.models.core.relationships import CartographyRelProperties
|
|
7
|
+
from cartography.models.core.relationships import CartographyRelSchema
|
|
8
|
+
from cartography.models.core.relationships import LinkDirection
|
|
9
|
+
from cartography.models.core.relationships import make_target_node_matcher
|
|
10
|
+
from cartography.models.core.relationships import OtherRelationships
|
|
11
|
+
from cartography.models.core.relationships import TargetNodeMatcher
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass(frozen=True)
class EC2SecurityGroupNodeProperties(CartographyNodeProperties):
    """Property declarations for the ``EC2SecurityGroup`` node schema."""

    # `id` and `groupid` are both declared against the same "GroupId" field.
    id: PropertyRef = PropertyRef("GroupId")
    # NOTE(review): extra_index presumably requests an additional index on this
    # property; confirm against PropertyRef.
    groupid: PropertyRef = PropertyRef("GroupId", extra_index=True)
    name: PropertyRef = PropertyRef("GroupName")
    description: PropertyRef = PropertyRef("Description")
    # NOTE(review): set_in_kwargs presumably means Region and lastupdated come from
    # load-time keyword arguments rather than from each record; confirm against PropertyRef.
    region: PropertyRef = PropertyRef("Region", set_in_kwargs=True)
    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass(frozen=True)
class EC2SecurityGroupToAWSAccountRelProperties(CartographyRelProperties):
    """Property declarations for EC2SecurityGroupToAWSAccountRel; only ``lastupdated`` is set."""

    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass(frozen=True)
class EC2SecurityGroupToAWSAccountRel(CartographyRelSchema):
    """
    ``RESOURCE`` relationship to the ``AWSAccount`` node whose ``id`` matches ``AWS_ID``.

    Used as the sub-resource relationship of EC2SecurityGroupSchema.
    """

    target_node_label: str = "AWSAccount"
    # AWS_ID is declared with set_in_kwargs=True, unlike the per-record matchers
    # used by the other relationships in this module.
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {"id": PropertyRef("AWS_ID", set_in_kwargs=True)}
    )
    direction: LinkDirection = LinkDirection.INWARD
    rel_label: str = "RESOURCE"
    properties: EC2SecurityGroupToAWSAccountRelProperties = (
        EC2SecurityGroupToAWSAccountRelProperties()
    )
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass(frozen=True)
class EC2SecurityGroupToVpcRelProperties(CartographyRelProperties):
    """Property declarations for EC2SecurityGroupToVpcRel; only ``lastupdated`` is set."""

    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass(frozen=True)
class EC2SecurityGroupToVpcRel(CartographyRelSchema):
    """
    ``MEMBER_OF_EC2_SECURITY_GROUP`` relationship to the ``AWSVpc`` node whose ``vpcid``
    matches the record's ``VpcId``.
    """

    target_node_label: str = "AWSVpc"
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {"vpcid": PropertyRef("VpcId")}
    )
    # NOTE(review): the label reads as "<x> is a member of the security group", yet the
    # direction is OUTWARD with the VPC as target; confirm this matches the direction of
    # MEMBER_OF_EC2_SECURITY_GROUP edges already in the graph.
    direction: LinkDirection = LinkDirection.OUTWARD
    rel_label: str = "MEMBER_OF_EC2_SECURITY_GROUP"
    properties: EC2SecurityGroupToVpcRelProperties = (
        EC2SecurityGroupToVpcRelProperties()
    )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass(frozen=True)
class EC2SecurityGroupToSourceGroupRelProperties(CartographyRelProperties):
    """Property declarations for EC2SecurityGroupToSourceGroupRel; only ``lastupdated`` is set."""

    lastupdated: PropertyRef = PropertyRef("lastupdated", set_in_kwargs=True)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass(frozen=True)
class EC2SecurityGroupToSourceGroupRel(CartographyRelSchema):
    """
    ``ALLOWS_TRAFFIC_FROM`` relationship to other ``EC2SecurityGroup`` nodes, matched on
    ``groupid`` against the record's ``SOURCE_GROUP_IDS``.
    """

    target_node_label: str = "EC2SecurityGroup"
    # NOTE(review): one_to_many presumably means SOURCE_GROUP_IDS holds a list and one
    # relationship is created per element; confirm against PropertyRef.
    target_node_matcher: TargetNodeMatcher = make_target_node_matcher(
        {"groupid": PropertyRef("SOURCE_GROUP_IDS", one_to_many=True)}
    )
    direction: LinkDirection = LinkDirection.OUTWARD
    rel_label: str = "ALLOWS_TRAFFIC_FROM"
    properties: EC2SecurityGroupToSourceGroupRelProperties = (
        EC2SecurityGroupToSourceGroupRelProperties()
    )
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass(frozen=True)
class EC2SecurityGroupSchema(CartographyNodeSchema):
    """
    Node schema for ``EC2SecurityGroup`` nodes: sub-resource of an AWSAccount, related
    to an AWSVpc and to the source security groups listed on the record.
    """

    label: str = "EC2SecurityGroup"
    properties: EC2SecurityGroupNodeProperties = EC2SecurityGroupNodeProperties()
    sub_resource_relationship: EC2SecurityGroupToAWSAccountRel = (
        EC2SecurityGroupToAWSAccountRel()
    )
    other_relationships: OtherRelationships = OtherRelationships(
        [
            EC2SecurityGroupToVpcRel(),
            EC2SecurityGroupToSourceGroupRel(),
        ]
    )
|