cartography 0.94.0rc2__py3-none-any.whl → 0.95.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cartography might be problematic. Click here for more details.

Files changed (34)
  1. cartography/cli.py +42 -24
  2. cartography/config.py +12 -8
  3. cartography/data/indexes.cypher +0 -2
  4. cartography/data/jobs/scoped_analysis/semgrep_sca_risk_analysis.json +13 -13
  5. cartography/driftdetect/cli.py +1 -1
  6. cartography/graph/job.py +8 -1
  7. cartography/intel/aws/permission_relationships.py +6 -2
  8. cartography/intel/gcp/__init__.py +110 -23
  9. cartography/intel/kandji/__init__.py +1 -1
  10. cartography/intel/semgrep/__init__.py +9 -2
  11. cartography/intel/semgrep/dependencies.py +201 -0
  12. cartography/intel/semgrep/deployment.py +67 -0
  13. cartography/intel/semgrep/findings.py +126 -110
  14. cartography/intel/snipeit/__init__.py +30 -0
  15. cartography/intel/snipeit/asset.py +74 -0
  16. cartography/intel/snipeit/user.py +75 -0
  17. cartography/intel/snipeit/util.py +35 -0
  18. cartography/models/semgrep/dependencies.py +77 -0
  19. cartography/models/semgrep/findings.py +3 -1
  20. cartography/models/snipeit/__init__.py +0 -0
  21. cartography/models/snipeit/asset.py +81 -0
  22. cartography/models/snipeit/tenant.py +17 -0
  23. cartography/models/snipeit/user.py +49 -0
  24. cartography/sync.py +2 -2
  25. {cartography-0.94.0rc2.dist-info → cartography-0.95.0.dist-info}/LICENSE +1 -1
  26. {cartography-0.94.0rc2.dist-info → cartography-0.95.0.dist-info}/METADATA +3 -5
  27. {cartography-0.94.0rc2.dist-info → cartography-0.95.0.dist-info}/RECORD +30 -23
  28. {cartography-0.94.0rc2.dist-info → cartography-0.95.0.dist-info}/WHEEL +1 -1
  29. cartography/data/jobs/cleanup/crxcavator_import_cleanup.json +0 -18
  30. cartography/intel/crxcavator/__init__.py +0 -44
  31. cartography/intel/crxcavator/crxcavator.py +0 -329
  32. cartography-0.94.0rc2.dist-info/NOTICE +0 -4
  33. {cartography-0.94.0rc2.dist-info → cartography-0.95.0.dist-info}/entry_points.txt +0 -0
  34. {cartography-0.94.0rc2.dist-info → cartography-0.95.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,201 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Callable
4
+ from typing import Dict
5
+ from typing import List
6
+
7
+ import neo4j
8
+ import requests
9
+ from requests.exceptions import HTTPError
10
+ from requests.exceptions import ReadTimeout
11
+
12
+ from cartography.client.core.tx import load
13
+ from cartography.graph.job import GraphJob
14
+ from cartography.models.semgrep.dependencies import SemgrepGoLibrarySchema
15
+ from cartography.stats import get_stats_client
16
+ from cartography.util import merge_module_sync_metadata
17
+ from cartography.util import timeit
18
+
19
+ logger = logging.getLogger(__name__)
20
+ stat_handler = get_stats_client(__name__)
21
+ _PAGE_SIZE = 10000
22
+ _TIMEOUT = (60, 60)
23
+ _MAX_RETRIES = 3
24
+
25
+
26
@timeit
def get_dependencies(semgrep_app_token: str, deployment_id: str, ecosystems: List[str]) -> List[Dict[str, Any]]:
    """
    Gets all dependencies for the given ecosystems within the given Semgrep deployment ID.
    param: semgrep_app_token: The Semgrep App token to use for authentication.
    param: deployment_id: The Semgrep deployment ID to use for retrieving dependencies.
    param: ecosystems: One or more ecosystems to import dependencies from, e.g. "gomod" or "pypi".
    The list of supported ecosystems is defined here:
    https://semgrep.dev/api/v1/docs/#tag/SupplyChainService/operation/semgrep_app.products.sca.handlers.dependency.list_dependencies_conexxion

    returns: The concatenation of the "dependencies" lists of every page that was fetched.
    raises: ReadTimeout or HTTPError once _MAX_RETRIES consecutive requests for the same page have failed.
    """
    collected = []
    endpoint = f"https://semgrep.dev/api/v1/deployments/{deployment_id}/dependencies"
    page = 0
    failures = 0
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {semgrep_app_token}",
    }

    request_data: dict[str, Any] = {
        "pageSize": _PAGE_SIZE,
        "dependencyFilter": {
            "ecosystem": ecosystems,
        },
    }

    logger.info(f"Retrieving Semgrep dependencies for deployment '{deployment_id}'.")
    while True:
        try:
            response = requests.post(endpoint, json=request_data, headers=headers, timeout=_TIMEOUT)
            response.raise_for_status()
            payload = response.json()
        except (ReadTimeout, HTTPError):
            logger.warning(f"Failed to retrieve Semgrep dependencies for page {page}. Retrying...")
            failures += 1
            if failures < _MAX_RETRIES:
                # Same page, same cursor: request_data is untouched on failure.
                continue
            raise

        page_deps = payload.get("dependencies", [])
        more_pages = payload.get("hasMore", False)
        logger.info(f"Processed page {page} of Semgrep dependencies.")
        collected.extend(page_deps)
        # A successful page resets the consecutive-failure counter.
        failures = 0
        page += 1
        # The cursor from this response is sent with the next request.
        request_data["cursor"] = payload.get("cursor")
        if not more_pages:
            break

    logger.info(f"Retrieved {len(collected)} Semgrep dependencies in {page} pages.")
    return collected
75
+
76
+
77
def transform_dependencies(raw_deps: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Transforms the raw dependencies response from Semgrep API into a list of dicts
    that can be used to create the Dependency nodes.

    param: raw_deps: Raw dependency dicts; each must provide "definedAt" -> "url",
        "package" -> "name" / "versionSpecifier", "ecosystem" and "transitivity".
    returns: One dict per raw dependency with the keys "id", "name", "specifier",
        "version", "repo_url", "ecosystem", "transitivity" and "url".
    raises: KeyError if a raw dependency lacks one of the fields listed above.
    """
    # Sample raw_dep as of November 2024:
    # {
    #     "repositoryId": "123456",
    #     "definedAt": {
    #         "path": "go.mod",
    #         "startLine": "6",
    #         "endLine": "6",
    #         "url": "https://github.com/org/repo-name/blob/00000000000000000000000000000000/go.mod#L6",
    #         "committedAt": "1970-01-01T00:00:00Z",
    #         "startCol": "0",
    #         "endCol": "0"
    #     },
    #     "transitivity": "DIRECT",
    #     "package": {
    #         "name": "github.com/foo/bar",
    #         "versionSpecifier": "1.2.3"
    #     },
    #     "ecosystem": "gomod",
    #     "licenses": [],
    #     "pathToTransitivity": []
    # },
    deps = []
    for raw_dep in raw_deps:

        # We could call a different endpoint to get all repo IDs and store a mapping of repo ID to URL,
        # but it's much simpler to just extract the URL from the definedAt field.
        defined_at_url = raw_dep["definedAt"]["url"]
        repo_url = defined_at_url.split("/blob/", 1)[0]

        name = raw_dep["package"]["name"]
        version = raw_dep["package"]["versionSpecifier"]
        # Named dep_id (not `id`) so the builtin is not shadowed.
        dep_id = f"{name}|{version}"

        # As of November 2024, Semgrep does not import dependencies with version specifiers such as >, <, etc.
        # For now, hardcode the specifier to ==<version> to align with GitHub-sourced Python dependencies.
        # If Semgrep eventually supports version specifiers, update this line accordingly.
        specifier = f"=={version}"

        deps.append({
            # existing dependency properties:
            "id": dep_id,
            "name": name,
            "specifier": specifier,
            "version": version,
            "repo_url": repo_url,

            # Semgrep-specific properties:
            "ecosystem": raw_dep["ecosystem"],
            "transitivity": raw_dep["transitivity"].lower(),
            "url": defined_at_url,
        })

    return deps
137
+
138
+
139
@timeit
def load_dependencies(
    neo4j_session: neo4j.Session,
    dependency_schema: Callable,
    dependencies: List[Dict],
    deployment_id: str,
    update_tag: int,
) -> None:
    """
    Loads the given dependency dicts into the graph using the given schema.

    param: neo4j_session: The Neo4j session passed through to load().
    param: dependency_schema: Zero-argument callable (e.g. a schema class) returning the
        schema object; its `label` attribute is used in the log message.
    param: dependencies: The dependency dicts to load.
    param: deployment_id: Passed to load() as the DEPLOYMENT_ID keyword argument.
    param: update_tag: Passed to load() as the lastupdated keyword argument.
    """
    # Build the schema once and reuse it for both the log label and the load call.
    schema = dependency_schema()
    logger.info(f"Loading {len(dependencies)} {schema.label} objects into the graph.")
    load(
        neo4j_session,
        schema,
        dependencies,
        lastupdated=update_tag,
        DEPLOYMENT_ID=deployment_id,
    )
155
+
156
+
157
@timeit
def cleanup(
    neo4j_session: neo4j.Session,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Builds a GraphJob from SemgrepGoLibrarySchema and runs it on the given session.

    param: neo4j_session: The Neo4j session the job is run against.
    param: common_job_parameters: Parameters handed to GraphJob.from_node_schema().
    """
    logger.info("Running Semgrep Go Library cleanup job.")
    GraphJob.from_node_schema(
        SemgrepGoLibrarySchema(),
        common_job_parameters,
    ).run(neo4j_session)
167
+
168
+
169
@timeit
def sync_dependencies(
    neo4j_session: neo4j.Session,
    semgrep_app_token: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Fetches, transforms and loads Semgrep dependencies for the Go ecosystem, then runs
    cleanup and records sync metadata.

    param: neo4j_session: The Neo4j session used for loading, cleanup and metadata.
    param: semgrep_app_token: The Semgrep App token to use for authentication.
    param: update_tag: The update tag applied to loaded objects and sync metadata.
    param: common_job_parameters: Must contain "DEPLOYMENT_ID"; if it is missing or empty,
        a warning is logged and nothing is synced.
    """
    deployment_id = common_job_parameters.get("DEPLOYMENT_ID")
    if not deployment_id:
        # The trailing space keeps the two adjacent literals from running together in the log.
        logger.warning(
            "Missing Semgrep deployment ID, ensure that sync_deployment() has been called. "
            "Skipping Semgrep dependencies sync job.",
        )
        return

    logger.info("Running Semgrep dependencies sync job.")

    # fetch and load dependencies for the Go ecosystem
    raw_go_deps = get_dependencies(semgrep_app_token, deployment_id, ecosystems=["gomod"])
    go_deps = transform_dependencies(raw_go_deps)
    load_dependencies(neo4j_session, SemgrepGoLibrarySchema, go_deps, deployment_id, update_tag)

    cleanup(neo4j_session, common_job_parameters)

    merge_module_sync_metadata(
        neo4j_session=neo4j_session,
        group_type='Semgrep',
        group_id=deployment_id,
        synced_type='SemgrepDependency',
        update_tag=update_tag,
        stat_handler=stat_handler,
    )
@@ -0,0 +1,67 @@
1
+ import logging
2
+ from typing import Any
3
+ from typing import Dict
4
+
5
+ import neo4j
6
+ import requests
7
+
8
+ from cartography.client.core.tx import load
9
+ from cartography.models.semgrep.deployment import SemgrepDeploymentSchema
10
+ from cartography.stats import get_stats_client
11
+ from cartography.util import timeit
12
+
13
+ logger = logging.getLogger(__name__)
14
+ stat_handler = get_stats_client(__name__)
15
+ _TIMEOUT = (60, 60)
16
+
17
+
18
@timeit
def get_deployment(semgrep_app_token: str) -> Dict[str, Any]:
    """
    Gets the deployment associated with the passed Semgrep App token.
    param: semgrep_app_token: The Semgrep App token to use for authentication.
    returns: A dict with the "id", "name" and "slug" of the first entry in the
        response's "deployments" list.
    """
    response = requests.get(
        "https://semgrep.dev/api/v1/deployments",
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {semgrep_app_token}",
        },
        timeout=_TIMEOUT,
    )
    response.raise_for_status()

    # Only the first deployment in the response is used.
    first = response.json()["deployments"][0]
    return {
        "id": first["id"],
        "name": first["name"],
        "slug": first["slug"],
    }
39
+
40
+
41
@timeit
def load_semgrep_deployment(
    neo4j_session: neo4j.Session, deployment: Dict[str, Any], update_tag: int,
) -> None:
    """
    Loads a single deployment dict into the graph using SemgrepDeploymentSchema.

    param: neo4j_session: The Neo4j session passed through to load().
    param: deployment: The deployment dict to load; it is logged and loaded as a one-item list.
    param: update_tag: Passed to load() as the lastupdated keyword argument.
    """
    logger.info(f"Loading SemgrepDeployment {deployment} into the graph.")
    records = [deployment]
    load(neo4j_session, SemgrepDeploymentSchema(), records, lastupdated=update_tag)
52
+
53
+
54
@timeit
def sync_deployment(
    neo4j_session: neo4j.Session,
    semgrep_app_token: str,
    update_tag: int,
    common_job_parameters: Dict[str, Any],
) -> None:
    """
    Fetches the Semgrep deployment, loads it into the graph and publishes its id and slug
    into common_job_parameters as "DEPLOYMENT_ID" and "DEPLOYMENT_SLUG".

    param: neo4j_session: The Neo4j session used to load the deployment.
    param: semgrep_app_token: The Semgrep App token to use for authentication.
    param: update_tag: The update tag applied to the loaded deployment.
    param: common_job_parameters: Mutated in place with the deployment id and slug.
    """
    deployment = get_deployment(semgrep_app_token)
    dep_id, dep_slug = deployment["id"], deployment["slug"]
    load_semgrep_deployment(neo4j_session, deployment, update_tag)
    # Set only after a successful load, in the same key order as before.
    common_job_parameters.update({
        "DEPLOYMENT_ID": dep_id,
        "DEPLOYMENT_SLUG": dep_slug,
    })
@@ -3,14 +3,14 @@ from typing import Any
3
3
  from typing import Dict
4
4
  from typing import List
5
5
  from typing import Tuple
6
- from urllib.error import HTTPError
7
6
 
8
7
  import neo4j
9
8
  import requests
9
+ from requests.exceptions import HTTPError
10
+ from requests.exceptions import ReadTimeout
10
11
 
11
12
  from cartography.client.core.tx import load
12
13
  from cartography.graph.job import GraphJob
13
- from cartography.models.semgrep.deployment import SemgrepDeploymentSchema
14
14
  from cartography.models.semgrep.findings import SemgrepSCAFindingSchema
15
15
  from cartography.models.semgrep.locations import SemgrepSCALocationSchema
16
16
  from cartography.stats import get_stats_client
@@ -20,88 +20,97 @@ from cartography.util import timeit
20
20
 
21
21
  logger = logging.getLogger(__name__)
22
22
  stat_handler = get_stats_client(__name__)
23
+ _PAGE_SIZE = 500
23
24
  _TIMEOUT = (60, 60)
24
25
  _MAX_RETRIES = 3
25
26
 
26
27
 
27
28
  @timeit
28
- def get_deployment(semgrep_app_token: str) -> Dict[str, Any]:
29
- """
30
- Gets the deployment associated with the passed Semgrep App token.
31
- param: semgrep_app_token: The Semgrep App token to use for authentication.
32
- """
33
- deployment = {}
34
- deployment_url = "https://semgrep.dev/api/v1/deployments"
35
- headers = {
36
- "Content-Type": "application/json",
37
- "Authorization": f"Bearer {semgrep_app_token}",
38
- }
39
- response = requests.get(deployment_url, headers=headers, timeout=_TIMEOUT)
40
- response.raise_for_status()
41
-
42
- data = response.json()
43
- deployment["id"] = data["deployments"][0]["id"]
44
- deployment["name"] = data["deployments"][0]["name"]
45
- deployment["slug"] = data["deployments"][0]["slug"]
46
-
47
- return deployment
48
-
49
-
50
- @timeit
51
- def get_sca_vulns(semgrep_app_token: str, deployment_id: str) -> List[Dict[str, Any]]:
29
+ def get_sca_vulns(semgrep_app_token: str, deployment_slug: str) -> List[Dict[str, Any]]:
52
30
  """
53
31
  Gets the SCA vulns associated with the passed Semgrep App token and deployment id.
54
32
  param: semgrep_app_token: The Semgrep App token to use for authentication.
55
- param: deployment_id: The Semgrep deployment id to use for retrieving SCA vulns.
33
+ param: deployment_slug: The Semgrep deployment slug to use for retrieving SCA vulns.
56
34
  """
57
35
  all_vulns = []
58
- sca_url = f"https://semgrep.dev/api/v1/deployments/{deployment_id}/ssc-vulns"
36
+ sca_url = f"https://semgrep.dev/api/v1/deployments/{deployment_slug}/findings"
59
37
  has_more = True
60
- cursor: Dict[str, str] = {}
61
- page = 1
38
+ page = 0
62
39
  retries = 0
63
40
  headers = {
64
41
  "Content-Type": "application/json",
65
42
  "Authorization": f"Bearer {semgrep_app_token}",
66
43
  }
67
44
 
68
- request_data = {
69
- "deploymentId": deployment_id,
70
- "pageSize": 100,
71
- "exposure": ["UNREACHABLE", "REACHABLE", "UNKNOWN_EXPOSURE"],
72
- "refs": ["_default"],
45
+ request_data: dict[str, Any] = {
46
+ "page": page,
47
+ "page_size": _PAGE_SIZE,
48
+ "issue_type": "sca",
49
+ "exposures": "reachable,always_reachable,conditionally_reachable,unreachable,unknown",
50
+ "ref": "_default",
51
+ "dedup": "true",
73
52
  }
74
-
53
+ logger.info(f"Retrieving Semgrep SCA vulns for deployment '{deployment_slug}'.")
75
54
  while has_more:
76
55
 
77
- if cursor:
78
- request_data.update({
79
- "cursor": {
80
- "vulnOffset": cursor["vulnOffset"],
81
- "issueOffset": cursor["issueOffset"],
82
- },
83
- })
84
56
  try:
85
- response = requests.post(sca_url, json=request_data, headers=headers, timeout=_TIMEOUT)
57
+ response = requests.get(sca_url, params=request_data, headers=headers, timeout=_TIMEOUT)
86
58
  response.raise_for_status()
87
59
  data = response.json()
88
- except HTTPError as e:
60
+ except (ReadTimeout, HTTPError):
89
61
  logger.warning(f"Failed to retrieve Semgrep SCA vulns for page {page}. Retrying...")
90
62
  retries += 1
91
63
  if retries >= _MAX_RETRIES:
92
- raise e
64
+ raise
93
65
  continue
94
- vulns = data["vulns"]
95
- cursor = data.get("cursor")
96
- has_more = data.get("hasMore", False)
66
+ vulns = data["findings"]
67
+ has_more = len(vulns) > 0
97
68
  if page % 10 == 0:
98
- logger.info(f"Processed {page} pages of Semgrep SCA vulnerabilities so far.")
69
+ logger.info(f"Processed page {page} of Semgrep SCA vulnerabilities.")
99
70
  all_vulns.extend(vulns)
100
71
  retries = 0
72
+ page += 1
73
+ request_data["page"] = page
101
74
 
75
+ logger.info(f"Retrieved {len(all_vulns)} Semgrep SCA vulns in {page} pages.")
102
76
  return all_vulns
103
77
 
104
78
 
79
+ def _get_vuln_class(vuln: Dict) -> str:
80
+ vulnerability_classes = vuln["rule"].get("vulnerability_classes", [])
81
+ if vulnerability_classes:
82
+ return vulnerability_classes[0]
83
+ return "Other"
84
+
85
+
86
+ def _determine_exposure(vuln: Dict[str, Any]) -> str | None:
87
+ # See Semgrep reachability types:
88
+ # https://semgrep.dev/docs/semgrep-supply-chain/overview#types-of-semgrep-supply-chain-findings
89
+ reachability_types = {
90
+ "NO REACHABILITY ANALYSIS": 2,
91
+ "UNREACHABLE": 2,
92
+ "REACHABLE": 0,
93
+ "ALWAYS REACHABLE": 0,
94
+ "CONDITIONALLY REACHABLE": 1,
95
+ }
96
+ reachable_flag = vuln["reachability"]
97
+ if reachable_flag and reachable_flag.upper() in reachability_types:
98
+ reach_score = reachability_types[reachable_flag.upper()]
99
+ if reach_score < reachability_types["UNREACHABLE"]:
100
+ return "REACHABLE"
101
+ else:
102
+ return "UNREACHABLE"
103
+ return None
104
+
105
+
106
+ def _build_vuln_url(vuln: str) -> str | None:
107
+ if 'CVE' in vuln:
108
+ return f"https://nvd.nist.gov/vuln/detail/{vuln}"
109
+ if 'GHSA' in vuln:
110
+ return f"https://github.com/advisories/{vuln}"
111
+ return None
112
+
113
+
105
114
  def transform_sca_vulns(raw_vulns: List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], List[Dict[str, str]]]:
106
115
  """
107
116
  Transforms the raw SCA vulns response from Semgrep API into a list of dicts
@@ -112,60 +121,60 @@ def transform_sca_vulns(raw_vulns: List[Dict[str, Any]]) -> Tuple[List[Dict[str,
112
121
  for vuln in raw_vulns:
113
122
  sca_vuln: Dict[str, Any] = {}
114
123
  # Mandatory fields
115
- sca_vuln["id"] = vuln["groupKey"]
116
- sca_vuln["repositoryName"] = vuln["repositoryName"]
117
- sca_vuln["ruleId"] = vuln["advisory"]["ruleId"]
118
- sca_vuln["title"] = vuln["advisory"]["title"]
119
- sca_vuln["description"] = vuln["advisory"]["description"]
120
- sca_vuln["ecosystem"] = vuln["advisory"]["ecosystem"]
121
- sca_vuln["severity"] = vuln["advisory"]["severity"]
122
- sca_vuln["reachability"] = vuln["advisory"]["reachability"]
123
- sca_vuln["reachableIf"] = vuln["advisory"]["reachableIf"]
124
- sca_vuln["exposureType"] = vuln["exposureType"]
125
- dependency = f"{vuln['matchedDependency']['name']}|{vuln['matchedDependency']['versionSpecifier']}"
124
+ repository_name = vuln["repository"]["name"]
125
+ rule_id = vuln["rule"]["name"]
126
+ vulnerability_class = _get_vuln_class(vuln)
127
+ package = vuln['found_dependency']['package']
128
+ sca_vuln["id"] = vuln["id"]
129
+ sca_vuln["repositoryName"] = repository_name
130
+ sca_vuln["branch"] = vuln["ref"]
131
+ sca_vuln["ruleId"] = rule_id
132
+ sca_vuln["title"] = package + ":" + vulnerability_class
133
+ sca_vuln["description"] = vuln["rule"]["message"]
134
+ sca_vuln["ecosystem"] = vuln["found_dependency"]["ecosystem"]
135
+ sca_vuln["severity"] = vuln["severity"].upper()
136
+ sca_vuln["reachability"] = vuln["reachability"].upper() # Check done to determine rechabilitity
137
+ sca_vuln["reachableIf"] = vuln["reachable_condition"].upper() if vuln["reachable_condition"] else None
138
+ sca_vuln["exposureType"] = _determine_exposure(vuln) # Determintes if reachable or unreachable
139
+ dependency = f"{package}|{vuln['found_dependency']['version']}"
126
140
  sca_vuln["matchedDependency"] = dependency
127
- sca_vuln["dependencyFileLocation_path"] = vuln["dependencyFileLocation"]["path"]
128
- sca_vuln["dependencyFileLocation_url"] = vuln["dependencyFileLocation"]["url"]
129
- # Optional fields
130
- sca_vuln["transitivity"] = vuln.get("transitivity", None)
131
- cves = vuln.get("advisory", {}).get("references", {}).get("cveIds")
132
- if len(cves) > 0:
133
- # Take the first CVE
134
- sca_vuln["cveId"] = vuln["advisory"]["references"]["cveIds"][0]
135
- if vuln.get('closestSafeDependency'):
136
- dep_fix = f"{vuln['closestSafeDependency']['name']}|{vuln['closestSafeDependency']['versionSpecifier']}"
141
+ dep_url = vuln["found_dependency"]["lockfile_line_url"]
142
+ if dep_url: # Lock file can be null, need to set
143
+ dep_file = dep_url.split("/")[-1].split("#")[0]
144
+ sca_vuln["dependencyFileLocation_path"] = dep_file
145
+ sca_vuln["dependencyFileLocation_url"] = dep_url
146
+ else:
147
+ if sca_vuln.get("location"):
148
+ sca_vuln["dependencyFileLocation_path"] = sca_vuln["location"]["file_path"]
149
+ sca_vuln["transitivity"] = vuln["found_dependency"]["transitivity"].upper()
150
+ if vuln.get("vulnerability_identifier"):
151
+ vuln_id = vuln["vulnerability_identifier"].upper()
152
+ sca_vuln["cveId"] = vuln_id
153
+ sca_vuln["ref_urls"] = [_build_vuln_url(vuln_id)]
154
+ if vuln.get('fix_recommendations') and len(vuln['fix_recommendations']) > 0:
155
+ fix = vuln['fix_recommendations'][0]
156
+ dep_fix = f"{fix['package']}|{fix['version']}"
137
157
  sca_vuln["closestSafeDependency"] = dep_fix
138
- if vuln["advisory"].get("references", {}).get("urls", []):
139
- sca_vuln["ref_urls"] = vuln["advisory"].get("references", {}).get("urls", [])
140
- sca_vuln["openedAt"] = vuln.get("openedAt", None)
141
- sca_vuln["announcedAt"] = vuln.get("announcedAt", None)
142
- sca_vuln["fixStatus"] = vuln["triage"]["status"]
143
- for usage in vuln.get("usages", []):
158
+ sca_vuln["openedAt"] = vuln["created_at"]
159
+ sca_vuln["fixStatus"] = vuln["status"]
160
+ sca_vuln["triageStatus"] = vuln["triage_state"]
161
+ sca_vuln["confidence"] = vuln["confidence"]
162
+ usage = vuln.get("usage")
163
+ if usage:
144
164
  usage_dict = {}
165
+ url = usage["location"]["url"]
145
166
  usage_dict["SCA_ID"] = sca_vuln["id"]
146
- usage_dict["findingId"] = usage["findingId"]
167
+ usage_dict["findingId"] = hash(url.split("github.com/")[-1])
147
168
  usage_dict["path"] = usage["location"]["path"]
148
- usage_dict["startLine"] = usage["location"]["startLine"]
149
- usage_dict["startCol"] = usage["location"]["startCol"]
150
- usage_dict["endLine"] = usage["location"]["endLine"]
151
- usage_dict["endCol"] = usage["location"]["endCol"]
152
- usage_dict["url"] = usage["location"]["url"]
169
+ usage_dict["startLine"] = usage["location"]["start_line"]
170
+ usage_dict["startCol"] = usage["location"]["start_col"]
171
+ usage_dict["endLine"] = usage["location"]["end_line"]
172
+ usage_dict["endCol"] = usage["location"]["end_col"]
173
+ usage_dict["url"] = url
153
174
  usages.append(usage_dict)
154
175
  vulns.append(sca_vuln)
155
- return vulns, usages
156
176
 
157
-
158
- @timeit
159
- def load_semgrep_deployment(
160
- neo4j_session: neo4j.Session, deployment: Dict[str, Any], update_tag: int,
161
- ) -> None:
162
- logger.info(f"Loading Semgrep deployment info {deployment} into the graph...")
163
- load(
164
- neo4j_session,
165
- SemgrepDeploymentSchema(),
166
- [deployment],
167
- lastupdated=update_tag,
168
- )
177
+ return vulns, usages
169
178
 
170
179
 
171
180
  @timeit
@@ -175,7 +184,7 @@ def load_semgrep_sca_vulns(
175
184
  deployment_id: str,
176
185
  update_tag: int,
177
186
  ) -> None:
178
- logger.info(f"Loading {len(vulns)} Semgrep SCA vulns info into the graph.")
187
+ logger.info(f"Loading {len(vulns)} SemgrepSCAFinding objects into the graph.")
179
188
  load(
180
189
  neo4j_session,
181
190
  SemgrepSCAFindingSchema(),
@@ -192,7 +201,7 @@ def load_semgrep_sca_usages(
192
201
  deployment_id: str,
193
202
  update_tag: int,
194
203
  ) -> None:
195
- logger.info(f"Loading {len(usages)} Semgrep SCA usages info into the graph.")
204
+ logger.info(f"Loading {len(usages)} SemgrepSCALocation objects into the graph.")
196
205
  load(
197
206
  neo4j_session,
198
207
  SemgrepSCALocationSchema(),
@@ -219,25 +228,32 @@ def cleanup(
219
228
 
220
229
 
221
230
  @timeit
222
- def sync(
223
- neo4j_sesion: neo4j.Session,
231
+ def sync_findings(
232
+ neo4j_session: neo4j.Session,
224
233
  semgrep_app_token: str,
225
234
  update_tag: int,
226
235
  common_job_parameters: Dict[str, Any],
227
236
  ) -> None:
237
+
238
+ deployment_id = common_job_parameters.get("DEPLOYMENT_ID")
239
+ deployment_slug = common_job_parameters.get("DEPLOYMENT_SLUG")
240
+ if not deployment_id or not deployment_slug:
241
+ logger.warning(
242
+ "Missing Semgrep deployment ID or slug, ensure that sync_deployment() has been called."
243
+ "Skipping SCA findings sync job.",
244
+ )
245
+ return
246
+
228
247
  logger.info("Running Semgrep SCA findings sync job.")
229
- semgrep_deployment = get_deployment(semgrep_app_token)
230
- deployment_id = semgrep_deployment["id"]
231
- load_semgrep_deployment(neo4j_sesion, semgrep_deployment, update_tag)
232
- common_job_parameters["DEPLOYMENT_ID"] = deployment_id
233
- raw_vulns = get_sca_vulns(semgrep_app_token, deployment_id)
248
+ raw_vulns = get_sca_vulns(semgrep_app_token, deployment_slug)
234
249
  vulns, usages = transform_sca_vulns(raw_vulns)
235
- load_semgrep_sca_vulns(neo4j_sesion, vulns, deployment_id, update_tag)
236
- load_semgrep_sca_usages(neo4j_sesion, usages, deployment_id, update_tag)
237
- run_scoped_analysis_job('semgrep_sca_risk_analysis.json', neo4j_sesion, common_job_parameters)
238
- cleanup(neo4j_sesion, common_job_parameters)
250
+ load_semgrep_sca_vulns(neo4j_session, vulns, deployment_id, update_tag)
251
+ load_semgrep_sca_usages(neo4j_session, usages, deployment_id, update_tag)
252
+ run_scoped_analysis_job('semgrep_sca_risk_analysis.json', neo4j_session, common_job_parameters)
253
+
254
+ cleanup(neo4j_session, common_job_parameters)
239
255
  merge_module_sync_metadata(
240
- neo4j_session=neo4j_sesion,
256
+ neo4j_session=neo4j_session,
241
257
  group_type='Semgrep',
242
258
  group_id=deployment_id,
243
259
  synced_type='SCA',
@@ -0,0 +1,30 @@
1
+ import logging
2
+
3
+ import neo4j
4
+
5
+ from cartography.config import Config
6
+ from cartography.intel.snipeit import asset
7
+ from cartography.intel.snipeit import user
8
+ from cartography.stats import get_stats_client
9
+ from cartography.util import timeit
10
+
11
+ logger = logging.getLogger(__name__)
12
+ stat_handler = get_stats_client(__name__)
13
+
14
+
15
@timeit
def start_snipeit_ingestion(neo4j_session: neo4j.Session, config: Config) -> None:
    """
    Runs the SnipeIT user sync followed by the asset sync.

    The sync is skipped with a warning when any of config.snipeit_base_uri,
    config.snipeit_token or config.snipeit_tenant_id is None.

    param: neo4j_session: The Neo4j session handed to each sync.
    param: config: Supplies the SnipeIT settings and the update tag.
    """
    fully_configured = (
        config.snipeit_base_uri is not None
        and config.snipeit_token is not None
        and config.snipeit_tenant_id is not None
    )
    if not fully_configured:
        logger.warning(
            "Required parameter(s) missing. Skipping sync.",
        )
        return

    job_parameters = {
        "UPDATE_TAG": config.update_tag,
        "TENANT_ID": config.snipeit_tenant_id,
    }

    # Ingest SnipeIT users and assets, users first.
    for module in (user, asset):
        module.sync(neo4j_session, job_parameters, config.snipeit_base_uri, config.snipeit_token)