owasp-depscan 5.4.8__py3-none-any.whl → 6.0.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of owasp-depscan might be problematic. Click here for more details.

Files changed (34) hide show
  1. depscan/__init__.py +8 -0
  2. depscan/cli.py +719 -827
  3. depscan/cli_options.py +302 -0
  4. depscan/lib/audit.py +3 -1
  5. depscan/lib/bom.py +390 -288
  6. depscan/lib/config.py +86 -337
  7. depscan/lib/explainer.py +363 -98
  8. depscan/lib/license.py +11 -10
  9. depscan/lib/logger.py +65 -17
  10. depscan/lib/package_query/__init__.py +0 -0
  11. depscan/lib/package_query/cargo_pkg.py +124 -0
  12. depscan/lib/package_query/metadata.py +170 -0
  13. depscan/lib/package_query/npm_pkg.py +345 -0
  14. depscan/lib/package_query/pkg_query.py +195 -0
  15. depscan/lib/package_query/pypi_pkg.py +113 -0
  16. depscan/lib/tomlparse.py +116 -0
  17. depscan/lib/utils.py +34 -188
  18. owasp_depscan-6.0.0a2.dist-info/METADATA +390 -0
  19. {owasp_depscan-5.4.8.dist-info → owasp_depscan-6.0.0a2.dist-info}/RECORD +28 -25
  20. {owasp_depscan-5.4.8.dist-info → owasp_depscan-6.0.0a2.dist-info}/WHEEL +1 -1
  21. vendor/choosealicense.com/_licenses/cern-ohl-p-2.0.txt +1 -1
  22. vendor/choosealicense.com/_licenses/cern-ohl-s-2.0.txt +1 -1
  23. vendor/choosealicense.com/_licenses/cern-ohl-w-2.0.txt +2 -2
  24. vendor/choosealicense.com/_licenses/mit-0.txt +1 -1
  25. vendor/spdx/json/licenses.json +904 -677
  26. depscan/lib/analysis.py +0 -1550
  27. depscan/lib/csaf.py +0 -1860
  28. depscan/lib/normalize.py +0 -312
  29. depscan/lib/orasclient.py +0 -142
  30. depscan/lib/pkg_query.py +0 -532
  31. owasp_depscan-5.4.8.dist-info/METADATA +0 -580
  32. {owasp_depscan-5.4.8.dist-info → owasp_depscan-6.0.0a2.dist-info}/entry_points.txt +0 -0
  33. {owasp_depscan-5.4.8.dist-info → owasp_depscan-6.0.0a2.dist-info/licenses}/LICENSE +0 -0
  34. {owasp_depscan-5.4.8.dist-info → owasp_depscan-6.0.0a2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,345 @@
1
+ from datetime import datetime
2
+
3
+ from depscan.lib import config
4
+ from depscan.lib.package_query.pkg_query import httpclient, compute_time_risks, calculate_risk_score
5
+
6
def search_npm(keywords=None, insecure_only=False, unstable_only=False, pages=1, popularity=1.0, size=250):
    """
    Search the npm registry for packages matching the given criteria.

    :param keywords: Optional list of keywords to filter the search
    :param insecure_only: Restrict results to packages flagged "insecure"
    :param unstable_only: Restrict results to packages flagged "unstable"
    :param pages: Number of result pages to fetch
    :param popularity: Popularity weighting passed to the search API
    :param size: Number of results per page
    :return: List of dicts with name, version, purl, insecure and unstable keys
    """
    pkg_list = []
    for page in range(pages):
        from_value = page * 250
        registry_search_url = f"{config.NPM_SERVER}/-/v1/search?popularity={popularity}&size={size}&from={from_value}"
        if insecure_only:
            registry_search_url = f"{registry_search_url}&text=is:insecure"
        elif unstable_only:
            registry_search_url = f"{registry_search_url}&text=is:unstable"
        elif keywords:
            registry_search_url = f"{registry_search_url}&text=keywords:{','.join(keywords)}"
        else:
            registry_search_url = f"{registry_search_url}&text=not:insecure"
        try:
            r = httpclient.get(
                url=registry_search_url,
                follow_redirects=True,
                timeout=config.request_timeout_sec,
            )
            result = r.json()
            if result and not r.is_error and result.get("objects"):
                for aobj in result.get("objects"):
                    if not aobj or not aobj.get("package"):
                        continue
                    package = aobj.get("package")
                    flags = aobj.get("flags", {})
                    name = package.get("name")
                    # Guard against a missing name; skip TypeScript type
                    # stubs since they carry no runtime code
                    if not name or name.startswith("@types/"):
                        continue
                    pkg_list.append(
                        {
                            "name": name,
                            "version": package.get("version"),
                            "purl": f'pkg:npm/{name.replace("@", "%40")}@{package.get("version")}',
                            "insecure": flags.get("insecure", 0) == 1,
                            "unstable": flags.get("unstable", False),
                        }
                    )
        except Exception as e:
            # Best-effort: report the failure and continue with the
            # remaining pages rather than aborting the whole search
            print(e)
    return pkg_list
48
+
49
+
50
def get_npm_download_stats(name, period="last-year"):
    """
    Fetch download statistics for an npm package from the npmjs API.

    :param name: Package name
    :param period: Stats period (eg: last-year)
    :return: Parsed JSON stats dict, or an empty dict on any failure
    """
    stats_url = f"https://api.npmjs.org/downloads/point/{period}/{name}"
    try:
        response = httpclient.get(
            url=stats_url,
            follow_redirects=True,
            timeout=config.request_timeout_sec,
        )
        return response.json()
    except Exception:
        # Stats are a nice-to-have; treat any network/parse failure as "no data"
        return {}
67
+
68
+
69
def npm_pkg_risk(pkg_metadata, is_private_pkg, scope, pkg):
    """
    Calculate various npm package risks based on the metadata from npm. The
    keys in the risk_metrics dict is based on the parameters specified in
    config.py and has a _risk suffix. Eg: config.pkg_min_versions would
    result in a boolean pkg_min_versions_risk and pkg_min_versions_value

    :param pkg_metadata: A dict containing the metadata of the npm package.
    :param is_private_pkg: Boolean to indicate if this package is private
    :param scope: Package scope
    :param pkg: Package object

    :return: A dict containing the calculated risks and score.
    """
    # Some default values to ensure the structure is non-empty
    risk_metrics = {
        "pkg_deprecated_risk": False,
        "pkg_version_deprecated_risk": False,
        "pkg_version_missing_risk": False,
        "pkg_includes_binary_risk": False,
        "pkg_min_versions_risk": False,
        "created_now_quarantine_seconds_risk": False,
        "latest_now_max_seconds_risk": False,
        "mod_create_min_seconds_risk": False,
        "pkg_min_maintainers_risk": False,
        "pkg_node_version_risk": False,
        "pkg_private_on_public_registry_risk": False,
    }
    # Is the private package available publicly? Dependency confusion.
    if is_private_pkg and pkg_metadata:
        risk_metrics["pkg_private_on_public_registry_risk"] = True
        risk_metrics["pkg_private_on_public_registry_value"] = 1
    versions = pkg_metadata.get("versions", {})
    latest_version = pkg_metadata.get("dist-tags", {}).get("latest")
    # Default to the latest version's engines/scripts/bin blocks; these get
    # overridden below when a specific requested version is found
    engines_block_dict = versions.get(latest_version, {}).get("engines", {})
    # Check for scripts block
    scripts_block_dict = versions.get(latest_version, {}).get("scripts", {})
    bin_block_dict = versions.get(latest_version, {}).get("bin", {})
    theversion = None
    if pkg:
        if pkg.get("version"):
            theversion = versions.get(pkg.get("version"), {})
            # Check if the version exists in the registry
            if not theversion:
                risk_metrics["pkg_version_missing_risk"] = True
                risk_metrics["pkg_version_missing_value"] = 1
        # Proceed with the rest of checks using the latest version
        if not theversion:
            theversion = versions.get(latest_version, {})
        # Get the version specific engines and scripts block
        if theversion.get("engines"):
            engines_block_dict = theversion.get("engines")
        if theversion.get("scripts"):
            scripts_block_dict = theversion.get("scripts")
        if theversion.get("bin"):
            bin_block_dict = theversion.get("bin")
        # Check if there is any binary downloaded and offered
        if theversion.get("binary"):
            risk_metrics["pkg_includes_binary_risk"] = True
            risk_metrics["pkg_includes_binary_value"] = 1
            # Capture the remote host serving the binary
            if theversion["binary"].get("host"):
                risk_metrics["pkg_includes_binary_info"] = (
                    f'Host: {theversion["binary"].get("host")}\nBinary: {theversion["binary"].get("module_name")}'
                )
            # For some packages, only napi_versions is present; fall back to
            # the repository URL or homepage as the binary origin info
            elif theversion["binary"].get("napi_versions"):
                if theversion.get("repository", {}).get("url"):
                    risk_metrics["pkg_includes_binary_info"] = (
                        f'Repository: {theversion.get("repository").get("url")}'
                    )
                elif theversion.get("homepage"):
                    risk_metrics["pkg_includes_binary_info"] = (
                        f'Homepage: {theversion.get("homepage")}'
                    )
        elif bin_block_dict and maybe_binary_npm_package(pkg.get("name")):
            # See #317 - a bin block plus a binary-looking name counts as binary
            risk_metrics["pkg_includes_binary_risk"] = True
            risk_metrics["pkg_includes_binary_value"] = len(
                bin_block_dict.keys()
            )
            bin_block_desc = ""
            for k, v in bin_block_dict.items():
                bin_block_desc = f"{bin_block_desc}\n{k}: {v}"
            if bin_block_desc:
                risk_metrics["pkg_includes_binary_info"] = (
                    f"Binary commands:{bin_block_desc}"
                )
        # Look for slsa attestations
        if theversion.get("dist", {}).get("attestations") and theversion.get(
            "dist", {}
        ).get("signatures"):
            attestations = theversion.get("dist").get("attestations")
            signatures = theversion.get("dist").get("signatures")
            # NOTE(review): assumes attestations always carry a "url" key;
            # a missing url would raise AttributeError here -- confirm
            if (
                attestations.get("url").startswith(
                    "https://registry.npmjs.org/"
                )
                and attestations.get("provenance", {}).get("predicateType", "")
                == "https://slsa.dev/provenance/v1"
            ):
                risk_metrics["pkg_attested_check"] = True
                risk_metrics["pkg_attested_value"] = len(signatures)
                risk_metrics["pkg_attested_info"] = "\n".join(
                    [sig.get("keyid") for sig in signatures]
                )
        # In some packages like biomejs, there would be no binary section
        # case 1: optional dependencies section might have a bunch of packages for each os
        # case 2: prebuild, prebuild-install, prebuildify in dependencies
        # case 3: there could be a libc attribute
        # case 4: fileCount <= 2 and size > 20 MB
        if not theversion.get("binary"):
            binary_count = 1
            if theversion.get("bin"):
                binary_count = max(len(theversion.get("bin", {}).keys()), 1)
            # case 1: per-OS/arch optional dependencies indicate shipped binaries
            for opkg in theversion.get("optionalDependencies", {}).keys():
                if (
                    "linux" in opkg
                    or "darwin" in opkg
                    or "win32" in opkg
                    or "arm64" in opkg
                    or "musl" in opkg
                ):
                    risk_metrics["pkg_includes_binary_risk"] = True
                    risk_metrics["pkg_includes_binary_value"] = binary_count
                    break
            # case 2: Eg: pkg:npm/zeromq@6.0.0-beta.19
            dev_deps = list(theversion.get("devDependencies", {}).keys())
            direct_deps = list(theversion.get("dependencies", {}).keys())
            if "prebuild" in " ".join(dev_deps) or "prebuild" in " ".join(
                direct_deps
            ):
                risk_metrics["pkg_includes_binary_risk"] = True
                risk_metrics["pkg_includes_binary_value"] = binary_count
            if not risk_metrics.get("pkg_includes_binary_risk"):
                # case 3: a libc attribute implies native builds
                if theversion.get("libc"):
                    risk_metrics["pkg_includes_binary_risk"] = True
                    risk_metrics["pkg_includes_binary_value"] = len(
                        theversion.get("libc", [])
                    )
                # case 4: very few files but a large unpacked size (> 20 MB)
                elif (
                    theversion.get("dist", {}).get("fileCount", 0) <= 2
                    and theversion.get("dist", {}).get("unpackedSize")
                    and (
                        theversion.get("dist").get("unpackedSize", 0)
                        / (1000 * 1000)
                    )
                    > 20
                ):
                    risk_metrics["pkg_includes_binary_risk"] = True
                    risk_metrics["pkg_includes_binary_value"] = 1
    is_deprecated = (
        versions.get(latest_version, {}).get("deprecated", None) is not None
    )
    is_version_deprecated = (
        True if theversion and theversion.get("deprecated") else False
    )
    # Is the package deprecated
    if is_deprecated:
        risk_metrics["pkg_deprecated_risk"] = True
        risk_metrics["pkg_deprecated_value"] = 1
    elif is_version_deprecated:
        risk_metrics["pkg_version_deprecated_risk"] = True
        risk_metrics["pkg_version_deprecated_value"] = 1
        # The deprecation reason for a specific version are often useful
        risk_metrics["pkg_version_deprecated_info"] = theversion.get(
            "deprecated"
        )
    scripts_block_list = []
    # There are some packages on npm with incorrectly configured scripts
    # block Good news is that the install portion would only for if the
    # scripts block is an object/dict
    if isinstance(scripts_block_dict, dict):
        scripts_block_list = [
            block
            for block in scripts_block_dict.keys()
            if block in ("preinstall", "postinstall", "prebuild")
        ]
        # Detect the use of prebuild-install
        # https://github.com/prebuild/prebuild-install
        # https://github.com/prebuild/prebuildify
        if not risk_metrics.get("pkg_includes_binary_risk"):
            if scripts_block_dict.get("prebuild", "").startswith("prebuild"):
                risk_metrics["pkg_includes_binary_risk"] = True
                risk_metrics["pkg_includes_binary_value"] = 1
    # If the package has fewer than minimum number of versions
    if len(versions) < config.pkg_min_versions:
        risk_metrics["pkg_min_versions_risk"] = True
        risk_metrics["pkg_min_versions_value"] = len(versions)
    # Time related checks. Timestamps come ISO-formatted with a trailing Z
    # which datetime.fromisoformat (pre-3.11) cannot parse, hence the strip
    time_info = pkg_metadata.get("time", {})
    modified = time_info.get("modified", "").replace("Z", "")
    created = time_info.get("created", "").replace("Z", "")
    # Fall back to mtime/ctime when the time block lacks modified/created
    if not modified and pkg_metadata.get("mtime"):
        modified = pkg_metadata.get("mtime").replace("Z", "")
    if not created and pkg_metadata.get("ctime"):
        created = pkg_metadata.get("ctime").replace("Z", "")
    latest_version_time = time_info.get(latest_version, "").replace("Z", "")
    if time_info and modified and created and latest_version_time:
        modified_dt = datetime.fromisoformat(modified)
        created_dt = datetime.fromisoformat(created)
        latest_version_time_dt = datetime.fromisoformat(latest_version_time)
        mod_create_diff = modified_dt - created_dt
        latest_now_diff = datetime.now() - latest_version_time_dt
        created_now_diff = datetime.now() - created_dt
        risk_metrics = compute_time_risks(
            risk_metrics, created_now_diff, mod_create_diff, latest_now_diff
        )

    # Maintainers count related risk. Ignore packages that are past
    # quarantine period
    maintainers = pkg_metadata.get("maintainers", [])
    if len(maintainers) < config.pkg_min_maintainers and risk_metrics.get(
        "created_now_quarantine_seconds_risk"
    ):
        risk_metrics["pkg_min_maintainers_risk"] = True
        risk_metrics["pkg_min_maintainers_value"] = len(maintainers)
        # Check for install scripts risk only for those packages with
        # maintainers risk
        if scripts_block_list:
            risk_metrics["pkg_install_scripts_risk"] = True
            risk_metrics["pkg_install_scripts_value"] = len(scripts_block_list)

    # Users count related risk. Ignore packages that are past quarantine period
    users = pkg_metadata.get("users", [])
    if (
        users
        and len(users) < config.pkg_min_users
        and risk_metrics.get("created_now_quarantine_seconds_risk")
    ):
        risk_metrics["pkg_min_users_risk"] = True
        risk_metrics["pkg_min_users_value"] = len(users)
    # Node engine version There are packages with incorrect node engine
    # specification which we can ignore for now
    if (
        engines_block_dict
        and isinstance(engines_block_dict, dict)
        and engines_block_dict.get("node")
        and isinstance(engines_block_dict.get("node"), str)
    ):
        node_version_spec = engines_block_dict.get("node")
        # Strip range operators (>=, >, ~) to get a bare version prefix
        node_version = (
            node_version_spec.replace(">= ", "")
            .replace(">=", "")
            .replace("> ", "")
            .replace(">", "")
            .replace("~ ", "")
            .replace("~", "")
            .split(" ")[0]
        )
        # Flag packages that target EOL node versions listed in config
        for ver in config.pkg_node_version.split(","):
            if node_version.startswith(ver):
                risk_metrics["pkg_node_version_risk"] = True
                risk_metrics["pkg_node_version_value"] = 1
                break
    # Add package scope related weight
    if scope:
        risk_metrics[f"pkg_{scope}_scope_risk"] = True
        risk_metrics[f"pkg_{scope}_scope_value"] = 1

    risk_metrics["risk_score"] = calculate_risk_score(risk_metrics)
    return risk_metrics
331
+
332
+
333
def maybe_binary_npm_package(name: str) -> bool:
    """
    Heuristically decide whether an npm package could ship a binary, based
    purely on its naming convention.

    :param name: Package name
    :returns: boolean
    """
    if not name:
        return False
    # A match on any configured suffix marks the package as possibly binary
    return any(
        name.endswith(bin_suffix)
        for bin_suffix in config.NPM_BINARY_PACKAGES_SUFFIXES
    )
@@ -0,0 +1,195 @@
1
+ import math
2
+ import os
3
+
4
+ from depscan.lib import config
5
+ from depscan.lib.logger import LOG
6
+
7
# Pick the HTTP client for the module: a Redis/Valkey backed caching client
# when DEPSCAN_CACHE_HOST or DEPSCAN_CACHE_PORT is set, plain httpx otherwise.
try:
    if os.getenv("DEPSCAN_CACHE_HOST") or os.getenv("DEPSCAN_CACHE_PORT"):
        import hishel
        import redis

        # Cache TTL and Redis port come from the environment with defaults
        storage = hishel.RedisStorage(
            ttl=config.get_int_from_env("DEPSCAN_CACHE_TTL", 36000),
            client=redis.Redis(
                host=os.getenv("DEPSCAN_CACHE_HOST", "127.0.0.1"),
                port=config.get_int_from_env("DEPSCAN_CACHE_PORT", 6379),
            ),
        )
        httpclient = hishel.CacheClient(storage=storage)
        LOG.debug("valkey cache activated.")
    else:
        import httpx

        httpclient = httpx
except ImportError:
    # Optional caching dependencies (hishel/redis) are missing --
    # fall back to the uncached httpx module
    import httpx

    httpclient = httpx
29
+
30
+
31
def get_lookup_url(registry_type, pkg):
    """
    Generating the lookup URL based on the registry type and package
    information.

    :param registry_type: The type of registry ("npm", "pypi" or "cargo")
    :param pkg: Dict with vendor/name keys, or a string of the form
        "vendor|name|version", "name|version" or just "name"
    :returns: Package key, lookup URL. (None, None) for unsupported
        registry types.
    """
    vendor = None
    if isinstance(pkg, dict):
        vendor = pkg.get("vendor")
        name = pkg.get("name")
    else:
        tmp_a = pkg.split("|")
        # "vendor|name|version" -> name is the second-to-last token.
        # A bare "name" string has no version token, so fall back to the
        # only element instead of raising IndexError.
        name = tmp_a[-2] if len(tmp_a) > 1 else tmp_a[0]
        if len(tmp_a) == 3:
            vendor = tmp_a[0]
    key = name
    # Prefix vendor for npm
    if registry_type == "npm":
        if vendor and vendor != "npm":
            # npm expects namespaces to start with an @
            if not vendor.startswith("@"):
                vendor = "@" + vendor
            key = f"{vendor}/{name}"
        return key, f"{config.NPM_SERVER}/{key}"
    if registry_type == "pypi":
        return key, f"{config.PYPI_SERVER}/{key}/json"
    if registry_type == "cargo":
        return key, f"{config.CARGO_SERVER}/{key}"
    return None, None
63
+
64
+
65
def get_category_score(
    param, max_value=config.DEFAULT_MAX_VALUE, weight=config.DEFAULT_WEIGHT
):
    """
    Return parameter score given its current value, max value and
    parameter weight.

    :param param: The current value of the parameter
    :param max_value: The maximum value of the parameter
    :param weight: The weight of the parameter
    :return: The calculated score as a float value
    """
    # float() raises ValueError for bad strings but TypeError for None or
    # non-numeric objects; treat both as "value unavailable"
    try:
        param = float(param)
    except (ValueError, TypeError):
        param = 0
    try:
        max_value = float(max_value)
    except (ValueError, TypeError):
        max_value = config.DEFAULT_MAX_VALUE
    try:
        weight = float(weight)
    except (ValueError, TypeError):
        weight = config.DEFAULT_WEIGHT
    # Log-scaled ratio of param against the larger of param/max, then
    # weighted; guard against a zero divisor when both values are 0
    return (
        0
        if weight == 0 or math.log(1 + max(param, max_value)) == 0
        else (math.log(1 + param) / math.log(1 + max(param, max_value)))
        * weight
    )
95
+
96
+
97
def calculate_risk_score(risk_metrics):
    """
    Combine the individual risk metrics into one weighted total score.
    Every key ending in ``_risk`` that is set to True contributes a
    category score derived from its ``_value`` counterpart and the
    per-category max/weight settings defined in config.

    :param risk_metrics: Dict containing many risk metrics
    :return: The calculated total risk score
    """
    if not risk_metrics:
        return 0
    num_risks = 0
    working_score = 0
    total_weight = 0
    for key, flagged in risk_metrics.items():
        # Only triggered risks (boolean True on a _risk key) are scored
        if not key.endswith("_risk") or flagged is not True:
            continue
        category = key.replace("_risk", "")
        category_value = risk_metrics.get(f"{category}_value", 0)
        category_max = getattr(
            config, f"{category}_max", config.DEFAULT_MAX_VALUE
        )
        category_weight = getattr(
            config, f"{category}_weight", config.DEFAULT_WEIGHT
        )
        category_base = getattr(config, f"{category}", 0)
        value = category_value
        # When a configured numeric baseline (or the category max) exceeds
        # the observed value, score the distance from that reference
        if (
            category_base
            and isinstance(category_base, (float, int))
            and category_base > category_value
        ):
            value = category_base - category_value
        elif category_max and category_max > category_value:
            value = category_max - category_value
        category_score = get_category_score(
            value, category_max, category_weight
        )
        total_weight += category_weight
        working_score += min(category_score, 1)
        num_risks += 1
    # Normalize by the configured total weight and clamp to [0, 1]
    working_score = round(working_score * total_weight / config.total_weight, 5)
    working_score = max(min(working_score, 1), 0)
    return working_score
144
+
145
+
146
def compute_time_risks(
    risk_metrics, created_now_diff, mod_create_diff, latest_now_diff
):
    """
    Derive age and freshness related risks from three time deltas.

    :param risk_metrics: A dict containing the risk metrics for the package.
    :param created_now_diff: Time elapsed since the package was created.
    :param mod_create_diff: Time between creation and last modification of
        the package.
    :param latest_now_diff: Time elapsed since the latest version of the
        package was published.
    :return: The updated risk metrics dictionary with the calculated
        risks and values.
    """
    created_age_secs = created_now_diff.total_seconds()
    latest_age_secs = latest_now_diff.total_seconds()

    # Quarantine: the package was created more recently than the configured
    # quarantine window (roughly a year)
    if created_age_secs < config.created_now_quarantine_seconds:
        risk_metrics["created_now_quarantine_seconds_risk"] = True
        # NOTE(review): the stored value is the latest-release age rather
        # than the creation age -- preserved as-is, confirm upstream intent
        risk_metrics["created_now_quarantine_seconds_value"] = latest_age_secs

    if latest_age_secs > config.latest_now_max_seconds:
        # Stale: no release for a very long time
        risk_metrics["latest_now_max_seconds_risk"] = True
        risk_metrics["latest_now_max_seconds_value"] = latest_age_secs
        # A package this old is allowed to have very few versions
        risk_metrics["pkg_min_versions_risk"] = False
    elif mod_create_diff.total_seconds() < config.mod_create_min_seconds:
        # Newish package that was created and then modified almost
        # immediately -- this check is restricted to younger packages to
        # avoid flagging many old npm packages updated within a day
        risk_metrics["mod_create_min_seconds_risk"] = True
        risk_metrics["mod_create_min_seconds_value"] = (
            mod_create_diff.total_seconds()
        )

    # Very fresh release: the latest version landed only moments ago
    if latest_age_secs < config.latest_now_min_seconds:
        risk_metrics["latest_now_min_seconds_risk"] = True
        risk_metrics["latest_now_min_seconds_value"] = latest_age_secs
    return risk_metrics
@@ -0,0 +1,113 @@
1
+ from datetime import datetime
2
+
3
+ from depscan.lib import config
4
+ from semver import Version
5
+ from depscan.lib.package_query.pkg_query import compute_time_risks, calculate_risk_score
6
+
7
+
8
def pypi_pkg_risk(pkg_metadata, is_private_pkg, scope, pkg):
    """
    Calculate various package risks based on the metadata from pypi.

    :param pkg_metadata: A dict containing the metadata of the package from PyPI
    :param is_private_pkg: Boolean to indicate if this package is private
    :param scope: Package scope
    :param pkg: Package object

    :return: Dict of risk metrics and corresponding PyPI values.
    """
    # Default values so the returned structure is never empty
    risk_metrics = {
        "pkg_deprecated_risk": False,
        "pkg_version_deprecated_risk": False,
        "pkg_version_missing_risk": False,
        "pkg_min_versions_risk": False,
        "created_now_quarantine_seconds_risk": False,
        "latest_now_max_seconds_risk": False,
        "mod_create_min_seconds_risk": False,
        "pkg_min_maintainers_risk": False,
        "pkg_private_on_public_registry_risk": False,
    }
    info = pkg_metadata.get("info", {})
    versions_dict = pkg_metadata.get("releases", {})
    # Each release maps to a list of uploaded files; use the first file's
    # metadata as the representative for that version
    versions = [ver[0] for k, ver in versions_dict.items() if ver]
    is_deprecated = info.get("yanked") and info.get("yanked_reason")
    is_version_deprecated = False
    if not is_deprecated and pkg and pkg.get("version"):
        theversion = versions_dict.get(pkg.get("version"), [])
        if isinstance(theversion, list) and len(theversion) > 0:
            theversion = theversion[0]
        # BUG FIX: this check used to be an elif of the list-extraction
        # above, making it unreachable whenever the release value was the
        # usual non-empty list; check the extracted metadata instead
        if isinstance(theversion, dict) and theversion.get("yanked"):
            is_version_deprecated = True
        # Check if the version exists in the registry
        if not theversion:
            risk_metrics["pkg_version_missing_risk"] = True
            risk_metrics["pkg_version_missing_value"] = 1
    # Some packages like pypi:azure only mention deprecated in the description
    # without yanking the package
    pkg_description = info.get("description", "").lower()
    if not is_deprecated and (
        "is deprecated" in pkg_description
        or "no longer maintained" in pkg_description
    ):
        is_deprecated = True
    latest_deprecated = False
    # Ignore empty versions without metadata. Thanks pypi
    version_nums = [
        ver for ver in versions_dict.keys() if versions_dict.get(ver)
    ]
    first_version = None
    latest_version = None
    # BUG FIX: guard against packages with no usable releases -- the
    # fallback below used to index into an empty list and crash
    if version_nums:
        try:
            first_version_num = min(
                version_nums,
                key=lambda x: Version.parse(x, optional_minor_and_patch=True),
            )
            latest_version_num = max(
                version_nums,
                key=lambda x: Version.parse(x, optional_minor_and_patch=True),
            )
        except (ValueError, TypeError):
            # Not every pypi version is valid semver; fall back to the
            # dict's insertion order
            first_version_num = version_nums[0]
            latest_version_num = version_nums[-1]
        first_version = versions_dict.get(first_version_num)[0]
        latest_version = versions_dict.get(latest_version_num)[0]

    # Is the private package available publicly? Dependency confusion.
    if is_private_pkg and pkg_metadata:
        risk_metrics["pkg_private_on_public_registry_risk"] = True
        risk_metrics["pkg_private_on_public_registry_value"] = 1

    # If the package has fewer than minimum number of versions
    if versions:
        if len(versions) < config.pkg_min_versions:
            risk_metrics["pkg_min_versions_risk"] = True
            risk_metrics["pkg_min_versions_value"] = len(versions)
        # Check if the latest version is deprecated
        if latest_version and latest_version.get("yanked"):
            latest_deprecated = True

    # Created and modified time related checks
    if first_version and latest_version:
        created = first_version.get("upload_time")
        modified = latest_version.get("upload_time")
        if created and modified:
            modified_dt = datetime.fromisoformat(modified)
            created_dt = datetime.fromisoformat(created)
            mod_create_diff = modified_dt - created_dt
            latest_now_diff = datetime.now() - modified_dt
            created_now_diff = datetime.now() - created_dt
            risk_metrics = compute_time_risks(
                risk_metrics, created_now_diff, mod_create_diff, latest_now_diff
            )

    # Is the package deprecated
    if is_deprecated or latest_deprecated:
        risk_metrics["pkg_deprecated_risk"] = True
        risk_metrics["pkg_deprecated_value"] = 1
    elif is_version_deprecated:
        risk_metrics["pkg_version_deprecated_risk"] = True
        risk_metrics["pkg_version_deprecated_value"] = 1
    # Add package scope related weight
    if scope:
        risk_metrics[f"pkg_{scope}_scope_risk"] = True
        risk_metrics[f"pkg_{scope}_scope_value"] = 1

    risk_metrics["risk_score"] = calculate_risk_score(risk_metrics)
    return risk_metrics