owasp-depscan 5.5.0__py3-none-any.whl → 6.0.0a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of owasp-depscan might be problematic. Click here for more details.

Files changed (34)
  1. depscan/__init__.py +8 -0
  2. depscan/cli.py +719 -827
  3. depscan/cli_options.py +302 -0
  4. depscan/lib/audit.py +3 -1
  5. depscan/lib/bom.py +390 -288
  6. depscan/lib/config.py +86 -337
  7. depscan/lib/explainer.py +363 -98
  8. depscan/lib/license.py +11 -10
  9. depscan/lib/logger.py +65 -17
  10. depscan/lib/package_query/__init__.py +0 -0
  11. depscan/lib/package_query/cargo_pkg.py +124 -0
  12. depscan/lib/package_query/metadata.py +170 -0
  13. depscan/lib/package_query/npm_pkg.py +345 -0
  14. depscan/lib/package_query/pkg_query.py +195 -0
  15. depscan/lib/package_query/pypi_pkg.py +113 -0
  16. depscan/lib/tomlparse.py +116 -0
  17. depscan/lib/utils.py +34 -188
  18. owasp_depscan-6.0.0a2.dist-info/METADATA +390 -0
  19. {owasp_depscan-5.5.0.dist-info → owasp_depscan-6.0.0a2.dist-info}/RECORD +28 -25
  20. {owasp_depscan-5.5.0.dist-info → owasp_depscan-6.0.0a2.dist-info}/WHEEL +1 -1
  21. vendor/choosealicense.com/_licenses/cern-ohl-p-2.0.txt +1 -1
  22. vendor/choosealicense.com/_licenses/cern-ohl-s-2.0.txt +1 -1
  23. vendor/choosealicense.com/_licenses/cern-ohl-w-2.0.txt +2 -2
  24. vendor/choosealicense.com/_licenses/mit-0.txt +1 -1
  25. vendor/spdx/json/licenses.json +904 -677
  26. depscan/lib/analysis.py +0 -1554
  27. depscan/lib/csaf.py +0 -1860
  28. depscan/lib/normalize.py +0 -312
  29. depscan/lib/orasclient.py +0 -142
  30. depscan/lib/pkg_query.py +0 -532
  31. owasp_depscan-5.5.0.dist-info/METADATA +0 -580
  32. {owasp_depscan-5.5.0.dist-info → owasp_depscan-6.0.0a2.dist-info}/entry_points.txt +0 -0
  33. {owasp_depscan-5.5.0.dist-info → owasp_depscan-6.0.0a2.dist-info/licenses}/LICENSE +0 -0
  34. {owasp_depscan-5.5.0.dist-info → owasp_depscan-6.0.0a2.dist-info}/top_level.txt +0 -0
depscan/lib/pkg_query.py DELETED
@@ -1,532 +0,0 @@
1
- import math
2
- from datetime import datetime
3
- from semver import Version
4
-
5
- import httpx
6
- from rich.progress import Progress
7
-
8
- from depscan.lib import config
9
- from depscan.lib.logger import LOG, console
10
-
11
-
12
- def get_lookup_url(registry_type, pkg):
13
- """
14
- Generating the lookup URL based on the registry type and package
15
- information.
16
-
17
- :param registry_type: The type of registry ("npm" or "pypi")
18
- :param pkg: Dict or string of package information
19
- :returns: Package name, lookup URL
20
- """
21
- vendor = None
22
- if isinstance(pkg, dict):
23
- vendor = pkg.get("vendor")
24
- name = pkg.get("name")
25
- else:
26
- tmpA = pkg.split("|")
27
- name = tmpA[len(tmpA) - 2]
28
- if len(tmpA) == 3:
29
- vendor = tmpA[0]
30
- key = name
31
- # Prefix vendor for npm
32
- if registry_type == "npm":
33
- if vendor and vendor != "npm":
34
- # npm expects namespaces to start with an @
35
- if not vendor.startswith("@"):
36
- vendor = "@" + vendor
37
- key = f"{vendor}/{name}"
38
- return key, f"{config.npm_server}/{key}"
39
- elif registry_type == "pypi":
40
- return key, f"{config.pypi_server}/{key}/json"
41
- return None, None
42
-
43
-
44
- def metadata_from_registry(
45
- registry_type, scoped_pkgs, pkg_list, private_ns=None
46
- ):
47
- """
48
- Method to query registry for the package metadata
49
-
50
- :param registry_type: The type of registry to query
51
- :param scoped_pkgs: Dictionary of lists of packages per scope
52
- :param pkg_list: List of package dictionaries
53
- :param private_ns: Private namespace
54
- :return: A dict of package metadata, risk metrics, and private package
55
- flag for each package
56
- """
57
- metadata_dict = {}
58
- # Circuit breaker flag to break the risk audit in case of many api errors
59
- circuit_breaker = False
60
- # Track the api failures count
61
- failure_count = 0
62
- done_count = 0
63
- with Progress(
64
- console=console,
65
- transient=True,
66
- redirect_stderr=False,
67
- redirect_stdout=False,
68
- refresh_per_second=1,
69
- ) as progress:
70
- task = progress.add_task(
71
- "[green] Auditing packages", total=len(pkg_list)
72
- )
73
- for pkg in pkg_list:
74
- if circuit_breaker:
75
- LOG.info(
76
- "Risk audited has been interrupted due to frequent api "
77
- "errors. Please try again later."
78
- )
79
- progress.stop()
80
- return {}
81
- scope = pkg.get("scope", "").lower()
82
- key, lookup_url = get_lookup_url(registry_type, pkg)
83
- if not key or not lookup_url or key.startswith("https://"):
84
- progress.advance(task)
85
- continue
86
- progress.update(task, description=f"Checking {key}")
87
- try:
88
- r = httpx.get(
89
- url=lookup_url,
90
- follow_redirects=True,
91
- timeout=config.request_timeout_sec,
92
- )
93
- json_data = r.json()
94
- # Npm returns this error if the package is not found
95
- if (
96
- json_data.get("code") == "MethodNotAllowedError"
97
- or r.status_code > 400
98
- ):
99
- continue
100
- is_private_pkg = False
101
- if private_ns:
102
- namespace_prefixes = private_ns.split(",")
103
- for ns in namespace_prefixes:
104
- if key.lower().startswith(
105
- ns.lower()
106
- ) or key.lower().startswith("@" + ns.lower()):
107
- is_private_pkg = True
108
- break
109
- risk_metrics = {}
110
- if registry_type == "npm":
111
- risk_metrics = npm_pkg_risk(
112
- json_data, is_private_pkg, scope
113
- )
114
- elif registry_type == "pypi":
115
- project_type_pkg = f"python:{key}".lower()
116
- required_pkgs = scoped_pkgs.get("required", [])
117
- optional_pkgs = scoped_pkgs.get("optional", [])
118
- excluded_pkgs = scoped_pkgs.get("excluded", [])
119
- if (
120
- pkg.get("purl") in required_pkgs
121
- or project_type_pkg in required_pkgs
122
- ):
123
- scope = "required"
124
- elif (
125
- pkg.get("purl") in optional_pkgs
126
- or project_type_pkg in optional_pkgs
127
- ):
128
- scope = "optional"
129
- elif (
130
- pkg.get("purl") in excluded_pkgs
131
- or project_type_pkg in excluded_pkgs
132
- ):
133
- scope = "excluded"
134
- risk_metrics = pypi_pkg_risk(
135
- json_data, is_private_pkg, scope, pkg
136
- )
137
- metadata_dict[key] = {
138
- "scope": scope,
139
- "pkg_metadata": json_data,
140
- "risk_metrics": risk_metrics,
141
- "is_private_pkg": is_private_pkg,
142
- }
143
- except Exception as e:
144
- LOG.debug(e)
145
- failure_count += 1
146
- progress.advance(task)
147
- done_count += 1
148
- if failure_count >= config.max_request_failures:
149
- circuit_breaker = True
150
- LOG.debug(
151
- "Retrieved package metadata for %d/%d packages. Failures count %d",
152
- done_count,
153
- len(pkg_list),
154
- failure_count,
155
- )
156
- return metadata_dict
157
-
158
-
159
- def npm_metadata(scoped_pkgs, pkg_list, private_ns=None):
160
- """
161
- Method to query npm for the package metadata
162
-
163
- :param scoped_pkgs: Dictionary of lists of packages per scope
164
- :param pkg_list: List of package dictionaries
165
- :param private_ns: Private namespace
166
- :return: A dict of package metadata, risk metrics, and private package
167
- flag for each package
168
- """
169
- return metadata_from_registry("npm", scoped_pkgs, pkg_list, private_ns)
170
-
171
-
172
- def pypi_metadata(scoped_pkgs, pkg_list, private_ns=None):
173
- """
174
- Method to query pypi for the package metadata
175
-
176
- :param scoped_pkgs: Dictionary of lists of packages per scope
177
- :param pkg_list: List of package dictionaries
178
- :param private_ns: Private namespace
179
- :return: A dict of package metadata, risk metrics, and private package
180
- flag for each package
181
- """
182
- return metadata_from_registry("pypi", scoped_pkgs, pkg_list, private_ns)
183
-
184
-
185
- def get_category_score(
186
- param, max_value=config.default_max_value, weight=config.default_weight
187
- ):
188
- """
189
- Return parameter score given its current value, max value and
190
- parameter weight.
191
-
192
- :param param: The current value of the parameter
193
- :param max_value: The maximum value of the parameter
194
- :param weight: The weight of the parameter
195
- :return: The calculated score as a float value
196
- """
197
- try:
198
- param = float(param)
199
- except ValueError:
200
- param = 0
201
- try:
202
- max_value = float(max_value)
203
- except ValueError:
204
- max_value = config.default_max_value
205
- try:
206
- weight = float(weight)
207
- except ValueError:
208
- weight = config.default_weight
209
- return (math.log(1 + param) / math.log(1 + max(param, max_value))) * weight
210
-
211
-
212
- def calculate_risk_score(risk_metrics):
213
- """
214
- Method to calculate a total risk score based on risk metrics. This is
215
- based on a weighted formula and might require customization based on use
216
- cases
217
-
218
- :param risk_metrics: Dict containing many risk metrics
219
- :return: The calculated total risk score
220
- """
221
- if not risk_metrics:
222
- return 0
223
- num_risks = 0
224
- working_score = 0
225
- total_weight = 0
226
- for k, v in risk_metrics.items():
227
- # Is the _risk key set to True
228
- if k.endswith("_risk") and v is True:
229
- risk_category = k.replace("_risk", "")
230
- risk_category_value = risk_metrics.get(f"{risk_category}_value", 0)
231
- risk_category_max = getattr(
232
- config, f"{risk_category}_max", config.default_max_value
233
- )
234
- risk_category_weight = getattr(
235
- config, f"{risk_category}_weight", config.default_weight
236
- )
237
- risk_category_base = getattr(config, f"{risk_category}", 0)
238
- value = risk_category_value
239
- if (
240
- risk_category_base
241
- and (
242
- isinstance(risk_category_base, float)
243
- or isinstance(risk_category_base, int)
244
- )
245
- and risk_category_base > risk_category_value
246
- ):
247
- value = risk_category_base - risk_category_value
248
- elif risk_category_max and risk_category_max > risk_category_value:
249
- value = risk_category_max - risk_category_value
250
- cat_score = get_category_score(
251
- value, risk_category_max, risk_category_weight
252
- )
253
- total_weight += risk_category_weight
254
- working_score += min(cat_score, 1)
255
- num_risks += 1
256
- working_score = round(working_score * total_weight / config.total_weight, 5)
257
- working_score = max(min(working_score, 1), 0)
258
- return working_score
259
-
260
-
261
- def compute_time_risks(
262
- risk_metrics, created_now_diff, mod_create_diff, latest_now_diff
263
- ):
264
- """
265
- Compute risks based on creation, modified and time elapsed
266
-
267
- :param risk_metrics: A dict containing the risk metrics for the package.
268
- :param created_now_diff: Time difference from creation of the package and
269
- the current time.
270
- :param mod_create_diff: Time difference from
271
- modification and creation of the package.
272
- :param latest_now_diff: Time difference between the latest version of the
273
- package and the current
274
- time.
275
- :return: The updated risk metrics dictionary with the calculated
276
- risks and values.
277
- """
278
- # Check if the package is at least 1 year old. Quarantine period.
279
- if created_now_diff.total_seconds() < config.created_now_quarantine_seconds:
280
- risk_metrics["created_now_quarantine_seconds_risk"] = True
281
- risk_metrics["created_now_quarantine_seconds_value"] = (
282
- latest_now_diff.total_seconds()
283
- )
284
-
285
- # Check for the maximum seconds difference between latest version and now
286
- if latest_now_diff.total_seconds() > config.latest_now_max_seconds:
287
- risk_metrics["latest_now_max_seconds_risk"] = True
288
- risk_metrics["latest_now_max_seconds_value"] = (
289
- latest_now_diff.total_seconds()
290
- )
291
- # Since the package is quite old we can relax the min versions risk
292
- risk_metrics["pkg_min_versions_risk"] = False
293
- else:
294
- # Check for the minimum seconds difference between creation and
295
- # modified date This check catches several old npm packages that was
296
- # created and immediately updated within a day To reduce noise we
297
- # check for the age first and perform this check only for newish
298
- # packages
299
- if mod_create_diff.total_seconds() < config.mod_create_min_seconds:
300
- risk_metrics["mod_create_min_seconds_risk"] = True
301
- risk_metrics["mod_create_min_seconds_value"] = (
302
- mod_create_diff.total_seconds()
303
- )
304
- # Check for the minimum seconds difference between latest version and now
305
- if latest_now_diff.total_seconds() < config.latest_now_min_seconds:
306
- risk_metrics["latest_now_min_seconds_risk"] = True
307
- risk_metrics["latest_now_min_seconds_value"] = (
308
- latest_now_diff.total_seconds()
309
- )
310
- return risk_metrics
311
-
312
-
313
- def pypi_pkg_risk(pkg_metadata, is_private_pkg, scope, pkg):
314
- """
315
- Calculate various package risks based on the metadata from pypi.
316
-
317
- :param pkg_metadata: A dict containing the metadata of the package from PyPI
318
- :param is_private_pkg: Boolean to indicate if this package is private
319
- :param scope: Package scope
320
- :return: Dict of risk metrics and corresponding PyPI values.
321
- """
322
- risk_metrics = {
323
- "pkg_deprecated_risk": False,
324
- "pkg_min_versions_risk": False,
325
- "created_now_quarantine_seconds_risk": False,
326
- "latest_now_max_seconds_risk": False,
327
- "mod_create_min_seconds_risk": False,
328
- "pkg_min_maintainers_risk": False,
329
- "pkg_private_on_public_registry_risk": False,
330
- }
331
- info = pkg_metadata.get("info", {})
332
- versions_dict = pkg_metadata.get("releases", {})
333
- versions = [ver[0] for k, ver in versions_dict.items() if ver]
334
- is_deprecated = info.get("yanked") and info.get("yanked_reason")
335
- if not is_deprecated and pkg and pkg.get("version"):
336
- theversion = versions_dict.get(pkg.get("version"), [])
337
- if isinstance(theversion, list):
338
- theversion = theversion[0]
339
- if theversion.get("yanked"):
340
- is_deprecated = True
341
- # Some packages like pypi:azure only mention deprecated in the description
342
- # without yanking the package
343
- pkg_description = info.get("description", "").lower()
344
- if not is_deprecated and (
345
- "is deprecated" in pkg_description
346
- or "no longer maintained" in pkg_description
347
- ):
348
- is_deprecated = True
349
- latest_deprecated = False
350
- version_nums = list(versions_dict.keys())
351
- # Ignore empty versions without metadata. Thanks pypi
352
- version_nums = [ver for ver in version_nums if versions_dict.get(ver)]
353
- try:
354
- first_version_num = min(
355
- version_nums,
356
- key=lambda x: Version.parse(x, optional_minor_and_patch=True),
357
- )
358
- latest_version_num = max(
359
- version_nums,
360
- key=lambda x: Version.parse(x, optional_minor_and_patch=True),
361
- )
362
- except (ValueError, TypeError):
363
- first_version_num = version_nums[0]
364
- latest_version_num = version_nums[-1]
365
- first_version = versions_dict.get(first_version_num)[0]
366
- latest_version = versions_dict.get(latest_version_num)[0]
367
- # Is the private package available publicly? Dependency confusion.
368
- if is_private_pkg and pkg_metadata:
369
- risk_metrics["pkg_private_on_public_registry_risk"] = True
370
- risk_metrics["pkg_private_on_public_registry_value"] = 1
371
-
372
- # If the package has fewer than minimum number of versions
373
- if len(versions):
374
- if len(versions) < config.pkg_min_versions:
375
- risk_metrics["pkg_min_versions_risk"] = True
376
- risk_metrics["pkg_min_versions_value"] = len(versions)
377
- # Check if the latest version is deprecated
378
- if latest_version and latest_version.get("yanked"):
379
- latest_deprecated = True
380
-
381
- # Created and modified time related checks
382
- if first_version and latest_version:
383
- created = first_version.get("upload_time")
384
- modified = latest_version.get("upload_time")
385
- if created and modified:
386
- modified_dt = datetime.fromisoformat(modified)
387
- created_dt = datetime.fromisoformat(created)
388
- mod_create_diff = modified_dt - created_dt
389
- latest_now_diff = datetime.now() - modified_dt
390
- created_now_diff = datetime.now() - created_dt
391
- risk_metrics = compute_time_risks(
392
- risk_metrics, created_now_diff, mod_create_diff, latest_now_diff
393
- )
394
-
395
- # Is the package deprecated
396
- if is_deprecated or latest_deprecated:
397
- risk_metrics["pkg_deprecated_risk"] = True
398
- risk_metrics["pkg_deprecated_value"] = 1
399
- # Add package scope related weight
400
- if scope:
401
- risk_metrics[f"pkg_{scope}_scope_risk"] = True
402
- risk_metrics[f"pkg_{scope}_scope_value"] = 1
403
-
404
- risk_metrics["risk_score"] = calculate_risk_score(risk_metrics)
405
- return risk_metrics
406
-
407
-
408
- def npm_pkg_risk(pkg_metadata, is_private_pkg, scope):
409
- """
410
- Calculate various npm package risks based on the metadata from npm. The
411
- keys in the risk_metrics dict is based on the parameters specified in
412
- config.py and has a _risk suffix. Eg: config.pkg_min_versions would
413
- result in a boolean pkg_min_versions_risk and pkg_min_versions_value
414
-
415
- :param pkg_metadata: A dict containing the metadata of the npm package.
416
- :param is_private_pkg: Boolean to indicate if this package is private
417
- :param scope: Package scope
418
- :return: A dict containing the calculated risks and score.
419
- """
420
- # Some default values to ensure the structure is non-empty
421
- risk_metrics = {
422
- "pkg_deprecated_risk": False,
423
- "pkg_min_versions_risk": False,
424
- "created_now_quarantine_seconds_risk": False,
425
- "latest_now_max_seconds_risk": False,
426
- "mod_create_min_seconds_risk": False,
427
- "pkg_min_maintainers_risk": False,
428
- "pkg_node_version_risk": False,
429
- "pkg_private_on_public_registry_risk": False,
430
- }
431
- # Is the private package available publicly? Dependency confusion.
432
- if is_private_pkg and pkg_metadata:
433
- risk_metrics["pkg_private_on_public_registry_risk"] = True
434
- risk_metrics["pkg_private_on_public_registry_value"] = 1
435
- versions = pkg_metadata.get("versions", {})
436
- latest_version = pkg_metadata.get("dist-tags", {}).get("latest")
437
- engines_block_dict = versions.get(latest_version, {}).get("engines", {})
438
- # Check for scripts block
439
- scripts_block_dict = versions.get(latest_version, {}).get("scripts", {})
440
- is_deprecated = versions.get(latest_version, {}).get("deprecated", None)
441
- # Is the package deprecated
442
- if is_deprecated:
443
- risk_metrics["pkg_deprecated_risk"] = True
444
- risk_metrics["pkg_deprecated_value"] = 1
445
- scripts_block_list = []
446
- # There are some packages on npm with incorrectly configured scripts
447
- # block Good news is that the install portion would only for if the
448
- # scripts block is an object/dict
449
- if isinstance(scripts_block_dict, dict):
450
- scripts_block_list = [
451
- block
452
- for block in scripts_block_dict.keys()
453
- if "preinstall" in block or "postinstall" in block
454
- ]
455
-
456
- # If the package has fewer than minimum number of versions
457
- if len(versions) < config.pkg_min_versions:
458
- risk_metrics["pkg_min_versions_risk"] = True
459
- risk_metrics["pkg_min_versions_value"] = len(versions)
460
- # Time related checks
461
- time_info = pkg_metadata.get("time", {})
462
- modified = time_info.get("modified", "").replace("Z", "")
463
- created = time_info.get("created", "").replace("Z", "")
464
- if not modified and pkg_metadata.get("mtime"):
465
- modified = pkg_metadata.get("mtime").replace("Z", "")
466
- if not created and pkg_metadata.get("ctime"):
467
- created = pkg_metadata.get("ctime").replace("Z", "")
468
- latest_version_time = time_info.get(latest_version, "").replace("Z", "")
469
- if time_info and modified and created and latest_version_time:
470
- modified_dt = datetime.fromisoformat(modified)
471
- created_dt = datetime.fromisoformat(created)
472
- latest_version_time_dt = datetime.fromisoformat(latest_version_time)
473
- mod_create_diff = modified_dt - created_dt
474
- latest_now_diff = datetime.now() - latest_version_time_dt
475
- created_now_diff = datetime.now() - created_dt
476
- risk_metrics = compute_time_risks(
477
- risk_metrics, created_now_diff, mod_create_diff, latest_now_diff
478
- )
479
-
480
- # Maintainers count related risk. Ignore packages that are past
481
- # quarantine period
482
- maintainers = pkg_metadata.get("maintainers", [])
483
- if len(maintainers) < config.pkg_min_maintainers and risk_metrics.get(
484
- "created_now_quarantine_seconds_risk"
485
- ):
486
- risk_metrics["pkg_min_maintainers_risk"] = True
487
- risk_metrics["pkg_min_maintainers_value"] = len(maintainers)
488
- # Check for install scripts risk only for those packages with
489
- # maintainers risk
490
- if scripts_block_list:
491
- risk_metrics["pkg_install_scripts_risk"] = True
492
- risk_metrics["pkg_install_scripts_value"] = len(scripts_block_list)
493
-
494
- # Users count related risk. Ignore packages that are past quarantine period
495
- users = pkg_metadata.get("users", [])
496
- if (
497
- users
498
- and len(users) < config.pkg_min_users
499
- and risk_metrics.get("created_now_quarantine_seconds_risk")
500
- ):
501
- risk_metrics["pkg_min_users_risk"] = True
502
- risk_metrics["pkg_min_users_value"] = len(users)
503
- # Node engine version There are packages with incorrect node engine
504
- # specification which we can ignore for now
505
- if (
506
- engines_block_dict
507
- and isinstance(engines_block_dict, dict)
508
- and engines_block_dict.get("node")
509
- and isinstance(engines_block_dict.get("node"), str)
510
- ):
511
- node_version_spec = engines_block_dict.get("node")
512
- node_version = (
513
- node_version_spec.replace(">= ", "")
514
- .replace(">=", "")
515
- .replace("> ", "")
516
- .replace(">", "")
517
- .replace("~ ", "")
518
- .replace("~", "")
519
- .split(" ")[0]
520
- )
521
- for ver in config.pkg_node_version.split(","):
522
- if node_version.startswith(ver):
523
- risk_metrics["pkg_node_version_risk"] = True
524
- risk_metrics["pkg_node_version_value"] = 1
525
- break
526
- # Add package scope related weight
527
- if scope:
528
- risk_metrics[f"pkg_{scope}_scope_risk"] = True
529
- risk_metrics[f"pkg_{scope}_scope_value"] = 1
530
-
531
- risk_metrics["risk_score"] = calculate_risk_score(risk_metrics)
532
- return risk_metrics