pdfalyzer 1.16.11__tar.gz → 1.16.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pdfalyzer might be problematic. Click here for more details.

Files changed (48) hide show
  1. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/CHANGELOG.md +6 -0
  2. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/PKG-INFO +14 -7
  3. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/README.md +9 -2
  4. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/__init__.py +9 -3
  5. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/output/tables/decoding_stats_table.py +2 -2
  6. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pyproject.toml +8 -4
  7. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/.pdfalyzer.example +0 -0
  8. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/LICENSE +0 -0
  9. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/__main__.py +0 -0
  10. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/binary/binary_scanner.py +0 -0
  11. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/config.py +0 -0
  12. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/decorators/document_model_printer.py +0 -0
  13. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/decorators/indeterminate_node.py +0 -0
  14. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/decorators/pdf_object_properties.py +0 -0
  15. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/decorators/pdf_tree_node.py +0 -0
  16. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/decorators/pdf_tree_verifier.py +0 -0
  17. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/detection/constants/binary_regexes.py +0 -0
  18. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/detection/constants/javascript_reserved_keywords.py +0 -0
  19. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/detection/javascript_hunter.py +0 -0
  20. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/detection/yaralyzer_helper.py +0 -0
  21. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/font_info.py +0 -0
  22. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/helpers/dict_helper.py +0 -0
  23. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/helpers/filesystem_helper.py +0 -0
  24. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/helpers/number_helper.py +0 -0
  25. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/helpers/pdf_object_helper.py +8 -8
  26. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/helpers/rich_text_helper.py +0 -0
  27. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/helpers/string_helper.py +0 -0
  28. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/output/character_mapping.py +0 -0
  29. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/output/layout.py +0 -0
  30. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/output/pdfalyzer_presenter.py +0 -0
  31. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/output/styles/node_colors.py +0 -0
  32. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/output/styles/rich_theme.py +0 -0
  33. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/output/tables/font_summary_table.py +0 -0
  34. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/output/tables/pdf_node_rich_table.py +0 -0
  35. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/output/tables/stream_objects_table.py +0 -0
  36. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/pdf_object_relationship.py +0 -0
  37. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/pdfalyzer.py +0 -0
  38. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/util/adobe_strings.py +0 -0
  39. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/util/argument_parser.py +0 -0
  40. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/util/debugging.py +0 -0
  41. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/util/exceptions.py +0 -0
  42. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/util/pdf_parser_manager.py +0 -0
  43. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/yara_rules/PDF.yara +0 -0
  44. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/yara_rules/PDF_binary_stream.yara +0 -0
  45. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/yara_rules/__init.py__ +0 -0
  46. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/yara_rules/didier_stevens.yara +0 -0
  47. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/yara_rules/lprat.static_file_analysis.yara +0 -0
  48. {pdfalyzer-1.16.11 → pdfalyzer-1.16.13}/pdfalyzer/yara_rules/pdf_malware.yara +0 -0
@@ -1,5 +1,11 @@
1
1
  # NEXT RELEASE
2
2
 
3
+ ### 1.16.13
4
+ * Bump `yaralyzer` to v1.0.7 and fix reference to yaralyzer's renamed `prefix_with_style()` method
5
+
6
+ ### 1.16.12
7
+ * Bump `PyPDF` to v6.0.0
8
+
3
9
  ### 1.16.11
4
10
  * Fix typo in `combine_pdfs` help
5
11
  * Add some more PyPI classifiers
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pdfalyzer
3
- Version: 1.16.11
3
+ Version: 1.16.13
4
4
  Summary: PDF analysis tool. Scan a PDF with YARA rules, visualize its inner tree-like data structure in living color (lots of colors), force decodes of suspicious font binaries, and more.
5
5
  Home-page: https://github.com/michelcrypt4d4mus/pdfalyzer
6
6
  License: GPL-3.0-or-later
7
- Keywords: ascii art,binary,color,cybersecurity,DFIR,encoding,font,infosec,maldoc,malicious pdf,malware,malware analysis,pdf,pdfs,pdf analysis,pypdf,threat assessment,visualization,yara
7
+ Keywords: ascii art,binary,color,cybersecurity,DFIR,encoding,font,infosec,maldoc,malicious pdf,malware,malware analysis,pdf,pdfs,pdf analysis,pypdf,threat assessment,threat hunting,threat intelligence,threat research,threatintel,visualization,yara
8
8
  Author: Michel de Cryptadamus
9
9
  Author-email: michel@cryptadamus.com
10
- Requires-Python: >=3.9.2,<4.0.0
10
+ Requires-Python: >=3.9.2,<4.0
11
11
  Classifier: Development Status :: 5 - Production/Stable
12
12
  Classifier: Environment :: Console
13
13
  Classifier: Intended Audience :: Information Technology
@@ -23,8 +23,8 @@ Classifier: Topic :: Artistic Software
23
23
  Classifier: Topic :: Scientific/Engineering :: Visualization
24
24
  Classifier: Topic :: Security
25
25
  Requires-Dist: anytree (>=2.13,<3.0)
26
- Requires-Dist: pypdf (>=5.9.0,<6.0.0)
27
- Requires-Dist: yaralyzer (>=1.0.4,<2.0.0)
26
+ Requires-Dist: pypdf (>=6.0.0,<7.0.0)
27
+ Requires-Dist: yaralyzer (>=1.0.7,<2.0.0)
28
28
  Project-URL: Changelog, https://github.com/michelcrypt4d4mus/pdfalyzer/blob/master/CHANGELOG.md
29
29
  Project-URL: Documentation, https://github.com/michelcrypt4d4mus/pdfalyzer
30
30
  Project-URL: Repository, https://github.com/michelcrypt4d4mus/pdfalyzer
@@ -65,10 +65,12 @@ If you're looking for one of these things this may be the tool for you.
65
65
  ### What It Don't Do
66
66
  This tool is mostly for examining/working with a PDF's data and logical structure. As such it doesn't have much to offer as far as extracting text, rendering[^3], writing, etc. etc.
67
67
 
68
+ If you suspect you are dealing with a malicious PDF, you can safely run `pdfalyze` on it; embedded javascript etc. will not be executed. If you want to actually look at the contents of a suspect PDF you can use [`dangerzone`](https://dangerzone.rocks/) to sanitize the contents with extreme prejudice before opening it.
69
+
68
70
  -------------
69
71
 
70
72
  # Installation
71
-
73
+ #### All Platforms
72
74
  Installation with [pipx](https://pypa.github.io/pipx/)[^4] is preferred though `pip3` / `pip` should also work.
73
75
  ```sh
74
76
  pipx install pdfalyzer
@@ -76,7 +78,12 @@ pipx install pdfalyzer
76
78
 
77
79
  See [PyPDF installation notes](https://github.com/py-pdf/pypdf#installation) about `PyCryptodome` if you plan to `pdfalyze` any files that use AES encryption.
78
80
 
79
- If you are on macOS someone out there was kind enough to make [The Pdfalyzer available via homebrew](https://formulae.brew.sh/formula/pdfalyzer) so `brew install pdfalyzer` should work.
81
+ #### macOS Homebrew
82
+ If you are on macOS and use `homebrew`, someone out there was kind enough to make [The Pdfalyzer available via homebrew](https://formulae.brew.sh/formula/pdfalyzer) so this should work:
83
+
84
+ ```sh
85
+ brew install pdfalyzer
86
+ ```
80
87
 
81
88
  ### Troubleshooting
82
89
  1. If you used `pip3` instead of `pipx` and have an issue you should try to install with `pipx`.
@@ -33,10 +33,12 @@ If you're looking for one of these things this may be the tool for you.
33
33
  ### What It Don't Do
34
34
  This tool is mostly for examining/working with a PDF's data and logical structure. As such it doesn't have much to offer as far as extracting text, rendering[^3], writing, etc. etc.
35
35
 
36
+ If you suspect you are dealing with a malicious PDF, you can safely run `pdfalyze` on it; embedded javascript etc. will not be executed. If you want to actually look at the contents of a suspect PDF you can use [`dangerzone`](https://dangerzone.rocks/) to sanitize the contents with extreme prejudice before opening it.
37
+
36
38
  -------------
37
39
 
38
40
  # Installation
39
-
41
+ #### All Platforms
40
42
  Installation with [pipx](https://pypa.github.io/pipx/)[^4] is preferred though `pip3` / `pip` should also work.
41
43
  ```sh
42
44
  pipx install pdfalyzer
@@ -44,7 +46,12 @@ pipx install pdfalyzer
44
46
 
45
47
  See [PyPDF installation notes](https://github.com/py-pdf/pypdf#installation) about `PyCryptodome` if you plan to `pdfalyze` any files that use AES encryption.
46
48
 
47
- If you are on macOS someone out there was kind enough to make [The Pdfalyzer available via homebrew](https://formulae.brew.sh/formula/pdfalyzer) so `brew install pdfalyzer` should work.
49
+ #### macOS Homebrew
50
+ If you are on macOS and use `homebrew`, someone out there was kind enough to make [The Pdfalyzer available via homebrew](https://formulae.brew.sh/formula/pdfalyzer) so this should work:
51
+
52
+ ```sh
53
+ brew install pdfalyzer
54
+ ```
48
55
 
49
56
  ### Troubleshooting
50
57
  1. If you used `pip3` instead of `pipx` and have an issue you should try to install with `pipx`.
@@ -19,7 +19,7 @@ if not environ.get('INVOKED_BY_PYTEST', False):
19
19
  from rich.columns import Columns
20
20
  from rich.panel import Panel
21
21
  from rich.text import Text
22
- from yaralyzer.helpers.rich_text_helper import prefix_with_plain_text_obj
22
+ from yaralyzer.helpers.rich_text_helper import prefix_with_style
23
23
  from yaralyzer.output.file_export import invoke_rich_export
24
24
  from yaralyzer.output.rich_console import console
25
25
  from yaralyzer.util.logging import log_and_print
@@ -83,7 +83,7 @@ def pdfalyzer_show_color_theme() -> None:
83
83
  console.print(Panel('The Pdfalyzer Color Theme', style='reverse'))
84
84
 
85
85
  colors = [
86
- prefix_with_plain_text_obj(name[:MAX_THEME_COL_SIZE], style=str(style)).append(' ')
86
+ prefix_with_style(name[:MAX_THEME_COL_SIZE], style=str(style)).append(' ')
87
87
  for name, style in PDFALYZER_THEME_DICT.items()
88
88
  if name not in ['reset', 'repr_url']
89
89
  ]
@@ -93,7 +93,7 @@ def pdfalyzer_show_color_theme() -> None:
93
93
 
94
94
  def combine_pdfs():
95
95
  """
96
- Utility method to combine multiple PDFs into one. Invocable with 'combine_pdfs PDF1 [PDF2...]'.
96
+ Script method to combine multiple PDFs into one. Invocable with 'combine_pdfs PDF1 [PDF2...]'.
97
97
  Example: https://github.com/py-pdf/pypdf/blob/main/docs/user/merging-pdfs.md
98
98
  """
99
99
  args = parse_combine_pdfs_args()
@@ -130,3 +130,9 @@ def combine_pdfs():
130
130
  txt = Text('').append(f" -> Wrote ")
131
131
  txt.append(str(file_size_in_mb(args.output_file)), style='cyan').append(" megabytes\n")
132
132
  print_highlighted(txt)
133
+
134
+
135
+ # TODO: migrate this functionality from clown_sort
136
+ # def extract_pages_from_pdf() -> None:
137
+ # args = parse_pdf_page_extraction_args()
138
+ # PdfFile(args.pdf_file).extract_page_range(args.page_range, destination_dir=args.destination_dir)
@@ -5,7 +5,7 @@ from numbers import Number
5
5
 
6
6
  from rich.table import Table
7
7
  from rich.text import Text
8
- from yaralyzer.helpers.rich_text_helper import CENTER, na_txt, prefix_with_plain_text_obj
8
+ from yaralyzer.helpers.rich_text_helper import CENTER, na_txt, prefix_with_style
9
9
 
10
10
  from pdfalyzer.binary.binary_scanner import BinaryScanner
11
11
  from pdfalyzer.helpers.rich_text_helper import pct_txt
@@ -60,7 +60,7 @@ def build_decoding_stats_table(scanner: BinaryScanner) -> Table:
60
60
 
61
61
  def _new_decoding_stats_table(title) -> Table:
62
62
  """Build an empty table for displaying decoding stats"""
63
- title = prefix_with_plain_text_obj(title, style='blue underline')
63
+ title = prefix_with_style(title, style='blue underline')
64
64
  title.append(": Decoding Attempts Summary Statistics", style='bright_white bold')
65
65
 
66
66
  table = Table(
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pdfalyzer"
3
- version = "1.16.11"
3
+ version = "1.16.13"
4
4
  description = "PDF analysis tool. Scan a PDF with YARA rules, visualize its inner tree-like data structure in living color (lots of colors), force decodes of suspicious font binaries, and more."
5
5
  authors = ["Michel de Cryptadamus <michel@cryptadamus.com>"]
6
6
  license = "GPL-3.0-or-later"
@@ -49,6 +49,10 @@ keywords = [
49
49
  "pdf analysis",
50
50
  "pypdf",
51
51
  "threat assessment",
52
+ "threat hunting",
53
+ "threat intelligence",
54
+ "threat research",
55
+ "threatintel",
52
56
  "visualization",
53
57
  "yara"
54
58
  ]
@@ -62,10 +66,10 @@ packages = [
62
66
  # Dependencies #
63
67
  #####################
64
68
  [tool.poetry.dependencies]
65
- python = "^3.9.2"
69
+ python = "^3.9,>=3.9.2"
66
70
  anytree = "~=2.13"
67
- pypdf = "^5.9.0"
68
- yaralyzer = "^1.0.4"
71
+ pypdf = "^6.0.0"
72
+ yaralyzer = "^1.0.7"
69
73
 
70
74
  [tool.poetry.group.dev.dependencies]
71
75
  flake8 = "^7.3.0"
File without changes
@@ -8,19 +8,14 @@ from pypdf.generic import IndirectObject, PdfObject
8
8
  from pdfalyzer.pdf_object_relationship import PdfObjectRelationship
9
9
 
10
10
 
11
- def pdf_object_id(pdf_object) -> Optional[int]:
12
- """Return the ID of an IndirectObject and None for everything else"""
13
- return pdf_object.idnum if isinstance(pdf_object, IndirectObject) else None
14
-
15
-
16
11
  def does_list_have_any_references(_list) -> bool:
17
12
  """Return true if any element of _list is an IndirectObject."""
18
13
  return any(isinstance(item, IndirectObject) for item in _list)
19
14
 
20
15
 
21
- def _sort_pdf_object_refs(refs: List[PdfObjectRelationship]) -> List[PdfObjectRelationship]:
22
- """Sort a list of PdfObjectRelationship objects by their to_obj's idnum. Only used by pytest."""
23
- return sorted(refs, key=lambda ref: ref.to_obj.idnum)
16
+ def pdf_object_id(pdf_object) -> Optional[int]:
17
+ """Return the ID of an IndirectObject and None for everything else"""
18
+ return pdf_object.idnum if isinstance(pdf_object, IndirectObject) else None
24
19
 
25
20
 
26
21
  def pypdf_class_name(obj: PdfObject) -> str:
@@ -28,3 +23,8 @@ def pypdf_class_name(obj: PdfObject) -> str:
28
23
  class_pkgs = type(obj).__name__.split('.')
29
24
  class_pkgs.reverse()
30
25
  return class_pkgs[0].removesuffix('Object')
26
+
27
+
28
+ def _sort_pdf_object_refs(refs: List[PdfObjectRelationship]) -> List[PdfObjectRelationship]:
29
+ """Sort a list of PdfObjectRelationship objects by their to_obj's idnum. Only used by pytest."""
30
+ return sorted(refs, key=lambda ref: ref.to_obj.idnum)