pdfalyzer 1.16.10__tar.gz → 1.16.12__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pdfalyzer might be problematic. Click here for more details.

Files changed (48) hide show
  1. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/CHANGELOG.md +8 -0
  2. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/PKG-INFO +17 -7
  3. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/README.md +9 -2
  4. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/__init__.py +9 -6
  5. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/binary/binary_scanner.py +15 -11
  6. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/decorators/pdf_object_properties.py +13 -12
  7. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/decorators/pdf_tree_node.py +8 -5
  8. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/decorators/pdf_tree_verifier.py +7 -4
  9. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/detection/constants/binary_regexes.py +7 -7
  10. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/detection/yaralyzer_helper.py +1 -3
  11. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/font_info.py +10 -10
  12. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/helpers/filesystem_helper.py +6 -6
  13. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/helpers/pdf_object_helper.py +0 -1
  14. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/helpers/rich_text_helper.py +5 -5
  15. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/character_mapping.py +3 -2
  16. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/pdfalyzer_presenter.py +5 -3
  17. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/tables/decoding_stats_table.py +2 -1
  18. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/pdf_object_relationship.py +12 -12
  19. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/pdfalyzer.py +4 -5
  20. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/util/adobe_strings.py +4 -4
  21. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/util/argument_parser.py +7 -7
  22. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pyproject.toml +49 -29
  23. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/.pdfalyzer.example +0 -0
  24. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/LICENSE +0 -0
  25. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/__main__.py +0 -0
  26. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/config.py +0 -0
  27. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/decorators/document_model_printer.py +0 -0
  28. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/decorators/indeterminate_node.py +0 -0
  29. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/detection/constants/javascript_reserved_keywords.py +0 -0
  30. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/detection/javascript_hunter.py +0 -0
  31. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/helpers/dict_helper.py +0 -0
  32. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/helpers/number_helper.py +0 -0
  33. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/helpers/string_helper.py +0 -0
  34. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/layout.py +0 -0
  35. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/styles/node_colors.py +0 -0
  36. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/styles/rich_theme.py +0 -0
  37. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/tables/font_summary_table.py +0 -0
  38. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/tables/pdf_node_rich_table.py +0 -0
  39. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/tables/stream_objects_table.py +0 -0
  40. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/util/debugging.py +1 -1
  41. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/util/exceptions.py +0 -0
  42. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/util/pdf_parser_manager.py +0 -0
  43. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/yara_rules/PDF.yara +0 -0
  44. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/yara_rules/PDF_binary_stream.yara +0 -0
  45. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/yara_rules/__init.py__ +0 -0
  46. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/yara_rules/didier_stevens.yara +0 -0
  47. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/yara_rules/lprat.static_file_analysis.yara +0 -0
  48. {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/yara_rules/pdf_malware.yara +0 -0
@@ -1,5 +1,13 @@
1
1
  # NEXT RELEASE
2
2
 
3
+ ### 1.16.12
4
+ * Bump `PyPDF` to v6.0.0
5
+
6
+ ### 1.16.11
7
+ * Fix typo in `combine_pdfs` help
8
+ * Add some more PyPi classifiers
9
+ * Add a `.flake8` config and fix a bunch of style issues
10
+
3
11
  ### 1.16.10
4
12
  * Add `Environment :: Console` and `Programming Language :: Python` to pypi classifiers
5
13
  * Add `.pdfalyzer.example` to PyPi package
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pdfalyzer
3
- Version: 1.16.10
4
- Summary: A PDF analysis toolkit. Scan a PDF with relevant YARA rules, visualize its inner tree-like data structure in living color (lots of colors), force decodes of suspicious font binaries, and more.
3
+ Version: 1.16.12
4
+ Summary: PDF analysis tool. Scan a PDF with YARA rules, visualize its inner tree-like data structure in living color (lots of colors), force decodes of suspicious font binaries, and more.
5
5
  Home-page: https://github.com/michelcrypt4d4mus/pdfalyzer
6
6
  License: GPL-3.0-or-later
7
- Keywords: ascii art,binary,color,cybersecurity,DFIR,encoding,font,infosec,maldoc,malicious pdf,malware,malware analysis,pdf,pdfs,pdf analysis,threat assessment,visualization,yara
7
+ Keywords: ascii art,binary,color,cybersecurity,DFIR,encoding,font,infosec,maldoc,malicious pdf,malware,malware analysis,pdf,pdfs,pdf analysis,pypdf,threat assessment,threat hunting,threat intelligence,threat research,threatintel,visualization,yara
8
8
  Author: Michel de Cryptadamus
9
9
  Author-email: michel@cryptadamus.com
10
- Requires-Python: >=3.9.2,<4.0.0
10
+ Requires-Python: >=3.9.2,<4.0
11
11
  Classifier: Development Status :: 5 - Production/Stable
12
12
  Classifier: Environment :: Console
13
13
  Classifier: Intended Audience :: Information Technology
@@ -16,11 +16,14 @@ Classifier: Programming Language :: Python
16
16
  Classifier: Programming Language :: Python :: 3
17
17
  Classifier: Programming Language :: Python :: 3.10
18
18
  Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Programming Language :: Python :: 3.9
19
22
  Classifier: Topic :: Artistic Software
20
23
  Classifier: Topic :: Scientific/Engineering :: Visualization
21
24
  Classifier: Topic :: Security
22
25
  Requires-Dist: anytree (>=2.13,<3.0)
23
- Requires-Dist: pypdf (>=5.9.0,<6.0.0)
26
+ Requires-Dist: pypdf (>=6.0.0,<7.0.0)
24
27
  Requires-Dist: yaralyzer (>=1.0.4,<2.0.0)
25
28
  Project-URL: Changelog, https://github.com/michelcrypt4d4mus/pdfalyzer/blob/master/CHANGELOG.md
26
29
  Project-URL: Documentation, https://github.com/michelcrypt4d4mus/pdfalyzer
@@ -62,10 +65,12 @@ If you're looking for one of these things this may be the tool for you.
62
65
  ### What It Don't Do
63
66
  This tool is mostly for examining/working with a PDF's data and logical structure. As such it doesn't have much to offer as far as extracting text, rendering[^3], writing, etc. etc.
64
67
 
68
+ If you suspect you are dealing with a malicious PDF you can safely run `pdfalyze` on it; embedded javascript etc. will not be executed. If you want to actually look at the contents of a suspect PDF you can use [`dangerzone`](https://dangerzone.rocks/) to sanitize the contents with extreme prejudice before opening it.
69
+
65
70
  -------------
66
71
 
67
72
  # Installation
68
-
73
+ #### All Platforms
69
74
  Installation with [pipx](https://pypa.github.io/pipx/)[^4] is preferred though `pip3` / `pip` should also work.
70
75
  ```sh
71
76
  pipx install pdfalyzer
@@ -73,7 +78,12 @@ pipx install pdfalyzer
73
78
 
74
79
  See [PyPDF installation notes](https://github.com/py-pdf/pypdf#installation) about `PyCryptodome` if you plan to `pdfalyze` any files that use AES encryption.
75
80
 
76
- If you are on macOS someone out there was kind enough to make [The Pdfalyzer available via homebrew](https://formulae.brew.sh/formula/pdfalyzer) so `brew install pdfalyzer` should work.
81
+ #### macOS Homebrew
82
+ If you are on macOS and use `homebrew` someone out there was kind enough to make [The Pdfalyzer available via homebrew](https://formulae.brew.sh/formula/pdfalyzer) so this should work:
83
+
84
+ ```sh
85
+ brew install pdfalyzer
86
+ ```
77
87
 
78
88
  ### Troubleshooting
79
89
  1. If you used `pip3` instead of `pipx` and have an issue you should try to install with `pipx`.
@@ -33,10 +33,12 @@ If you're looking for one of these things this may be the tool for you.
33
33
  ### What It Don't Do
34
34
  This tool is mostly for examining/working with a PDF's data and logical structure. As such it doesn't have much to offer as far as extracting text, rendering[^3], writing, etc. etc.
35
35
 
36
+ If you suspect you are dealing with a malicious PDF you can safely run `pdfalyze` on it; embedded javascript etc. will not be executed. If you want to actually look at the contents of a suspect PDF you can use [`dangerzone`](https://dangerzone.rocks/) to sanitize the contents with extreme prejudice before opening it.
37
+
36
38
  -------------
37
39
 
38
40
  # Installation
39
-
41
+ #### All Platforms
40
42
  Installation with [pipx](https://pypa.github.io/pipx/)[^4] is preferred though `pip3` / `pip` should also work.
41
43
  ```sh
42
44
  pipx install pdfalyzer
@@ -44,7 +46,12 @@ pipx install pdfalyzer
44
46
 
45
47
  See [PyPDF installation notes](https://github.com/py-pdf/pypdf#installation) about `PyCryptodome` if you plan to `pdfalyze` any files that use AES encryption.
46
48
 
47
- If you are on macOS someone out there was kind enough to make [The Pdfalyzer available via homebrew](https://formulae.brew.sh/formula/pdfalyzer) so `brew install pdfalyzer` should work.
49
+ #### macOS Homebrew
50
+ If you are on macOS and use `homebrew` someone out there was kind enough to make [The Pdfalyzer available via homebrew](https://formulae.brew.sh/formula/pdfalyzer) so this should work:
51
+
52
+ ```sh
53
+ brew install pdfalyzer
54
+ ```
48
55
 
49
56
  ### Troubleshooting
50
57
  1. If you used `pip3` instead of `pipx` and have an issue you should try to install with `pipx`.
@@ -1,7 +1,6 @@
1
1
  import code
2
2
  import sys
3
3
  from os import environ, getcwd, path
4
- from pathlib import Path
5
4
 
6
5
  from dotenv import load_dotenv
7
6
  from pypdf import PdfWriter
@@ -23,7 +22,7 @@ from rich.text import Text
23
22
  from yaralyzer.helpers.rich_text_helper import prefix_with_plain_text_obj
24
23
  from yaralyzer.output.file_export import invoke_rich_export
25
24
  from yaralyzer.output.rich_console import console
26
- from yaralyzer.util.logging import log, log_and_print
25
+ from yaralyzer.util.logging import log_and_print
27
26
 
28
27
  from pdfalyzer.helpers.filesystem_helper import file_size_in_mb, set_max_open_files
29
28
  from pdfalyzer.helpers.rich_text_helper import print_highlighted
@@ -51,8 +50,8 @@ def pdfalyze():
51
50
  log_and_print(f"Binary stream extraction complete, files written to '{args.output_dir}'.\nExiting.\n")
52
51
  sys.exit()
53
52
 
54
- # The method that gets called is related to the argument name. See 'possible_output_sections' list in argument_parser.py
55
- # Analysis exports wrap themselves around the methods that actually generate the analyses
53
+ # The method that gets called is related to the argument name. See 'possible_output_sections' list in
54
+ # argument_parser.py. Analysis exports wrap themselves around the methods that actually generate the analyses.
56
55
  for (arg, method) in output_sections(args, pdfalyzer):
57
56
  if args.output_dir:
58
57
  output_basepath = PdfalyzerConfig.get_output_basepath(method)
@@ -89,7 +88,7 @@ def pdfalyzer_show_color_theme() -> None:
89
88
  if name not in ['reset', 'repr_url']
90
89
  ]
91
90
 
92
- console.print(Columns(colors, column_first=True, padding=(0,3)))
91
+ console.print(Columns(colors, column_first=True, padding=(0, 3)))
93
92
 
94
93
 
95
94
  def combine_pdfs():
@@ -114,7 +113,11 @@ def combine_pdfs():
114
113
  for i, page in enumerate(merger.pages):
115
114
  if args.image_quality < MAX_QUALITY:
116
115
  for j, img in enumerate(page.images):
117
- print_highlighted(f" -> Reducing image #{j + 1} quality on page {i + 1} to {args.image_quality}...", style='dim')
116
+ print_highlighted(
117
+ f" -> Reducing image #{j + 1} quality on page {i + 1} to {args.image_quality}...",
118
+ style='dim'
119
+ )
120
+
118
121
  img.replace(img.image, quality=args.image_quality)
119
122
 
120
123
  print_highlighted(f" -> Compressing page {i + 1}...", style='dim')
@@ -21,7 +21,7 @@ from yaralyzer.util.logging import log
21
21
  from pdfalyzer.config import PdfalyzerConfig
22
22
  from pdfalyzer.decorators.pdf_tree_node import PdfTreeNode
23
23
  from pdfalyzer.detection.constants.binary_regexes import (BACKTICK, DANGEROUS_PDF_KEYS_TO_HUNT_ONLY_IN_FONTS,
24
- DANGEROUS_PDF_KEYS_TO_HUNT_ONLY_IN_FONTS, DANGEROUS_STRINGS, FRONTSLASH, GUILLEMET, QUOTE_PATTERNS)
24
+ DANGEROUS_STRINGS, FRONTSLASH, GUILLEMET, QUOTE_PATTERNS)
25
25
  from pdfalyzer.helpers.string_helper import generate_hyphen_line
26
26
  from pdfalyzer.output.layout import print_headline_panel, print_section_sub_subheader
27
27
  from pdfalyzer.util.adobe_strings import CONTENTS, CURRENTFILE_EEXEC, FONT_FILE_KEYS
@@ -36,7 +36,7 @@ class BinaryScanner:
36
36
  self.stream_length = len(_bytes)
37
37
 
38
38
  if label is None and isinstance(owner, PdfTreeNode):
39
- self.label = owner.__rich__()
39
+ self.label = owner.__rich__()
40
40
 
41
41
  self.suppression_notice_queue = []
42
42
  self.regex_extraction_stats = defaultdict(lambda: RegexMatchMetrics())
@@ -86,8 +86,12 @@ class BinaryScanner:
86
86
  print_headline_panel(msg, style='dim')
87
87
  continue
88
88
 
89
+ print_section_sub_subheader(
90
+ f"Forcing Decode of {quote_type.capitalize()} Quoted Strings",
91
+ style=BYTES_NO_DIM
92
+ )
93
+
89
94
  quote_pattern = QUOTE_PATTERNS[quote_type]
90
- print_section_sub_subheader(f"Forcing Decode of {quote_type.capitalize()} Quoted Strings", style=BYTES_NO_DIM)
91
95
  yaralyzer = self._quote_yaralyzer(quote_pattern, quote_type)
92
96
  self.process_yara_matches(yaralyzer, f"{quote_type}_quoted")
93
97
 
@@ -135,7 +139,7 @@ class BinaryScanner:
135
139
  def process_yara_matches(self, yaralyzer: Yaralyzer, pattern: str, force: bool = False) -> None:
136
140
  """Decide whether to attempt to decode the matched bytes, track stats. force param ignores min/max length."""
137
141
  for bytes_match, decoder in yaralyzer.match_iterator():
138
- log.debug(f"Trackings stats for match: {pattern}, bytes_match: {bytes_match}, is_decodable: {bytes_match.is_decodable()}")
142
+ log.debug(f"Trackings match stats for {pattern}, bytes_match: {bytes_match}, is_decodable: {bytes_match.is_decodable()}") # noqa: E501
139
143
 
140
144
  # Send suppressed decodes to a queue and track the reason for the suppression in the stats
141
145
  if not (bytes_match.is_decodable() or force):
@@ -145,7 +149,7 @@ class BinaryScanner:
145
149
  # Print out any queued suppressed notices before printing non suppressed matches
146
150
  self._print_suppression_notices()
147
151
  console.print(decoder)
148
- self.regex_extraction_stats[pattern].tally_match(decoder) # TODO: This call must come after print(decoder)
152
+ self.regex_extraction_stats[pattern].tally_match(decoder) # TODO: This call must come after print(decoder)
149
153
 
150
154
  self._print_suppression_notices()
151
155
 
@@ -167,12 +171,12 @@ class BinaryScanner:
167
171
  return self._pattern_yaralyzer(quote_pattern, REGEX, label, label)
168
172
 
169
173
  def _pattern_yaralyzer(
170
- self,
171
- pattern: str,
172
- pattern_type: str,
173
- rules_label: Optional[str] = None,
174
- pattern_label: Optional[str] = None
175
- ) -> Yaralyzer:
174
+ self,
175
+ pattern: str,
176
+ pattern_type: str,
177
+ rules_label: Optional[str] = None,
178
+ pattern_label: Optional[str] = None
179
+ ) -> Yaralyzer:
176
180
  """Build a yaralyzer to scan self.bytes"""
177
181
  return Yaralyzer.for_patterns(
178
182
  patterns=[escape_yara_pattern(pattern)],
@@ -16,13 +16,14 @@ from pdfalyzer.util.adobe_strings import *
16
16
 
17
17
  class PdfObjectProperties:
18
18
  """Simple class to extract critical features of a PdfObject."""
19
+
19
20
  def __init__(
20
- self,
21
- pdf_object: PdfObject,
22
- address: str,
23
- idnum: int,
24
- indirect_object: Optional[IndirectObject] = None
25
- ):
21
+ self,
22
+ pdf_object: PdfObject,
23
+ address: str,
24
+ idnum: int,
25
+ indirect_object: Optional[IndirectObject] = None
26
+ ):
26
27
  self.idnum = idnum
27
28
  self.obj = pdf_object
28
29
  self.indirect_object = indirect_object
@@ -57,7 +58,7 @@ class PdfObjectProperties:
57
58
  else:
58
59
  self.first_address = address
59
60
 
60
- log.debug(f"Node ID: {self.idnum}, type: {self.type}, subtype: {self.sub_type}, " + \
61
+ log.debug(f"Node ID: {self.idnum}, type: {self.type}, subtype: {self.sub_type}, " +
61
62
  f"label: {self.label}, first_address: {self.first_address}")
62
63
 
63
64
  @classmethod
@@ -80,11 +81,11 @@ class PdfObjectProperties:
80
81
 
81
82
  @classmethod
82
83
  def to_table_row(
83
- cls,
84
- reference_key: str,
85
- obj: PdfObject,
86
- is_single_row_table: bool = False
87
- ) -> List[Union[Text, str]]:
84
+ cls,
85
+ reference_key: str,
86
+ obj: PdfObject,
87
+ is_single_row_table: bool = False
88
+ ) -> List[Union[Text, str]]:
88
89
  """PDF object property at reference_key becomes a formatted 3-tuple for use in Rich tables."""
89
90
  with_resolved_refs = cls.resolve_references(reference_key, obj)
90
91
 
@@ -6,7 +6,7 @@ Child/parent relationships should be set using the add_child() and set_parent()
6
6
  methods and not set directly. (TODO: this could be done better with anytree
7
7
  hooks)
8
8
  """
9
- from typing import Callable, List, Optional, Set
9
+ from typing import Callable, List, Optional
10
10
 
11
11
  from anytree import NodeMixin, SymlinkNode
12
12
  from pypdf.errors import PdfReadError
@@ -163,11 +163,14 @@ class PdfTreeNode(NodeMixin, PdfObjectProperties):
163
163
  return None
164
164
  else:
165
165
  address = refs_to_this_node[0].address
166
+
166
167
  # If other node's label doesn't start with a NON_STANDARD_ADDRESS string
167
- # and any of the relationships pointing at this node use something other than a
168
- # NON_STANDARD_ADDRESS_NODES string to refer here, print a warning about multiple refs.
169
- if not (is_prefixed_by_any(from_node.label, NON_STANDARD_ADDRESS_NODES) or \
170
- all(ref.address in NON_STANDARD_ADDRESS_NODES for ref in refs_to_this_node)):
168
+ # AND any of the relationships pointing at this node use something other than a
169
+ # NON_STANDARD_ADDRESS_NODES string to refer here,
170
+ # then print a warning about multiple refs.
171
+ if not (is_prefixed_by_any(from_node.label, NON_STANDARD_ADDRESS_NODES)
172
+ or
173
+ all(ref.address in NON_STANDARD_ADDRESS_NODES for ref in refs_to_this_node)):
171
174
  refs_to_this_node_str = "\n ".join([f"{i + 1}. {r}" for i, r in enumerate(refs_to_this_node)])
172
175
  msg = f"Multiple refs from {from_node} to {self}:\n {refs_to_this_node_str}"
173
176
  log.warning(msg + f"\nCommon address of refs: {address}")
@@ -37,7 +37,10 @@ class PdfTreeVerifier:
37
37
  log.warning(f"Methodd doesn't check revisions but this doc is generation {self.pdfalyzer.max_generation}")
38
38
 
39
39
  # We expect to see all ordinals up to the number of nodes /Trailer claims exist as obj. IDs.
40
- missing_node_ids = [i for i in range(1, self.pdfalyzer.pdf_size) if self.pdfalyzer.find_node_by_idnum(i) is None]
40
+ missing_node_ids = [
41
+ i for i in range(1, self.pdfalyzer.pdf_size)
42
+ if self.pdfalyzer.find_node_by_idnum(i) is None
43
+ ]
41
44
 
42
45
  for idnum in missing_node_ids:
43
46
  ref = IndirectObject(idnum, self.pdfalyzer.max_generation, self.pdfalyzer.pdf_reader)
@@ -57,13 +60,13 @@ class PdfTreeVerifier:
57
60
  log.error(f"Cannot find ref {ref} in PDF!")
58
61
  continue
59
62
  elif isinstance(obj, (NumberObject, NameObject)):
60
- log.info(f"Obj {idnum} is a {type(obj)} w/value {obj}; if relationshipd by /Length etc. this is a nonissue but maybe worth doublechecking")
63
+ log.info(f"Obj {idnum} is a {type(obj)} w/value {obj}; if relationshipd by /Length etc. this is a nonissue but maybe worth doublechecking") # noqa: E501
61
64
  continue
62
65
  elif not isinstance(obj, dict):
63
- log.error(f"Obj {idnum} ({obj}) of type {type(obj)} isn't dict, cannot determine if it should be in tree")
66
+ log.error(f"Obj {idnum} ({obj}) of type {type(obj)} isn't dict, cannot determine if it should be in tree") # noqa: E501
64
67
  continue
65
68
  elif TYPE not in obj:
66
- msg = f"Obj {idnum} has no {TYPE} and is not in tree. Either a loose node w/no data or an error in pdfalyzer."
69
+ msg = f"Obj {idnum} has no {TYPE} and is not in tree. Either a loose node w/no data or an error in pdfalyzer." # noqa: E501
67
70
  msg += f"\nHere's the contents for you to assess:\n{obj}"
68
71
  log.warning(msg)
69
72
  continue
@@ -36,13 +36,13 @@ PARENTHESES = 'parentheses'
36
36
 
37
37
  QUOTE_PATTERNS = {
38
38
  BACKTICK: '`.+`',
39
- BRACKET: '\\[.+\\]', # { 91 [-] 93 }
40
- CURLY_BRACKET: '{.+}', # { 123 [-] 125 }
41
- DOUBLE_LESS_THAN: '<<.+>>', # Hex { 60 60 [-] 62 62 }
39
+ BRACKET: '\\[.+\\]', # { 91 [-] 93 }
40
+ CURLY_BRACKET: '{.+}', # { 123 [-] 125 }
41
+ DOUBLE_LESS_THAN: '<<.+>>', # Hex { 60 60 [-] 62 62 }
42
42
  ESCAPED_SINGLE: "\\'.+\\'",
43
43
  ESCAPED_DOUBLE: '\\".+\\"',
44
- FRONTSLASH: '/.+/', # { 47 [-] 47 }
45
- GUILLEMET: 'AB [-] BB', # Guillemet quotes are not ANSI so require byte pattern
46
- LESS_THAN: '<.+>', # Hex { 60 [-] 62 }
47
- PARENTHESES: '\\(.+\\)', # Hex { 28 [-] 29 }
44
+ FRONTSLASH: '/.+/', # { 47 [-] 47 }
45
+ GUILLEMET: 'AB [-] BB', # Guillemet quotes are not ANSI so require byte pattern
46
+ LESS_THAN: '<.+>', # Hex { 60 [-] 62 }
47
+ PARENTHESES: '\\(.+\\)', # Hex { 28 [-] 29 }
48
48
  }
@@ -8,8 +8,6 @@ from typing import Optional, Union
8
8
  from yaralyzer.config import YaralyzerConfig
9
9
  from yaralyzer.yaralyzer import Yaralyzer
10
10
 
11
- from pdfalyzer.config import PdfalyzerConfig
12
-
13
11
  YARA_RULES_DIR = files('pdfalyzer').joinpath('yara_rules')
14
12
 
15
13
  YARA_RULES_FILES = [
@@ -38,7 +36,7 @@ def _build_yaralyzer(scannable: Union[bytes, str], label: Optional[str] = None)
38
36
  with as_file(YARA_RULES_DIR.joinpath(YARA_RULES_FILES[2])) as yara2:
39
37
  with as_file(YARA_RULES_DIR.joinpath(YARA_RULES_FILES[3])) as yara3:
40
38
  with as_file(YARA_RULES_DIR.joinpath(YARA_RULES_FILES[4])) as yara4:
41
- # If there is a custom yara_rules argument file use that instead of the files in the yara_rules/ dir
39
+ # If there is a custom yara_rules arg, use that instead of the files in the yara_rules/ dir
42
40
  rules_paths = YaralyzerConfig.args.yara_rules_files or []
43
41
 
44
42
  if not YaralyzerConfig.args.no_default_yara_rules:
@@ -142,19 +142,19 @@ class FontInfo:
142
142
  console.line()
143
143
 
144
144
  # TODO: currently unused
145
- def preview_bytes_at_advertised_lengths(self):
146
- """Show the bytes at the boundaries provided by /Length1, /Length2, and /Length3, if they exist"""
147
- lengths = self.lengths or []
145
+ # def preview_bytes_at_advertised_lengths(self):
146
+ # """Show the bytes at the boundaries provided by /Length1, /Length2, and /Length3, if they exist"""
147
+ # lengths = self.lengths or []
148
148
 
149
- if self.lengths is None or len(lengths) <= 1:
150
- console.print("No length demarcations to preview.", style='grey.dark')
149
+ # if self.lengths is None or len(lengths) <= 1:
150
+ # console.print("No length demarcations to preview.", style='grey.dark')
151
151
 
152
- for i, demarcation in enumerate(lengths[1:]):
153
- console.print(f"{self.font_file} at /Length{i} ({demarcation}):")
154
- print(f"\n Stream before: {self.stream_data[demarcation - FONT_SECTION_PREVIEW_LEN:demarcation + 1]}")
155
- print(f"\n Stream after: {self.stream_data[demarcation:demarcation + FONT_SECTION_PREVIEW_LEN]}")
152
+ # for i, demarcation in enumerate(lengths[1:]):
153
+ # console.print(f"{self.font_file} at /Length{i} ({demarcation}):")
154
+ # print(f"\n Stream before: {self.stream_data[demarcation - FONT_SECTION_PREVIEW_LEN:demarcation + 1]}")
155
+ # print(f"\n Stream after: {self.stream_data[demarcation:demarcation + FONT_SECTION_PREVIEW_LEN]}")
156
156
 
157
- print(f"\nfinal bytes back from {self.stream_data.lengths[2]} + 10: {self.stream_data[-10 - -f.lengths[2]:]}")
157
+ # print(f"\nfinal bytes back from {self.stream_data.lengths[2]} + 10: {self.stream_data[-10 - -f.lengths[2]:]}")
158
158
 
159
159
  def __str__(self) -> str:
160
160
  return self.display_title
@@ -15,7 +15,7 @@ OPEN_FILES_BUFFER = 30 # we might have some files open already so we need
15
15
  PDF_EXT = '.pdf'
16
16
 
17
17
  # TODO: this kind of type alias is not supported until Python 3.12
18
- #type StrOrPath = Union[str, Path]
18
+ # type StrOrPath = Union[str, Path]
19
19
 
20
20
 
21
21
  def with_pdf_extension(file_path: Union[str, Path]) -> str:
@@ -92,11 +92,11 @@ def set_max_open_files(num_filehandles: int = DEFAULT_MAX_OPEN_FILES) -> tuple[O
92
92
  resource.setrlimit(resource.RLIMIT_NOFILE, (soft, hard))
93
93
  except (ValueError, resource.error):
94
94
  try:
95
- hard = soft
96
- print_highlighted(f"Retrying setting max open files (soft, hard)=({soft}, {hard})", style='yellow')
97
- resource.setrlimit(resource.RLIMIT_NOFILE, (soft, hard))
95
+ hard = soft
96
+ print_highlighted(f"Retrying setting max open files (soft, hard)=({soft}, {hard})", style='yellow')
97
+ resource.setrlimit(resource.RLIMIT_NOFILE, (soft, hard))
98
98
  except Exception:
99
- print_highlighted('Failed to set max open files / ulimit, giving up!', style='error')
100
- soft,hard = resource.getrlimit(resource.RLIMIT_NOFILE)
99
+ print_highlighted('Failed to set max open files / ulimit, giving up!', style='error')
100
+ soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
101
101
 
102
102
  return (soft, hard)
@@ -6,7 +6,6 @@ from typing import List, Optional
6
6
  from pypdf.generic import IndirectObject, PdfObject
7
7
 
8
8
  from pdfalyzer.pdf_object_relationship import PdfObjectRelationship
9
- from pdfalyzer.util.adobe_strings import *
10
9
 
11
10
 
12
11
  def pdf_object_id(pdf_object) -> Optional[int]:
@@ -20,11 +20,11 @@ def print_highlighted(msg: Union[str, Text], **kwargs) -> None:
20
20
 
21
21
 
22
22
  def quoted_text(
23
- _string: str,
24
- style: str = '',
25
- quote_char_style: str = 'white',
26
- quote_char: str = "'"
27
- ) -> Text:
23
+ _string: str,
24
+ style: str = '',
25
+ quote_char_style: str = 'white',
26
+ quote_char: str = "'"
27
+ ) -> Text:
28
28
  """Wrap _string in 'quote_char'. Style 'quote_char' with 'quote_char_style'."""
29
29
  quote_char_txt = Text(quote_char, style=quote_char_style)
30
30
  txt = quote_char_txt + Text(_string, style=style) + quote_char_txt
@@ -8,6 +8,7 @@ from yaralyzer.helpers.bytes_helper import print_bytes
8
8
  from yaralyzer.output.rich_console import console
9
9
  from yaralyzer.util.logging import log
10
10
 
11
+ # from pdfalyzer.font_info import FontInfo # Causes circular import
11
12
  from pdfalyzer.helpers.rich_text_helper import quoted_text
12
13
  from pdfalyzer.helpers.string_helper import pp
13
14
  from pdfalyzer.output.layout import print_headline_panel, subheading_width
@@ -17,7 +18,7 @@ CHARMAP_TITLE_PADDING = (1, 0, 0, 2)
17
18
  CHARMAP_PADDING = (0, 2, 0, 10)
18
19
 
19
20
 
20
- def print_character_mapping(font: 'FontInfo') -> None:
21
+ def print_character_mapping(font: 'FontInfo') -> None: # noqa: F821
21
22
  """Prints the character mapping extracted by PyPDF._charmap in tidy columns"""
22
23
  if font.character_mapping is None or len(font.character_mapping) == 0:
23
24
  log.info(f"No character map found in {font}")
@@ -37,7 +38,7 @@ def print_character_mapping(font: 'FontInfo') -> None:
37
38
  console.line()
38
39
 
39
40
 
40
- def print_prepared_charmap(font: 'FontInfo'):
41
+ def print_prepared_charmap(font: 'FontInfo'): # noqa: F821
41
42
  """Prints the prepared_charmap returned by PyPDF."""
42
43
  if font.prepared_char_map is None:
43
44
  log.info(f"No prepared_charmap found in {font}")
@@ -27,7 +27,9 @@ from pdfalyzer.output.tables.decoding_stats_table import build_decoding_stats_ta
27
27
  from pdfalyzer.output.tables.pdf_node_rich_table import generate_rich_tree, get_symlink_representation
28
28
  from pdfalyzer.output.tables.stream_objects_table import stream_objects_table
29
29
  from pdfalyzer.pdfalyzer import Pdfalyzer
30
- from pdfalyzer.util.adobe_strings import *
30
+ # from pdfalyzer.util.adobe_strings import *
31
+
32
+ INTERNAL_YARA_ERROR_MSG = "Internal YARA error! YARA's error codes can be checked here: https://github.com/VirusTotal/yara/blob/master/libyara/include/yara/error.h" # noqa: E501
31
33
 
32
34
 
33
35
  class PdfalyzerPresenter:
@@ -130,9 +132,9 @@ class PdfalyzerPresenter:
130
132
 
131
133
  try:
132
134
  self.yaralyzer.yaralyze()
133
- except yara.Error as e:
135
+ except yara.Error:
134
136
  console.print_exception()
135
- print_fatal_error_panel("Internal YARA error! YARA's error codes can be checked here: https://github.com/VirusTotal/yara/blob/master/libyara/include/yara/error.h")
137
+ print_fatal_error_panel(INTERNAL_YARA_ERROR_MSG)
136
138
  return
137
139
 
138
140
  YaralyzerConfig.args.standalone_mode = False
@@ -7,6 +7,7 @@ from rich.table import Table
7
7
  from rich.text import Text
8
8
  from yaralyzer.helpers.rich_text_helper import CENTER, na_txt, prefix_with_plain_text_obj
9
9
 
10
+ from pdfalyzer.binary.binary_scanner import BinaryScanner
10
11
  from pdfalyzer.helpers.rich_text_helper import pct_txt
11
12
  from pdfalyzer.output.layout import generate_subtable, half_width, pad_header
12
13
 
@@ -17,7 +18,7 @@ REGEX_SUBTABLE_COLS = ['Metric', 'Value']
17
18
  DECODES_SUBTABLE_COLS = ['Encoding', '#', 'Decoded', '#', 'Forced', '#', 'Failed']
18
19
 
19
20
 
20
- def build_decoding_stats_table(scanner: 'BinaryScanner') -> Table:
21
+ def build_decoding_stats_table(scanner: BinaryScanner) -> Table:
21
22
  """Diplay aggregate results on the decoding attempts we made on subsets of scanner.bytes"""
22
23
  stats_table = _new_decoding_stats_table(scanner.label.plain if scanner.label else '')
23
24
  regexes_not_found_in_stream = []
@@ -14,12 +14,12 @@ INCOMPARABLE_PROPS = ['from_obj', 'to_obj']
14
14
 
15
15
  class PdfObjectRelationship:
16
16
  def __init__(
17
- self,
18
- from_node: 'PdfTreeNode',
19
- to_obj: IndirectObject,
20
- reference_key: str,
21
- address: str
22
- ) -> None:
17
+ self,
18
+ from_node: 'PdfTreeNode',
19
+ to_obj: IndirectObject,
20
+ reference_key: str,
21
+ address: str
22
+ ) -> None:
23
23
  """
24
24
  In the case of easy key/value pairs the reference_key and the address are the same but
25
25
  for more complicated references the address will be the reference_key plus sub references.
@@ -53,12 +53,12 @@ class PdfObjectRelationship:
53
53
 
54
54
  @classmethod
55
55
  def build_node_references(
56
- cls,
57
- from_node: 'PdfTreeObject',
58
- from_obj: Optional[PdfObject] = None,
59
- ref_key: Optional[Union[str, int]] = None,
60
- address: Optional[str] = None
61
- ) -> List['PdfObjectRelationship']:
56
+ cls,
57
+ from_node: 'PdfTreeObject',
58
+ from_obj: Optional[PdfObject] = None,
59
+ ref_key: Optional[Union[str, int]] = None,
60
+ address: Optional[str] = None
61
+ ) -> List['PdfObjectRelationship']:
62
62
  """
63
63
  Builds list of relationships 'from_node.obj' contains referencing other PDF objects.
64
64
  Initially called with single arg from_node. Other args are employed when recursable
@@ -77,7 +77,7 @@ class Pdfalyzer:
77
77
  nodes_to_walk_next = [self._add_relationship_to_pdf_tree(r) for r in node.references_to_other_nodes()]
78
78
  node.all_references_processed = True
79
79
 
80
- for next_node in [n for n in nodes_to_walk_next if not (n is None or n.all_references_processed) ]:
80
+ for next_node in [n for n in nodes_to_walk_next if not (n is None or n.all_references_processed)]:
81
81
  if not next_node.all_references_processed:
82
82
  self.walk_node(next_node)
83
83
 
@@ -105,7 +105,7 @@ class Pdfalyzer:
105
105
 
106
106
  def stream_nodes(self) -> List[PdfTreeNode]:
107
107
  """List of actual nodes (not SymlinkNodes) containing streams sorted by PDF object ID"""
108
- stream_filter = lambda node: node.contains_stream() and not isinstance(node, SymlinkNode)
108
+ stream_filter = lambda node: node.contains_stream() and not isinstance(node, SymlinkNode) # noqa: E731
109
109
  return sorted(findall(self.pdf_tree, stream_filter), key=lambda r: r.idnum)
110
110
 
111
111
  def _add_relationship_to_pdf_tree(self, relationship: PdfObjectRelationship) -> Optional[PdfTreeNode]:
@@ -114,7 +114,7 @@ class Pdfalyzer:
114
114
  placed in the PDF node processing queue.
115
115
  """
116
116
  log.info(f'Assessing relationship {relationship}...')
117
- was_seen_before = (relationship.to_obj.idnum in self.nodes_encountered) # Must come before _build_or_find()
117
+ was_seen_before = (relationship.to_obj.idnum in self.nodes_encountered) # Must come before _build_or_find()
118
118
  from_node = relationship.from_node
119
119
  to_node = self._build_or_find_node(relationship.to_obj, relationship.address)
120
120
  self.max_generation = max([self.max_generation, relationship.to_obj.generation or 0])
@@ -133,7 +133,7 @@ class Pdfalyzer:
133
133
  from_node.set_parent(to_node)
134
134
  elif to_node.parent is not None:
135
135
  # Some StructElem nodes I have seen use /P or /K despite not being the real parent/child
136
- if relationship.from_node.type.startswith(STRUCT_ELEM):# reference_key != relationship.address:
136
+ if relationship.from_node.type.startswith(STRUCT_ELEM):
137
137
  log.info(f"{relationship} fail: {to_node} parent is already {to_node.parent}")
138
138
  else:
139
139
  log.warning(f"{relationship} fail: {to_node} parent is already {to_node.parent}")
@@ -173,7 +173,6 @@ class Pdfalyzer:
173
173
 
174
174
  def _resolve_indeterminate_nodes(self) -> None:
175
175
  """Place all indeterminate nodes in the tree."""
176
- #set_log_level('INFO')
177
176
  indeterminate_nodes = [self.nodes_encountered[idnum] for idnum in self.indeterminate_ids]
178
177
  indeterminate_nodes_string = "\n ".join([f"{node}" for node in indeterminate_nodes])
179
178
  log.info(f"Resolving {len(indeterminate_nodes)} indeterminate nodes: {indeterminate_nodes_string}")
@@ -116,20 +116,20 @@ NON_TREE_REFERENCES = [
116
116
 
117
117
  # Some PdfObjects can't be properly placed in the tree until the entire tree is parsed
118
118
  INDETERMINATE_REF_KEYS = [
119
- ANNOTS, # At least when it appears in a page
119
+ ANNOTS, # At least when it appears in a page
120
120
  COLOR_SPACE,
121
121
  D,
122
122
  DEST,
123
123
  EXT_G_STATE,
124
- FIELDS, # At least for /AcroForm
124
+ FIELDS, # At least for /AcroForm
125
125
  FIRST,
126
126
  FONT,
127
127
  NAMES,
128
128
  OPEN_ACTION,
129
- P, # At least for widgets...
129
+ P, # At least for widgets...
130
130
  RESOURCES,
131
131
  XOBJECT,
132
- UNLABELED, # TODO: this might be wrong? maybe this is where the /Resources actually live?
132
+ UNLABELED, # TODO: this might be wrong? maybe this is where the /Resources actually live?
133
133
  ]
134
134
 
135
135
  INDETERMINATE_PREFIXES = [p for p in INDETERMINATE_REF_KEYS if len(p) > 2]
@@ -92,9 +92,9 @@ select.add_argument('-c', '--counts', action='store_true',
92
92
  help='show counts of some of the properties of the objects in the PDF')
93
93
 
94
94
  select.add_argument('-s', '--streams',
95
- help="scan all the PDF's decoded/decrypted streams for sus content as well as any YARA rule matches. " + \
96
- "brute force is involved; output is verbose. a single OBJ_ID can be optionally provided to " + \
97
- "limit the output to a single internal object. try '-s -- [OTHERARGS]' if you run into an " + \
95
+ help="scan all the PDF's decoded/decrypted streams for sus content as well as any YARA rule matches. " +
96
+ "brute force is involved; output is verbose. a single OBJ_ID can be optionally provided to " +
97
+ "limit the output to a single internal object. try '-s -- [OTHERARGS]' if you run into an " +
98
98
  "argument position related piccadilly.",
99
99
  nargs='?',
100
100
  const=ALL_STREAMS,
@@ -102,7 +102,7 @@ select.add_argument('-s', '--streams',
102
102
  type=int)
103
103
 
104
104
  select.add_argument('--extract-quoted',
105
- help="extract and force decode all bytes found between this kind of quotation marks " + \
105
+ help="extract and force decode all bytes found between this kind of quotation marks " +
106
106
  "(requires --streams. can be specified more than once)",
107
107
  choices=list(QUOTE_PATTERNS.keys()),
108
108
  dest='extract_quoteds',
@@ -147,7 +147,7 @@ def parse_arguments():
147
147
  args.output_dir = args.output_dir or getcwd()
148
148
  file_prefix = (args.file_prefix + '__') if args.file_prefix else ''
149
149
  args.file_suffix = ('_' + args.file_suffix) if args.file_suffix else ''
150
- args.output_basename = f"{file_prefix}{path.basename(args.file_to_scan_path)}"
150
+ args.output_basename = f"{file_prefix}{path.basename(args.file_to_scan_path)}"
151
151
  elif args.output_dir:
152
152
  log.warning('--output-dir provided but no export option was chosen')
153
153
 
@@ -203,8 +203,8 @@ MAX_QUALITY = 10
203
203
 
204
204
  combine_pdfs_parser = ArgumentParser(
205
205
  description="Combine multiple PDFs into one.",
206
- epilog="If all PDFs end in a number (e.g. 'xyz_1.pdf', 'xyz_2.pdf', etc. sort the files as if those were" \
207
- " page numebrs prior to merging.",
206
+ epilog="If all PDFs end in a number (e.g. 'xyz_1.pdf', 'xyz_2.pdf', etc. sort the files as if those were" +
207
+ " page numbers prior to merging.",
208
208
  formatter_class=RichHelpFormatterPlus)
209
209
 
210
210
  combine_pdfs_parser.add_argument('pdfs',
@@ -1,13 +1,35 @@
1
1
  [tool.poetry]
2
2
  name = "pdfalyzer"
3
- version = "1.16.10"
4
- description = "A PDF analysis toolkit. Scan a PDF with relevant YARA rules, visualize its inner tree-like data structure in living color (lots of colors), force decodes of suspicious font binaries, and more."
3
+ version = "1.16.12"
4
+ description = "PDF analysis tool. Scan a PDF with YARA rules, visualize its inner tree-like data structure in living color (lots of colors), force decodes of suspicious font binaries, and more."
5
5
  authors = ["Michel de Cryptadamus <michel@cryptadamus.com>"]
6
6
  license = "GPL-3.0-or-later"
7
7
  readme = "README.md"
8
+ documentation = "https://github.com/michelcrypt4d4mus/pdfalyzer"
8
9
  homepage = "https://github.com/michelcrypt4d4mus/pdfalyzer"
9
10
  repository = "https://github.com/michelcrypt4d4mus/pdfalyzer"
10
- documentation = "https://github.com/michelcrypt4d4mus/pdfalyzer"
11
+
12
+ classifiers = [
13
+ "Development Status :: 5 - Production/Stable",
14
+ "Environment :: Console",
15
+ "Intended Audience :: Information Technology",
16
+ "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
17
+ "Programming Language :: Python",
18
+ "Programming Language :: Python :: 3.9",
19
+ "Programming Language :: Python :: 3.10",
20
+ "Programming Language :: Python :: 3.11",
21
+ "Programming Language :: Python :: 3.12",
22
+ "Programming Language :: Python :: 3.13",
23
+ "Topic :: Artistic Software",
24
+ "Topic :: Security",
25
+ "Topic :: Scientific/Engineering :: Visualization",
26
+ ]
27
+
28
+ include = [
29
+ "CHANGELOG.md",
30
+ "LICENSE",
31
+ ".pdfalyzer.example"
32
+ ]
11
33
 
12
34
  keywords = [
13
35
  "ascii art",
@@ -25,65 +47,63 @@ keywords = [
25
47
  "pdf",
26
48
  "pdfs",
27
49
  "pdf analysis",
50
+ "pypdf",
28
51
  "threat assessment",
52
+ "threat hunting",
53
+ "threat intelligence",
54
+ "threat research",
55
+ "threatintel",
29
56
  "visualization",
30
57
  "yara"
31
58
  ]
32
59
 
33
- classifiers = [
34
- "Development Status :: 5 - Production/Stable",
35
- "Environment :: Console",
36
- "Intended Audience :: Information Technology",
37
- "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
38
- "Programming Language :: Python",
39
- "Topic :: Artistic Software",
40
- "Topic :: Security",
41
- "Topic :: Scientific/Engineering :: Visualization",
42
- ]
43
-
44
- include = [
45
- "CHANGELOG.md",
46
- "LICENSE",
47
- ".pdfalyzer.example"
48
- ]
49
-
50
60
  packages = [
51
61
  { include = "pdfalyzer" }
52
62
  ]
53
63
 
54
64
 
55
- # Dependencies
65
+ #####################
66
+ # Dependencies #
67
+ #####################
56
68
  [tool.poetry.dependencies]
57
- python = "^3.9.2"
69
+ python = "^3.9,>=3.9.2"
58
70
  anytree = "~=2.13"
59
- pypdf = "^5.9.0"
71
+ pypdf = "^6.0.0"
60
72
  yaralyzer = "^1.0.4"
61
73
 
62
- # Dev dependencies
63
74
  [tool.poetry.group.dev.dependencies]
75
+ flake8 = "^7.3.0"
64
76
  pytest = "^7.1.2"
65
77
  pytest-skip-slow = "^0.0.3"
66
78
 
67
79
 
68
- # Scripts
80
+ #############
81
+ # Scripts #
82
+ #############
69
83
  [tool.poetry.scripts]
70
84
  combine_pdfs = 'pdfalyzer:combine_pdfs'
71
85
  pdfalyze = 'pdfalyzer:pdfalyze'
72
86
  pdfalyzer_show_color_theme = 'pdfalyzer:pdfalyzer_show_color_theme'
73
87
 
74
88
 
75
- # URLs for PyPi page
89
+ #####################
90
+ # PyPi URLs #
91
+ #####################
76
92
  [tool.poetry.urls]
77
93
  Changelog = "https://github.com/michelcrypt4d4mus/pdfalyzer/blob/master/CHANGELOG.md"
78
94
 
79
95
 
80
- # Poetry build system
96
+ ###############################
97
+ # Poetry build system #
98
+ ###############################
81
99
  [build-system]
82
- requires = ["poetry-core>=1.0.0"]
83
100
  build-backend = "poetry.core.masonry.api"
101
+ requires = ["poetry-core>=1.0.0"]
84
102
 
85
103
 
86
- # Pytest configuration
104
+ ##################
105
+ # pytest #
106
+ ##################
87
107
  [tool.pytest.ini_options]
88
108
  addopts = [
89
109
  "--import-mode=importlib",
File without changes
@@ -1,7 +1,7 @@
1
+
1
2
  import logging
2
3
 
3
4
 
4
5
  # Starting pdb.set_trace() this way kind of sucks because yr locals are messed up
5
6
  def debugger():
6
7
  import pdb; pdb.set_trace(locals())
7
-