pdfalyzer 1.15.0__tar.gz → 1.15.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pdfalyzer might be problematic. Click here for more details.

Files changed (45)
  1. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/CHANGELOG.md +3 -0
  2. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/PKG-INFO +1 -1
  3. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/__init__.py +1 -0
  4. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/detection/yaralyzer_helper.py +7 -2
  5. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/output/pdfalyzer_presenter.py +1 -1
  6. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/util/argument_parser.py +18 -9
  7. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pyproject.toml +1 -1
  8. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/LICENSE +0 -0
  9. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/README.md +0 -0
  10. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/__main__.py +0 -0
  11. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/binary/binary_scanner.py +0 -0
  12. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/config.py +0 -0
  13. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/decorators/document_model_printer.py +0 -0
  14. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/decorators/indeterminate_node.py +0 -0
  15. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/decorators/pdf_object_properties.py +0 -0
  16. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/decorators/pdf_tree_node.py +0 -0
  17. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/decorators/pdf_tree_verifier.py +0 -0
  18. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/detection/constants/binary_regexes.py +0 -0
  19. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/detection/constants/javascript_reserved_keywords.py +0 -0
  20. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/detection/javascript_hunter.py +0 -0
  21. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/font_info.py +0 -0
  22. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/helpers/dict_helper.py +0 -0
  23. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/helpers/filesystem_helper.py +0 -0
  24. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/helpers/number_helper.py +0 -0
  25. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/helpers/pdf_object_helper.py +0 -0
  26. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/helpers/rich_text_helper.py +0 -0
  27. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/helpers/string_helper.py +0 -0
  28. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/output/character_mapping.py +0 -0
  29. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/output/layout.py +0 -0
  30. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/output/styles/node_colors.py +0 -0
  31. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/output/styles/rich_theme.py +0 -0
  32. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/output/tables/decoding_stats_table.py +0 -0
  33. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/output/tables/font_summary_table.py +0 -0
  34. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/output/tables/pdf_node_rich_table.py +0 -0
  35. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/output/tables/stream_objects_table.py +0 -0
  36. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/pdf_object_relationship.py +0 -0
  37. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/pdfalyzer.py +0 -0
  38. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/util/adobe_strings.py +0 -0
  39. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/util/debugging.py +0 -0
  40. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/util/exceptions.py +0 -0
  41. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/util/pdf_parser_manager.py +0 -0
  42. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/yara_rules/PDF.yara +0 -0
  43. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/yara_rules/PDF_binary_stream.yara +0 -0
  44. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/yara_rules/__init.py__ +0 -0
  45. {pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/yara_rules/lprat.static_file_analysis.yara +0 -0
@@ -1,5 +1,8 @@
1
1
  # NEXT RELEASE
2
2
 
3
+ ### 1.15.1
4
+ * Add `--no-default-yara-rules` command line option so users can use _only_ their own custom YARA rules files if they want. Previously you could only use custom YARA rules _in addition to_ the default rules; now you can just skip the default rules.
5
+
3
6
  # 1.15.0
4
7
  * Add `combine_pdfs` command line script to merge a bunch of PDFs into one
5
8
  * Remove unused `Deprecated` dependency
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pdfalyzer
3
- Version: 1.15.0
3
+ Version: 1.15.1
4
4
  Summary: A PDF analysis toolkit. Scan a PDF with relevant YARA rules, visualize its inner tree-like data structure in living color (lots of colors), force decodes of suspicious font binaries, and more.
5
5
  Home-page: https://github.com/michelcrypt4d4mus/pdfalyzer
6
6
  License: GPL-3.0-or-later
@@ -51,6 +51,7 @@ def pdfalyze():
51
51
  log_and_print(f"Binary stream extraction complete, files written to '{args.output_dir}'.\nExiting.\n")
52
52
  sys.exit()
53
53
 
54
+ # The method that gets called is related to the argument name. See 'possible_output_sections' list in argument_parser.py
54
55
  # Analysis exports wrap themselves around the methods that actually generate the analyses
55
56
  for (arg, method) in output_sections(args, pdfalyzer):
56
57
  if args.output_dir:
@@ -8,6 +8,8 @@ from typing import Optional, Union
8
8
  from yaralyzer.config import YaralyzerConfig
9
9
  from yaralyzer.yaralyzer import Yaralyzer
10
10
 
11
+ from pdfalyzer.config import PdfalyzerConfig
12
+
11
13
  YARA_RULES_DIR = files('pdfalyzer').joinpath('yara_rules')
12
14
 
13
15
  YARA_RULES_FILES = [
@@ -32,8 +34,11 @@ def _build_yaralyzer(scannable: Union[bytes, str], label: Optional[str] = None)
32
34
  with as_file(YARA_RULES_DIR.joinpath(YARA_RULES_FILES[0])) as yara0:
33
35
  with as_file(YARA_RULES_DIR.joinpath(YARA_RULES_FILES[1])) as yara1:
34
36
  with as_file(YARA_RULES_DIR.joinpath(YARA_RULES_FILES[2])) as yara2:
35
- rules_paths = [str(y) for y in [yara0, yara1, yara2]]
36
- rules_paths += YaralyzerConfig.args.yara_rules_files or []
37
+ # If there is a custom yara_rules argument file use that instead of the files in the yara_rules/ dir
38
+ rules_paths = YaralyzerConfig.args.yara_rules_files or []
39
+
40
+ if not YaralyzerConfig.args.no_default_yara_rules:
41
+ rules_paths += [str(y) for y in [yara0, yara1, yara2]]
37
42
 
38
43
  try:
39
44
  return Yaralyzer.for_rules_files(rules_paths, scannable, label)
@@ -124,7 +124,7 @@ class PdfalyzerPresenter:
124
124
  console.print(build_decoding_stats_table(binary_scanner), justify='center')
125
125
 
126
126
  def print_yara_results(self) -> None:
127
- """Scan the overall PDF and each individual binary stream in it with yara_rules/ files"""
127
+ """Scan the main PDF and each individual binary stream in it with yara_rules/*.yara files"""
128
128
  print_section_header(f"YARA Scan of PDF rules for '{self.pdfalyzer.pdf_basename}'")
129
129
  YaralyzerConfig.args.standalone_mode = True # TODO: using 'standalone mode' like this kind of sucks
130
130
 
@@ -9,7 +9,7 @@ from typing import List
9
9
  from rich_argparse_plus import RichHelpFormatterPlus
10
10
  from rich.prompt import Confirm
11
11
  from rich.text import Text
12
- from yaralyzer.util.argument_parser import export, parser, parse_arguments as parse_yaralyzer_args
12
+ from yaralyzer.util.argument_parser import export, parser, parse_arguments as parse_yaralyzer_args, source
13
13
  from yaralyzer.util.logging import log, log_and_print, log_argparse_result, log_current_config, log_invocation
14
14
 
15
15
  from pdfalyzer.config import ALL_STREAMS, PdfalyzerConfig
@@ -50,8 +50,13 @@ export.add_argument('-bin', '--extract-binary-streams',
50
50
  const='bin',
51
51
  help='extract all binary streams in the PDF to separate files (requires pdf-parser.py)')
52
52
 
53
+ # Add one more option to the YARA rules section
54
+ source.add_argument('--no-default-yara-rules',
55
+ action='store_true',
56
+ help='if --yara is selected use only custom rules from --yara-file arg and not the default included YARA rules')
53
57
 
54
- # Note that we extend the yaralyzer's parser and export
58
+
59
+ # Note that we extend the yaralyzer's parser and export
55
60
  parser = ArgumentParser(
56
61
  formatter_class=RichHelpFormatterPlus,
57
62
  description=DESCRIPTION,
@@ -78,7 +83,7 @@ select.add_argument('-f', '--fonts', action='store_true',
78
83
  help="show info about fonts included character mappings for embedded font binaries")
79
84
 
80
85
  select.add_argument('-y', '--yara', action='store_true',
81
- help="scan the PDF with YARA rules")
86
+ help="scan the PDF with the included malicious PDF YARA rules and/or your custom YARA rules")
82
87
 
83
88
  select.add_argument('-c', '--counts', action='store_true',
84
89
  help='show counts of some of the properties of the objects in the PDF')
@@ -127,10 +132,13 @@ def parse_arguments():
127
132
 
128
133
  if not args.streams:
129
134
  if args.extract_quoteds:
130
- raise ArgumentError(None, "--extract-quoted does nothing if --streams is not selected")
135
+ exit_with_error("--extract-quoted does nothing if --streams is not selected")
131
136
  if args.suppress_boms:
132
137
  log.warning("--suppress-boms has nothing to suppress if --streams is not selected")
133
138
 
139
+ if args.no_default_yara_rules and not args.yara_rules_files:
140
+ exit_with_error("--no-default-yara-rules requires at least one --yara-file argument")
141
+
134
142
  # File export options
135
143
  if args.export_svg or args.export_txt or args.export_html or args.extract_binary_streams:
136
144
  args.output_dir = args.output_dir or getcwd()
@@ -149,8 +157,8 @@ def parse_arguments():
149
157
 
150
158
  def output_sections(args, pdfalyzer) -> List[OutputSection]:
151
159
  """
152
- Determine which of the tree visualizations, font scans, etc were requested.
153
- If nothing was specified the default is to output all sections.
160
+ Determine which of the tree visualizations, font scans, etc should be run.
161
+ If nothing is specified output ALL sections other than --streams which is v. slow/verbose.
154
162
  """
155
163
  # Create a partial for print_font_info() because it's the only one that can take an argument
156
164
  # partials have no __name__ so update_wrapper() propagates the 'print_font_info' as this partial's name
@@ -158,7 +166,8 @@ def output_sections(args, pdfalyzer) -> List[OutputSection]:
158
166
  stream_scan = partial(pdfalyzer.print_streams_analysis, idnum=stream_id)
159
167
  update_wrapper(stream_scan, pdfalyzer.print_streams_analysis)
160
168
 
161
- # The first element string matches the argument in 'select' group.
169
+ # 1st element string matches the argument in 'select' group
170
+ # 2nd is fxn to call if selected.
162
171
  # Top to bottom is the default order of output.
163
172
  possible_output_sections = [
164
173
  OutputSection(DOCINFO, pdfalyzer.print_document_info),
@@ -246,7 +255,7 @@ def ask_to_proceed() -> None:
246
255
  def exit_with_error(error_message: str|None = None) -> None:
247
256
  """Print 'error_message' and exit with status code 1."""
248
257
  if error_message:
249
- print_highlighted(error_message, style='bold red')
258
+ print_highlighted(Text('').append('ERROR', style='bold red').append(f': {error_message}'))
250
259
 
251
- print_highlighted('Exiting...', style='red')
260
+ print_highlighted('Exiting...', style='dim red')
252
261
  sys.exit(1)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pdfalyzer"
3
- version = "1.15.0"
3
+ version = "1.15.1"
4
4
  description = "A PDF analysis toolkit. Scan a PDF with relevant YARA rules, visualize its inner tree-like data structure in living color (lots of colors), force decodes of suspicious font binaries, and more."
5
5
  authors = ["Michel de Cryptadamus <michel@cryptadamus.com>"]
6
6
  license = "GPL-3.0-or-later"
File without changes
File without changes