PyPI - pdfalyzer - Versions diffs - 1.15.0__tar.gz → 1.15.1__tar.gz - Mend

pdfalyzer 1.15.0.tar.gz → 1.15.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pdfalyzer might be problematic. Click here for more details.

Files changed (45) hide show

{pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/CHANGELOG.md RENAMED Viewed

@@ -1,5 +1,8 @@
 # NEXT RELEASE
+### 1.15.1
+* Add `--no-default-yara-rules` command line option so users can use _only_ their own custom YARA rules files if they want. Previously you could only use custom YARA rules _in addition to_ the default rules; now you can just skip the default rules.
 # 1.15.0
 * Add `combine_pdfs` command line script to merge a bunch of PDFs into one
 * Remove unused `Deprecated` dependency

{pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pdfalyzer
-Version: 1.15.0
+Version: 1.15.1
 Summary: A PDF analysis toolkit. Scan a PDF with relevant YARA rules, visualize its inner tree-like data structure in living color (lots of colors), force decodes of suspicious font binaries, and more.
 Home-page: https://github.com/michelcrypt4d4mus/pdfalyzer
 License: GPL-3.0-or-later

{pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/__init__.py RENAMED Viewed

@@ -51,6 +51,7 @@ def pdfalyze():
         log_and_print(f"Binary stream extraction complete, files written to '{args.output_dir}'.\nExiting.\n")
         sys.exit()
+    # The method that gets called is related to the argument name. See 'possible_output_sections' list in argument_parser.py
     # Analysis exports wrap themselves around the methods that actually generate the analyses
     for (arg, method) in output_sections(args, pdfalyzer):
         if args.output_dir:

{pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/detection/yaralyzer_helper.py RENAMED Viewed

@@ -8,6 +8,8 @@ from typing import Optional, Union
 from yaralyzer.config import YaralyzerConfig
 from yaralyzer.yaralyzer import Yaralyzer
+from pdfalyzer.config import PdfalyzerConfig
 YARA_RULES_DIR = files('pdfalyzer').joinpath('yara_rules')
 YARA_RULES_FILES = [
@@ -32,8 +34,11 @@ def _build_yaralyzer(scannable: Union[bytes, str], label: Optional[str] = None)
     with as_file(YARA_RULES_DIR.joinpath(YARA_RULES_FILES[0])) as yara0:
         with as_file(YARA_RULES_DIR.joinpath(YARA_RULES_FILES[1])) as yara1:
             with as_file(YARA_RULES_DIR.joinpath(YARA_RULES_FILES[2])) as yara2:
-                rules_paths = [str(y) for y in [yara0, yara1, yara2]]
-                rules_paths += YaralyzerConfig.args.yara_rules_files or []
+                # If there is a custom yara_rules argument file use that instead of the files in the yara_rules/ dir
+                rules_paths = YaralyzerConfig.args.yara_rules_files or []
+                if not YaralyzerConfig.args.no_default_yara_rules:
+                    rules_paths += [str(y) for y in [yara0, yara1, yara2]]
                 try:
                     return Yaralyzer.for_rules_files(rules_paths, scannable, label)

{pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/output/pdfalyzer_presenter.py RENAMED Viewed

@@ -124,7 +124,7 @@ class PdfalyzerPresenter:
                 console.print(build_decoding_stats_table(binary_scanner), justify='center')
     def print_yara_results(self) -> None:
-        """Scan the overall PDF and each individual binary stream in it with yara_rules/ files"""
+        """Scan the main PDF and each individual binary stream in it with yara_rules/*.yara files"""
         print_section_header(f"YARA Scan of PDF rules for '{self.pdfalyzer.pdf_basename}'")
         YaralyzerConfig.args.standalone_mode = True  # TODO: using 'standalone mode' like this kind of sucks

{pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pdfalyzer/util/argument_parser.py RENAMED Viewed

@@ -9,7 +9,7 @@ from typing import List
 from rich_argparse_plus import RichHelpFormatterPlus
 from rich.prompt import Confirm
 from rich.text import Text
-from yaralyzer.util.argument_parser import export, parser, parse_arguments as parse_yaralyzer_args
+from yaralyzer.util.argument_parser import export, parser, parse_arguments as parse_yaralyzer_args, source
 from yaralyzer.util.logging import log, log_and_print, log_argparse_result, log_current_config, log_invocation
 from pdfalyzer.config import ALL_STREAMS, PdfalyzerConfig
@@ -50,8 +50,13 @@ export.add_argument('-bin', '--extract-binary-streams',
                     const='bin',
                     help='extract all binary streams in the PDF to separate files (requires pdf-parser.py)')
+# Add one more option to the YARA rules section
+source.add_argument('--no-default-yara-rules',
+                    action='store_true',
+                    help='if --yara is selected use only custom rules from --yara-file arg and not the default included YARA rules')
-#  Note that we extend the yaralyzer's parser and export
+# Note that we extend the yaralyzer's parser and export
 parser = ArgumentParser(
     formatter_class=RichHelpFormatterPlus,
     description=DESCRIPTION,
@@ -78,7 +83,7 @@ select.add_argument('-f', '--fonts', action='store_true',
                     help="show info about fonts included character mappings for embedded font binaries")
 select.add_argument('-y', '--yara', action='store_true',
-                    help="scan the PDF with YARA rules")
+                    help="scan the PDF with the included malicious PDF YARA rules and/or your custom YARA rules")
 select.add_argument('-c', '--counts', action='store_true',
                     help='show counts of some of the properties of the objects in the PDF')
@@ -127,10 +132,13 @@ def parse_arguments():
     if not args.streams:
         if args.extract_quoteds:
-            raise ArgumentError(None, "--extract-quoted does nothing if --streams is not selected")
+            exit_with_error("--extract-quoted does nothing if --streams is not selected")
         if args.suppress_boms:
             log.warning("--suppress-boms has nothing to suppress if --streams is not selected")
+    if args.no_default_yara_rules and not args.yara_rules_files:
+        exit_with_error("--no-default-yara-rules requires at least one --yara-file argument")
     # File export options
     if args.export_svg or args.export_txt or args.export_html or args.extract_binary_streams:
         args.output_dir = args.output_dir or getcwd()
@@ -149,8 +157,8 @@ def parse_arguments():
 def output_sections(args, pdfalyzer) -> List[OutputSection]:
     """
-    Determine which of the tree visualizations, font scans, etc were requested.
-    If nothing was specified the default is to output all sections.
+    Determine which of the tree visualizations, font scans, etc should be run.
+    If nothing is specified output ALL sections other than --streams which is v. slow/verbose.
     """
     # Create a partial for print_font_info() because it's the only one that can take an argument
     # partials have no __name__ so update_wrapper() propagates the 'print_font_info' as this partial's name
@@ -158,7 +166,8 @@ def output_sections(args, pdfalyzer) -> List[OutputSection]:
     stream_scan = partial(pdfalyzer.print_streams_analysis, idnum=stream_id)
     update_wrapper(stream_scan, pdfalyzer.print_streams_analysis)
-    # The first element string matches the argument in 'select' group.
+    # 1st element string matches the argument in 'select' group
+    # 2nd is fxn to call if selected.
     # Top to bottom is the default order of output.
     possible_output_sections = [
         OutputSection(DOCINFO, pdfalyzer.print_document_info),
@@ -246,7 +255,7 @@ def ask_to_proceed() -> None:
 def exit_with_error(error_message: str|None = None) -> None:
     """Print 'error_message' and exit with status code 1."""
     if error_message:
-        print_highlighted(error_message, style='bold red')
+        print_highlighted(Text('').append('ERROR', style='bold red').append(f': {error_message}'))
-    print_highlighted('Exiting...', style='red')
+    print_highlighted('Exiting...', style='dim red')
     sys.exit(1)

{pdfalyzer-1.15.0 → pdfalyzer-1.15.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pdfalyzer"
-version = "1.15.0"
+version = "1.15.1"
 description = "A PDF analysis toolkit. Scan a PDF with relevant YARA rules, visualize its inner tree-like data structure in living color (lots of colors), force decodes of suspicious font binaries, and more."
 authors = ["Michel de Cryptadamus <michel@cryptadamus.com>"]
 license = "GPL-3.0-or-later"