PyPI - pdfalyzer - Versions diffs - 1.17.1__py3-none-any.whl → 1.17.3__py3-none-any.whl - Mend

pdfalyzer 1.17.1__py3-none-any.whl → 1.17.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pdfalyzer might be problematic. Click here for more details.

Files changed (11) hide show

CHANGELOG.md CHANGED Viewed

@@ -1,7 +1,14 @@
 # NEXT RELEASE
+### 1.17.3
+* Put back `--debug` arg for CLI tools
+### 1.17.2
+* Remove unused `--debug` args for CLI tools
+* Rename `extract_text_from_pdfs` to `extract_pdf_text`
 ### 1.17.1
-* Fix issue where `combine_pdfs` page ranges were indexed from 0 instead of 1
+* Fix issue where `extract_pdf_pages` page ranges were indexed from 0 instead of 1
 # 1.17.0
 * Add `extract_pdf_pages` command line tool (imported from `clown_sort`)

pdfalyzer/__init__.py CHANGED Viewed

@@ -32,8 +32,8 @@ from pdfalyzer.output.pdfalyzer_presenter import PdfalyzerPresenter
 from pdfalyzer.output.styles.rich_theme import PDFALYZER_THEME_DICT
 from pdfalyzer.pdfalyzer import Pdfalyzer
 from pdfalyzer.util.argument_parser import ask_to_proceed, output_sections, parse_arguments
-from pdfalyzer.util.cli_tools_argument_parser import (MAX_QUALITY, ask_to_proceed,
-     parse_combine_pdfs_args, parse_pdf_page_extraction_args, parse_text_extraction_args)
+from pdfalyzer.util.cli_tools_argument_parser import (MAX_QUALITY, parse_combine_pdfs_args,
+     parse_pdf_page_extraction_args, parse_text_extraction_args)
 from pdfalyzer.util.pdf_parser_manager import PdfParserManager
 # For the table shown by running pdfalyzer_show_color_theme
@@ -141,7 +141,7 @@ def extract_pdf_pages() -> None:
     PdfFile(args.pdf_file).extract_page_range(args.page_range, destination_dir=args.destination_dir)
-def extract_text_from_pdfs() -> None:
+def extract_pdf_text() -> None:
     """Extract text from a list of file or from all PDF files in a list of directories."""
     args: Namespace = parse_text_extraction_args()
     console.line()

pdfalyzer/decorators/pdf_file.py CHANGED Viewed

@@ -30,9 +30,9 @@ class PdfFile:
     Attributes:
         file_path (Path): The path to the PDF file.
-        dirname (Path): The directory containing the PDF file.
         basename (str): The base name of the PDF file (with extension).
         basename_without_ext (str): The base name of the PDF file (without extension).
+        dirname (Path): The directory containing the PDF file.
         extname (str): The file extension of the PDF file.
         file_size (int): The size of the file in bytes.
     """
@@ -74,11 +74,15 @@ class PdfFile:
         """
         destination_dir = Path(destination_dir or self.dirname)
         create_dir_if_it_does_not_exist(destination_dir)
+        pdf_reader = PdfReader(self.file_path)
+        page_count = len(pdf_reader.pages)
+        file_suffix = page_range.file_suffix()
+        if page_count < (page_range.last_page - 1):
+            raise ValueError(f"PDF only has {page_count} pages but you asked for pages {page_range}!")
-        if extra_file_suffix is None:
-            file_suffix = page_range.file_suffix()
-        else:
-            file_suffix = f"{page_range.file_suffix()}__{extra_file_suffix}"
+        if extra_file_suffix is not None:
+            file_suffix += f"__{extra_file_suffix}"
         extracted_pages_pdf_basename = insert_suffix_before_extension(self.file_path, file_suffix).name
         extracted_pages_pdf_path = destination_dir.joinpath(extracted_pages_pdf_basename)

pdfalyzer/helpers/filesystem_helper.py CHANGED Viewed

@@ -6,7 +6,6 @@ from pathlib import Path
 from typing import Optional, Union
 from yaralyzer.output.rich_console import console
-from yaralyzer.util.logging import log
 from pdfalyzer.helpers.rich_text_helper import print_highlighted

pdfalyzer/util/argument_parser.py CHANGED Viewed

@@ -196,7 +196,7 @@ def output_sections(args: Namespace, pdfalyzer: 'Pdfalyzer') -> List[OutputSecti
 def all_sections_chosen(args):
-    """Returns true if all flags are set or no flags are set."""
+    """Returns True if all flags are set or no flags are set."""
     return len([s for s in ALL_SECTIONS if vars(args)[s]]) == len(ALL_SECTIONS)

pdfalyzer/util/cli_tools_argument_parser.py CHANGED Viewed

@@ -2,8 +2,10 @@
 Argument parsers for the command line tools other than `pdfalyze` that are included with The Pdfalyzer.
 1. combine_pdfs
-2.
+2. extract_pdf_pages
+3. extract_pdf_text
 """
+import logging
 import sys
 from argparse import ArgumentParser, Namespace
 from pathlib import Path
@@ -87,6 +89,7 @@ extract_pdf_parser = ArgumentParser(
 )
 extract_pdf_parser.add_argument('pdf_file', metavar='PDF_FILE', help='PDF to extract pages from')
+extract_pdf_parser.add_argument('--debug', action='store_true', help='turn on debug level logging')
 extract_pdf_parser.add_argument('--page-range', '-r',
                                 type=page_range_validator,
@@ -97,8 +100,6 @@ extract_pdf_parser.add_argument('--destination-dir', '-d',
                                 help="directory to write the new PDF to",
                                 default=Path.cwd())
-extract_pdf_parser.add_argument('--debug', action='store_true', help='turn on debug level logging')
 def parse_pdf_page_extraction_args() -> Namespace:
     args = extract_pdf_parser.parse_args()
@@ -110,12 +111,13 @@ def parse_pdf_page_extraction_args() -> Namespace:
         log.error(f"Destination dir '{args.destination_dir}' does not exist.")
         sys.exit(1)
+    _set_log_level(args)
     return args
-############################
-#  extract_text_from_pdfs  #
-############################
+######################
+#  extract_pdf_text  #
+######################
 extract_text_parser = ArgumentParser(
     formatter_class=RichHelpFormatterPlus,
     description="Extract the text from one or more files or directories.",
@@ -153,4 +155,10 @@ def parse_text_extraction_args() -> Namespace:
         log.error(f"--page-range can only be specified for a single PDF")
         sys.exit(-1)
+    _set_log_level(args)
     return args
+def _set_log_level(args: Namespace):
+    if args.debug:
+        log.setLevel(logging.DEBUG)

{pdfalyzer-1.17.1.dist-info → pdfalyzer-1.17.3.dist-info}/METADATA RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.1
 Name: pdfalyzer
-Version: 1.17.1
-Summary: PDF analysis tool. Scan a PDF with YARA rules, visualize its inner tree-like data structure in living color (lots of colors), force decodes of suspicious font binaries, and more.
+Version: 1.17.3
+Summary: Analyze PDFs with colors (and YARA). Visualize a PDF's inner tree-like data structure, check it against a library of YARA rules, force decodes of suspicious font binaries, and more.
 Home-page: https://github.com/michelcrypt4d4mus/pdfalyzer
 License: GPL-3.0-or-later
 Keywords: ascii art,binary,color,cybersecurity,DFIR,encoding,font,infosec,maldoc,malicious pdf,malware,malware analysis,pdf,pdfs,pdf analysis,pypdf,threat assessment,threat hunting,threat intelligence,threat research,threatintel,visualization,yara
@@ -252,9 +252,9 @@ The Pdfalyzer comes with a few command line tools:
 * `combine_pdfs` - Combines multiple PDFs into a single PDF. Run `combine_pdfs --help` for more info.
 * `extract_pdf_pages` - Extracts page ranges (e.g. "10-25") from a PDF and writes them to a new PDF. Run `extract_pdf_pages --help` for more info.
-* `extract_text_from_pdfs` - Extracts text from a PDF, including applying OCR to all embedded images. Run `extract_text_from_pdfs --help` for more info.
+* `extract_pdf_text` - Extracts text from a PDF, including applying OCR to all embedded images. Run `extract_pdf_text --help` for more info.
-Running `extract_text_from_pdfs` requires that you install The Pdfalyzer's optional dependencies:
+Running `extract_pdf_text` requires that you install The Pdfalyzer's optional dependencies:
 ```bash
 pipx install pdfalyzer[extract]

{pdfalyzer-1.17.1.dist-info → pdfalyzer-1.17.3.dist-info}/RECORD RENAMED Viewed

@@ -1,13 +1,13 @@
 .pdfalyzer.example,sha256=sh_qkUBw4hfJia_Dx2wB-fsqJInhx2sSgA7WJz3MHYo,3917
-CHANGELOG.md,sha256=KtprK6EZ8FhdPWHs9E-YzGSqHxV_w0GnShvIJ6kMPss,13132
+CHANGELOG.md,sha256=ZFP4uDoiYT-kNa7XJuyNKhIjcvY5DU4CeMSGn0braPU,13301
 LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-pdfalyzer/__init__.py,sha256=TgCkfaaWuxv3sNMHcMZjh5lAw0oPNYKqJYRXVy9hPKo,6181
+pdfalyzer/__init__.py,sha256=2Gikt_-OSXZqeQij4wSwb65g7jycVAupjeFmXBf51lo,6159
 pdfalyzer/__main__.py,sha256=Ko_AoAyYMLIe_cmhiUSl6twheLZrGyT8aOSJ2CP7EZY,43
 pdfalyzer/binary/binary_scanner.py,sha256=gEHHdQ3lKe1P1441fk2OLO8GxJF7FxF8Tq04ElB8ilU,10748
 pdfalyzer/config.py,sha256=RBtp3Q6IvOW922rcanB4mVceJEM0BEjNybc6_vC7efY,2122
 pdfalyzer/decorators/document_model_printer.py,sha256=2tjJItZltukmOD2wjGvl2IsBM7Gug59wMHGLpzGDbV0,2682
 pdfalyzer/decorators/indeterminate_node.py,sha256=QLJr-nGKih8gPZcIqxLU028OwWWD5VjNHYMUjniwT_k,6586
-pdfalyzer/decorators/pdf_file.py,sha256=CHXyM8RIvnjKnsDOJxUhk-sfRzLLW50MJpKKTax6Eqk,10274
+pdfalyzer/decorators/pdf_file.py,sha256=Az3TL_Ttj_pDOHoHsiwpNlrCckCgKTp0VuGevJIi_5c,10481
 pdfalyzer/decorators/pdf_object_properties.py,sha256=Il3RObxQ4XUf0Ei-nd4tjJO0LeaxC6u7yFa3cQs_jVY,5485
 pdfalyzer/decorators/pdf_tree_node.py,sha256=4LReGJUtG8iEcLUQD1jW-yp3xPWsHrC-3Anbkt7XZ3A,11134
 pdfalyzer/decorators/pdf_tree_verifier.py,sha256=2hVe9APsAWQZ7ra8AGndHQnGWmmxmb3ZwfJHZuLvsvc,4714
@@ -17,7 +17,7 @@ pdfalyzer/detection/javascript_hunter.py,sha256=_wT2vkKTMlm_RGCjYsmwcmV-ag1qep3E
 pdfalyzer/detection/yaralyzer_helper.py,sha256=bfIa18f0zdUoOAJIQcwVDjF52sJNV47NdsDasg01uiQ,2194
 pdfalyzer/font_info.py,sha256=2R85iETY_1eKCeRrkqeIxfPDqXZyWfCNcHx_-aTyF0s,6682
 pdfalyzer/helpers/dict_helper.py,sha256=2TP0_EJBouaWD6jfnAekrEZ4M5eHKL8Tm61FgXZtBAg,303
-pdfalyzer/helpers/filesystem_helper.py,sha256=onXhSMhxo0YkvdKdosRwUo_RGdW6yNzZF5hfjgZ3GBE,5085
+pdfalyzer/helpers/filesystem_helper.py,sha256=QCdUcZlufBWaV38LlrQEGUGOGUtZEozMVSRkoTjwKlU,5046
 pdfalyzer/helpers/image_helper.py,sha256=QjoAUcKKEtpmuEyOmEfmaUN-lzNykQ1SzqgNn9-R4Y0,1120
 pdfalyzer/helpers/number_helper.py,sha256=8IlRmaOVLJsUV18VLvWRZU8SzRxL0XZjrY3sjmk2Ro4,292
 pdfalyzer/helpers/pdf_object_helper.py,sha256=65BlUgnDM9brxJFw_WF8QLomWHaNh-pj88NWoxcMkoQ,1160
@@ -35,8 +35,8 @@ pdfalyzer/output/tables/stream_objects_table.py,sha256=PgQj8oTtW5_X8SMQb3FvCWDS-
 pdfalyzer/pdf_object_relationship.py,sha256=tOJTp73m82oNZJB7NxvTLv167kqthH8PRbVnev_4uEk,5291
 pdfalyzer/pdfalyzer.py,sha256=T5U8MZRlL0Kn-GJVqfIIoL7eBUdQeteu50VhQ-IoYh0,11214
 pdfalyzer/util/adobe_strings.py,sha256=eF4K1RhhR_qgMBT58MzCxWkqQj17OWLCNmG3SjZ9BUs,5045
-pdfalyzer/util/argument_parser.py,sha256=OdvGCowGnVNyulqC5968myCxY4gRu6--WmCIdkiXoWA,9732
-pdfalyzer/util/cli_tools_argument_parser.py,sha256=EE-lk1ZMv3JlZlZ9N3rAndIlYl1__C0iYG0Ti6MEHjM,6107
+pdfalyzer/util/argument_parser.py,sha256=9ixQMEZz00IPK8xRxuS7DMrQgXIrqhB2ve5W8XTZ1S8,9732
+pdfalyzer/util/cli_tools_argument_parser.py,sha256=RqK_5AWC7qm9Zy7pvDb-J1WSEGBkIyxzNDcFJwSmuX4,6285
 pdfalyzer/util/debugging.py,sha256=hjYGxptJmal9TTaAUkkoo0oNu2tdx6ZYSyC0WjvzHh0,156
 pdfalyzer/util/exceptions.py,sha256=XLFFTdx1n6i_VCmvuzvIOCa-djJvGEitfo9lhy3zq0k,98
 pdfalyzer/util/page_range.py,sha256=NMNh3_TojxTxBIpvUYK1AmvID_m8qOP6AihZrLWZF2I,1652
@@ -47,8 +47,8 @@ pdfalyzer/yara_rules/__init.py__,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
 pdfalyzer/yara_rules/didier_stevens.yara,sha256=4XhqafU09xzYUP7LCygHHBXOpAXUblJf6Tkn37MUy0w,7253
 pdfalyzer/yara_rules/lprat.static_file_analysis.yara,sha256=i0CwRH8pBx_QshKFTQtr1CP5n378EZelsF2FxMY2y5A,21859
 pdfalyzer/yara_rules/pdf_malware.yara,sha256=jDqSTP5BQSi2I_1xZiFZdy68I4oVWDat2j08-qdfbto,91063
-pdfalyzer-1.17.1.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
-pdfalyzer-1.17.1.dist-info/METADATA,sha256=nla_K-pZ8XoknqbcCqi90EPydVJ7STe6DDBfOOf_Dso,27309
-pdfalyzer-1.17.1.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
-pdfalyzer-1.17.1.dist-info/entry_points.txt,sha256=goHVADdqEFcniu4O0k7kabc2rLf3wvRrENJK6c9IkUw,249
-pdfalyzer-1.17.1.dist-info/RECORD,,
+pdfalyzer-1.17.3.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
+pdfalyzer-1.17.3.dist-info/METADATA,sha256=MczhorkJI7ozznrHf72k7a0QELDinDNHhex4ur8kSr8,27294
+pdfalyzer-1.17.3.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
+pdfalyzer-1.17.3.dist-info/entry_points.txt,sha256=5ve7Ydx0p33ZuQWol6wIFAPPUgQrEQoJkOy06mD5t9Y,237
+pdfalyzer-1.17.3.dist-info/RECORD,,

{pdfalyzer-1.17.1.dist-info → pdfalyzer-1.17.3.dist-info}/entry_points.txt RENAMED Viewed

@@ -1,7 +1,7 @@
 [console_scripts]
 combine_pdfs=pdfalyzer:combine_pdfs
 extract_pdf_pages=pdfalyzer:extract_pdf_pages
-extract_text_from_pdfs=pdfalyzer:extract_text_from_pdfs
+extract_pdf_text=pdfalyzer:extract_pdf_text
 pdfalyze=pdfalyzer:pdfalyze
 pdfalyzer_show_color_theme=pdfalyzer:pdfalyzer_show_color_theme

{pdfalyzer-1.17.1.dist-info → pdfalyzer-1.17.3.dist-info}/LICENSE RENAMED Viewed

File without changes

{pdfalyzer-1.17.1.dist-info → pdfalyzer-1.17.3.dist-info}/WHEEL RENAMED Viewed

File without changes

pdfalyzer 1.17.1__py3-none-any.whl → 1.17.3__py3-none-any.whl

Potentially problematic release.

pdfalyzer 1.17.1__py3-none-any.whl → 1.17.3__py3-none-any.whl