pdfalyzer 1.16.10__tar.gz → 1.16.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of pdfalyzer might be problematic.
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/CHANGELOG.md +8 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/PKG-INFO +17 -7
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/README.md +9 -2
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/__init__.py +9 -6
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/binary/binary_scanner.py +15 -11
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/decorators/pdf_object_properties.py +13 -12
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/decorators/pdf_tree_node.py +8 -5
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/decorators/pdf_tree_verifier.py +7 -4
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/detection/constants/binary_regexes.py +7 -7
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/detection/yaralyzer_helper.py +1 -3
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/font_info.py +10 -10
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/helpers/filesystem_helper.py +6 -6
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/helpers/pdf_object_helper.py +0 -1
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/helpers/rich_text_helper.py +5 -5
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/character_mapping.py +3 -2
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/pdfalyzer_presenter.py +5 -3
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/tables/decoding_stats_table.py +2 -1
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/pdf_object_relationship.py +12 -12
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/pdfalyzer.py +4 -5
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/util/adobe_strings.py +4 -4
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/util/argument_parser.py +7 -7
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pyproject.toml +49 -29
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/.pdfalyzer.example +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/LICENSE +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/__main__.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/config.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/decorators/document_model_printer.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/decorators/indeterminate_node.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/detection/constants/javascript_reserved_keywords.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/detection/javascript_hunter.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/helpers/dict_helper.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/helpers/number_helper.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/helpers/string_helper.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/layout.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/styles/node_colors.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/styles/rich_theme.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/tables/font_summary_table.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/tables/pdf_node_rich_table.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/output/tables/stream_objects_table.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/util/debugging.py +1 -1
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/util/exceptions.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/util/pdf_parser_manager.py +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/yara_rules/PDF.yara +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/yara_rules/PDF_binary_stream.yara +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/yara_rules/__init.py__ +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/yara_rules/didier_stevens.yara +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/yara_rules/lprat.static_file_analysis.yara +0 -0
- {pdfalyzer-1.16.10 → pdfalyzer-1.16.12}/pdfalyzer/yara_rules/pdf_malware.yara +0 -0
**CHANGELOG.md** (+8 -0)

```diff
@@ -1,5 +1,13 @@
 # NEXT RELEASE
 
+### 1.16.12
+* Bump `PyPDF` to v6.0.0
+
+### 1.16.11
+* Fix typo in `combine_pdfs` help
+* Add some more PyPi classifiers
+* Add a `.flake8` config and fix a bunch of style issues
+
 ### 1.16.10
 * Add `Environment :: Console` and `Programming Language :: Python` to pypi classifiers
 * Add `.pdfalyzer.example` to PyPi package
```
**PKG-INFO** (+17 -7)

````diff
@@ -1,13 +1,13 @@
 Metadata-Version: 2.1
 Name: pdfalyzer
-Version: 1.16.10
-Summary:
+Version: 1.16.12
+Summary: PDF analysis tool. Scan a PDF with YARA rules, visualize its inner tree-like data structure in living color (lots of colors), force decodes of suspicious font binaries, and more.
 Home-page: https://github.com/michelcrypt4d4mus/pdfalyzer
 License: GPL-3.0-or-later
-Keywords: ascii art,binary,color,cybersecurity,DFIR,encoding,font,infosec,maldoc,malicious pdf,malware,malware analysis,pdf,pdfs,pdf analysis,threat assessment,visualization,yara
+Keywords: ascii art,binary,color,cybersecurity,DFIR,encoding,font,infosec,maldoc,malicious pdf,malware,malware analysis,pdf,pdfs,pdf analysis,pypdf,threat assessment,threat hunting,threat intelligence,threat research,threatintel,visualization,yara
 Author: Michel de Cryptadamus
 Author-email: michel@cryptadamus.com
-Requires-Python: >=3.9.2,<4.0
+Requires-Python: >=3.9.2,<4.0
 Classifier: Development Status :: 5 - Production/Stable
 Classifier: Environment :: Console
 Classifier: Intended Audience :: Information Technology
@@ -16,11 +16,14 @@ Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.9
 Classifier: Topic :: Artistic Software
 Classifier: Topic :: Scientific/Engineering :: Visualization
 Classifier: Topic :: Security
 Requires-Dist: anytree (>=2.13,<3.0)
-Requires-Dist: pypdf (>=
+Requires-Dist: pypdf (>=6.0.0,<7.0.0)
 Requires-Dist: yaralyzer (>=1.0.4,<2.0.0)
 Project-URL: Changelog, https://github.com/michelcrypt4d4mus/pdfalyzer/blob/master/CHANGELOG.md
 Project-URL: Documentation, https://github.com/michelcrypt4d4mus/pdfalyzer
@@ -62,10 +65,12 @@ If you're looking for one of these things this may be the tool for you.
 ### What It Don't Do
 This tool is mostly for examining/working with a PDF's data and logical structure. As such it doesn't have much to offer as far as extracting text, rendering[^3], writing, etc. etc.
 
+If you suspect you are dealing with a malcious PDF you can safely run `pdfalyze` on it; embedded javascript etc. will not be executed. If you want to actually look at the contents of a suspect PDF you can use [`dangerzone`](https://dangerzone.rocks/) to sanitize the contents with extreme prejudice before opening it.
+
 -------------
 
 # Installation
-
+#### All Platforms
 Installation with [pipx](https://pypa.github.io/pipx/)[^4] is preferred though `pip3` / `pip` should also work.
 ```sh
 pipx install pdfalyzer
@@ -73,7 +78,12 @@ pipx install pdfalyzer
 
 See [PyPDF installation notes](https://github.com/py-pdf/pypdf#installation) about `PyCryptodome` if you plan to `pdfalyze` any files that use AES encryption.
 
-
+#### macOS Homebrew
+If you are on macOS and use `homebrew` someone out there was kind enough to make [The Pdfalyzer available via homebrew](https://formulae.brew.sh/formula/pdfalyzer) so this should work:
+
+```sh
+brew install pdfalyzer
+```
 
 ### Troubleshooting
 1. If you used `pip3` instead of `pipx` and have an issue you should try to install with `pipx`.
````
**README.md** (+9 -2)

````diff
@@ -33,10 +33,12 @@ If you're looking for one of these things this may be the tool for you.
 ### What It Don't Do
 This tool is mostly for examining/working with a PDF's data and logical structure. As such it doesn't have much to offer as far as extracting text, rendering[^3], writing, etc. etc.
 
+If you suspect you are dealing with a malcious PDF you can safely run `pdfalyze` on it; embedded javascript etc. will not be executed. If you want to actually look at the contents of a suspect PDF you can use [`dangerzone`](https://dangerzone.rocks/) to sanitize the contents with extreme prejudice before opening it.
+
 -------------
 
 # Installation
-
+#### All Platforms
 Installation with [pipx](https://pypa.github.io/pipx/)[^4] is preferred though `pip3` / `pip` should also work.
 ```sh
 pipx install pdfalyzer
@@ -44,7 +46,12 @@ pipx install pdfalyzer
 
 See [PyPDF installation notes](https://github.com/py-pdf/pypdf#installation) about `PyCryptodome` if you plan to `pdfalyze` any files that use AES encryption.
 
-
+#### macOS Homebrew
+If you are on macOS and use `homebrew` someone out there was kind enough to make [The Pdfalyzer available via homebrew](https://formulae.brew.sh/formula/pdfalyzer) so this should work:
+
+```sh
+brew install pdfalyzer
+```
 
 ### Troubleshooting
 1. If you used `pip3` instead of `pipx` and have an issue you should try to install with `pipx`.
````
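Both the PKG-INFO and README hunks above keep the note about `PyCryptodome` and AES-encrypted files. As a rough illustration of why that optional dependency matters (this sketch is not part of the package; the file name and password are placeholders), pypdf only needs a crypto backend once it actually hits an encrypted document:

```python
from pypdf import PdfReader

reader = PdfReader("suspect.pdf")  # hypothetical input file

if reader.is_encrypted:
    # AES-encrypted PDFs need a crypto backend (e.g. PyCryptodome) installed,
    # which is why the README calls it out as an extra install step.
    reader.decrypt("password")  # placeholder password

print(f"Parsed {len(reader.pages)} pages")
```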
**pdfalyzer/__init__.py** (+9 -6)

```diff
@@ -1,7 +1,6 @@
 import code
 import sys
 from os import environ, getcwd, path
-from pathlib import Path
 
 from dotenv import load_dotenv
 from pypdf import PdfWriter
@@ -23,7 +22,7 @@ from rich.text import Text
 from yaralyzer.helpers.rich_text_helper import prefix_with_plain_text_obj
 from yaralyzer.output.file_export import invoke_rich_export
 from yaralyzer.output.rich_console import console
-from yaralyzer.util.logging import
+from yaralyzer.util.logging import log_and_print
 
 from pdfalyzer.helpers.filesystem_helper import file_size_in_mb, set_max_open_files
 from pdfalyzer.helpers.rich_text_helper import print_highlighted
@@ -51,8 +50,8 @@ def pdfalyze():
         log_and_print(f"Binary stream extraction complete, files written to '{args.output_dir}'.\nExiting.\n")
         sys.exit()
 
-    # The method that gets called is related to the argument name. See 'possible_output_sections' list in
-    # Analysis exports wrap themselves around the methods that actually generate the analyses
+    # The method that gets called is related to the argument name. See 'possible_output_sections' list in
+    # argument_parser.py. Analysis exports wrap themselves around the methods that actually generate the analyses.
     for (arg, method) in output_sections(args, pdfalyzer):
         if args.output_dir:
             output_basepath = PdfalyzerConfig.get_output_basepath(method)
@@ -89,7 +88,7 @@ def pdfalyzer_show_color_theme() -> None:
         if name not in ['reset', 'repr_url']
     ]
 
-    console.print(Columns(colors, column_first=True, padding=(0,3)))
+    console.print(Columns(colors, column_first=True, padding=(0, 3)))
 
 
 def combine_pdfs():
@@ -114,7 +113,11 @@ def combine_pdfs():
     for i, page in enumerate(merger.pages):
         if args.image_quality < MAX_QUALITY:
             for j, img in enumerate(page.images):
-                print_highlighted(
+                print_highlighted(
+                    f" -> Reducing image #{j + 1} quality on page {i + 1} to {args.image_quality}...",
+                    style='dim'
+                )
+
                 img.replace(img.image, quality=args.image_quality)
 
         print_highlighted(f" -> Compressing page {i + 1}...", style='dim')
```
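For context on the `combine_pdfs()` hunk above: the reformatted `print_highlighted()` call wraps a pypdf image-recompression loop. A minimal standalone sketch of that pypdf pattern (paths and the quality value are placeholders, not pdfalyzer code) might look like:

```python
from pypdf import PdfWriter

writer = PdfWriter()

for pdf_path in ["part_1.pdf", "part_2.pdf"]:  # hypothetical input files
    writer.append(pdf_path)  # merge each PDF in order

for page in writer.pages:
    for img in page.images:
        # Re-encode each embedded image at a lower quality to shrink the output
        img.replace(img.image, quality=50)

with open("combined.pdf", "wb") as f:
    writer.write(f)
```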
**pdfalyzer/binary/binary_scanner.py** (+15 -11)

```diff
@@ -21,7 +21,7 @@ from yaralyzer.util.logging import log
 from pdfalyzer.config import PdfalyzerConfig
 from pdfalyzer.decorators.pdf_tree_node import PdfTreeNode
 from pdfalyzer.detection.constants.binary_regexes import (BACKTICK, DANGEROUS_PDF_KEYS_TO_HUNT_ONLY_IN_FONTS,
-
+                                                           DANGEROUS_STRINGS, FRONTSLASH, GUILLEMET, QUOTE_PATTERNS)
 from pdfalyzer.helpers.string_helper import generate_hyphen_line
 from pdfalyzer.output.layout import print_headline_panel, print_section_sub_subheader
 from pdfalyzer.util.adobe_strings import CONTENTS, CURRENTFILE_EEXEC, FONT_FILE_KEYS
@@ -36,7 +36,7 @@ class BinaryScanner:
         self.stream_length = len(_bytes)
 
         if label is None and isinstance(owner, PdfTreeNode):
-
+            self.label = owner.__rich__()
 
         self.suppression_notice_queue = []
         self.regex_extraction_stats = defaultdict(lambda: RegexMatchMetrics())
@@ -86,8 +86,12 @@ class BinaryScanner:
                 print_headline_panel(msg, style='dim')
                 continue
 
+            print_section_sub_subheader(
+                f"Forcing Decode of {quote_type.capitalize()} Quoted Strings",
+                style=BYTES_NO_DIM
+            )
+
             quote_pattern = QUOTE_PATTERNS[quote_type]
-            print_section_sub_subheader(f"Forcing Decode of {quote_type.capitalize()} Quoted Strings", style=BYTES_NO_DIM)
             yaralyzer = self._quote_yaralyzer(quote_pattern, quote_type)
             self.process_yara_matches(yaralyzer, f"{quote_type}_quoted")
 
@@ -135,7 +139,7 @@ class BinaryScanner:
     def process_yara_matches(self, yaralyzer: Yaralyzer, pattern: str, force: bool = False) -> None:
         """Decide whether to attempt to decode the matched bytes, track stats. force param ignores min/max length."""
         for bytes_match, decoder in yaralyzer.match_iterator():
-            log.debug(f"Trackings stats for
+            log.debug(f"Trackings match stats for {pattern}, bytes_match: {bytes_match}, is_decodable: {bytes_match.is_decodable()}")  # noqa: E501
 
             # Send suppressed decodes to a queue and track the reason for the suppression in the stats
             if not (bytes_match.is_decodable() or force):
@@ -145,7 +149,7 @@ class BinaryScanner:
             # Print out any queued suppressed notices before printing non suppressed matches
             self._print_suppression_notices()
             console.print(decoder)
-            self.regex_extraction_stats[pattern].tally_match(decoder)
+            self.regex_extraction_stats[pattern].tally_match(decoder)  # TODO: This call must come after print(decoder)
 
         self._print_suppression_notices()
 
@@ -167,12 +171,12 @@ class BinaryScanner:
         return self._pattern_yaralyzer(quote_pattern, REGEX, label, label)
 
     def _pattern_yaralyzer(
-
-
-
-
-
-
+        self,
+        pattern: str,
+        pattern_type: str,
+        rules_label: Optional[str] = None,
+        pattern_label: Optional[str] = None
+    ) -> Yaralyzer:
         """Build a yaralyzer to scan self.bytes"""
         return Yaralyzer.for_patterns(
             patterns=[escape_yara_pattern(pattern)],
```
**pdfalyzer/decorators/pdf_object_properties.py** (+13 -12)

```diff
@@ -16,13 +16,14 @@ from pdfalyzer.util.adobe_strings import *
 
 class PdfObjectProperties:
     """Simple class to extract critical features of a PdfObject."""
+
     def __init__(
-
-
-
-
-
-
+        self,
+        pdf_object: PdfObject,
+        address: str,
+        idnum: int,
+        indirect_object: Optional[IndirectObject] = None
+    ):
         self.idnum = idnum
         self.obj = pdf_object
         self.indirect_object = indirect_object
@@ -57,7 +58,7 @@ class PdfObjectProperties:
         else:
             self.first_address = address
 
-        log.debug(f"Node ID: {self.idnum}, type: {self.type}, subtype: {self.sub_type}, " +
+        log.debug(f"Node ID: {self.idnum}, type: {self.type}, subtype: {self.sub_type}, " +
                   f"label: {self.label}, first_address: {self.first_address}")
 
     @classmethod
@@ -80,11 +81,11 @@ class PdfObjectProperties:
 
     @classmethod
     def to_table_row(
-
-
-
-
-
+        cls,
+        reference_key: str,
+        obj: PdfObject,
+        is_single_row_table: bool = False
+    ) -> List[Union[Text, str]]:
         """PDF object property at reference_key becomes a formatted 3-tuple for use in Rich tables."""
         with_resolved_refs = cls.resolve_references(reference_key, obj)
 
```
**pdfalyzer/decorators/pdf_tree_node.py** (+8 -5)

```diff
@@ -6,7 +6,7 @@ Child/parent relationships should be set using the add_child() and set_parent()
 methods and not set directly. (TODO: this could be done better with anytree
 hooks)
 """
-from typing import Callable, List, Optional
+from typing import Callable, List, Optional
 
 from anytree import NodeMixin, SymlinkNode
 from pypdf.errors import PdfReadError
@@ -163,11 +163,14 @@ class PdfTreeNode(NodeMixin, PdfObjectProperties):
             return None
         else:
             address = refs_to_this_node[0].address
+
         # If other node's label doesn't start with a NON_STANDARD_ADDRESS string
-        #
-        #
-
-
+        # AND any of the relationships pointing at this node use something other than a
+        # NON_STANDARD_ADDRESS_NODES string to refer here,
+        # then print a warning about multiple refs.
+        if not (is_prefixed_by_any(from_node.label, NON_STANDARD_ADDRESS_NODES)
+                or
+                all(ref.address in NON_STANDARD_ADDRESS_NODES for ref in refs_to_this_node)):
             refs_to_this_node_str = "\n ".join([f"{i + 1}. {r}" for i, r in enumerate(refs_to_this_node)])
             msg = f"Multiple refs from {from_node} to {self}:\n {refs_to_this_node_str}"
             log.warning(msg + f"\nCommon address of refs: {address}")
```
**pdfalyzer/decorators/pdf_tree_verifier.py** (+7 -4)

```diff
@@ -37,7 +37,10 @@ class PdfTreeVerifier:
             log.warning(f"Methodd doesn't check revisions but this doc is generation {self.pdfalyzer.max_generation}")
 
         # We expect to see all ordinals up to the number of nodes /Trailer claims exist as obj. IDs.
-        missing_node_ids = [
+        missing_node_ids = [
+            i for i in range(1, self.pdfalyzer.pdf_size)
+            if self.pdfalyzer.find_node_by_idnum(i) is None
+        ]
 
         for idnum in missing_node_ids:
             ref = IndirectObject(idnum, self.pdfalyzer.max_generation, self.pdfalyzer.pdf_reader)
@@ -57,13 +60,13 @@ class PdfTreeVerifier:
                 log.error(f"Cannot find ref {ref} in PDF!")
                 continue
             elif isinstance(obj, (NumberObject, NameObject)):
-                log.info(f"Obj {idnum} is a {type(obj)} w/value {obj}; if relationshipd by /Length etc. this is a nonissue but maybe worth doublechecking")
+                log.info(f"Obj {idnum} is a {type(obj)} w/value {obj}; if relationshipd by /Length etc. this is a nonissue but maybe worth doublechecking")  # noqa: E501
                 continue
             elif not isinstance(obj, dict):
-                log.error(f"Obj {idnum} ({obj}) of type {type(obj)} isn't dict, cannot determine if it should be in tree")
+                log.error(f"Obj {idnum} ({obj}) of type {type(obj)} isn't dict, cannot determine if it should be in tree")  # noqa: E501
                 continue
             elif TYPE not in obj:
-                msg = f"Obj {idnum} has no {TYPE} and is not in tree. Either a loose node w/no data or an error in pdfalyzer."
+                msg = f"Obj {idnum} has no {TYPE} and is not in tree. Either a loose node w/no data or an error in pdfalyzer."  # noqa: E501
                 msg += f"\nHere's the contents for you to assess:\n{obj}"
                 log.warning(msg)
                 continue
```
**pdfalyzer/detection/constants/binary_regexes.py** (+7 -7)

```diff
@@ -36,13 +36,13 @@ PARENTHESES = 'parentheses'
 
 QUOTE_PATTERNS = {
     BACKTICK: '`.+`',
-    BRACKET: '\\[.+\\]',
-    CURLY_BRACKET: '{.+}',
-    DOUBLE_LESS_THAN: '<<.+>>',
+    BRACKET: '\\[.+\\]',          # { 91 [-] 93 }
+    CURLY_BRACKET: '{.+}',        # { 123 [-] 125 }
+    DOUBLE_LESS_THAN: '<<.+>>',   # Hex { 60 60 [-] 62 62 }
     ESCAPED_SINGLE: "\\'.+\\'",
     ESCAPED_DOUBLE: '\\".+\\"',
-    FRONTSLASH: '/.+/',
-    GUILLEMET: 'AB [-] BB',
-    LESS_THAN: '<.+>',
-    PARENTHESES: '\\(.+\\)',
+    FRONTSLASH: '/.+/',           # { 47 [-] 47 }
+    GUILLEMET: 'AB [-] BB',       # Guillemet quotes are not ANSI so require byte pattern
+    LESS_THAN: '<.+>',            # Hex { 60 [-] 62 }
+    PARENTHESES: '\\(.+\\)',      # Hex { 28 [-] 29 }
 }
```
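The new comments document the byte values behind each quoting style. To make the intent of `QUOTE_PATTERNS` concrete, here is an illustrative use of one of those patterns with plain `re` (pdfalyzer itself feeds these patterns into YARA rules via yaralyzer rather than using `re`; the sample bytes below are made up):

```python
import re

# Same idea as PARENTHESES: '\\(.+\\)' above, applied to raw stream bytes.
stream_bytes = b"junk (JS-looking payload here) more junk"  # placeholder data

for match in re.finditer(rb"\(.+\)", stream_bytes):
    print(match.group(0))  # b'(JS-looking payload here)'
```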
**pdfalyzer/detection/yaralyzer_helper.py** (+1 -3)

```diff
@@ -8,8 +8,6 @@ from typing import Optional, Union
 from yaralyzer.config import YaralyzerConfig
 from yaralyzer.yaralyzer import Yaralyzer
 
-from pdfalyzer.config import PdfalyzerConfig
-
 YARA_RULES_DIR = files('pdfalyzer').joinpath('yara_rules')
 
 YARA_RULES_FILES = [
@@ -38,7 +36,7 @@ def _build_yaralyzer(scannable: Union[bytes, str], label: Optional[str] = None)
         with as_file(YARA_RULES_DIR.joinpath(YARA_RULES_FILES[2])) as yara2:
             with as_file(YARA_RULES_DIR.joinpath(YARA_RULES_FILES[3])) as yara3:
                 with as_file(YARA_RULES_DIR.joinpath(YARA_RULES_FILES[4])) as yara4:
-                    # If there is a custom yara_rules
+                    # If there is a custom yara_rules arg, use that instead of the files in the yara_rules/ dir
                     rules_paths = YaralyzerConfig.args.yara_rules_files or []
 
                     if not YaralyzerConfig.args.no_default_yara_rules:
```
**pdfalyzer/font_info.py** (+10 -10)

```diff
@@ -142,19 +142,19 @@ class FontInfo:
         console.line()
 
     # TODO: currently unused
-    def preview_bytes_at_advertised_lengths(self):
-
-
+    # def preview_bytes_at_advertised_lengths(self):
+    #     """Show the bytes at the boundaries provided by /Length1, /Length2, and /Length3, if they exist"""
+    #     lengths = self.lengths or []
 
-
-
+    #     if self.lengths is None or len(lengths) <= 1:
+    #         console.print("No length demarcations to preview.", style='grey.dark')
 
-
-
-
-
+    #     for i, demarcation in enumerate(lengths[1:]):
+    #         console.print(f"{self.font_file} at /Length{i} ({demarcation}):")
+    #         print(f"\n  Stream before: {self.stream_data[demarcation - FONT_SECTION_PREVIEW_LEN:demarcation + 1]}")
+    #         print(f"\n  Stream after: {self.stream_data[demarcation:demarcation + FONT_SECTION_PREVIEW_LEN]}")
 
-
+    #     print(f"\nfinal bytes back from {self.stream_data.lengths[2]} + 10: {self.stream_data[-10 - -f.lengths[2]:]}")
 
     def __str__(self) -> str:
         return self.display_title
```
**pdfalyzer/helpers/filesystem_helper.py** (+6 -6)

```diff
@@ -15,7 +15,7 @@ OPEN_FILES_BUFFER = 30  # we might have some files open already so we need
 PDF_EXT = '.pdf'
 
 # TODO: this kind of type alias is not supported until Python 3.12
-#type StrOrPath = Union[str, Path]
+# type StrOrPath = Union[str, Path]
 
 
 def with_pdf_extension(file_path: Union[str, Path]) -> str:
@@ -92,11 +92,11 @@ def set_max_open_files(num_filehandles: int = DEFAULT_MAX_OPEN_FILES) -> tuple[O
         resource.setrlimit(resource.RLIMIT_NOFILE, (soft, hard))
     except (ValueError, resource.error):
         try:
-
-
-
+            hard = soft
+            print_highlighted(f"Retrying setting max open files (soft, hard)=({soft}, {hard})", style='yellow')
+            resource.setrlimit(resource.RLIMIT_NOFILE, (soft, hard))
         except Exception:
-
-
+            print_highlighted('Failed to set max open files / ulimit, giving up!', style='error')
+            soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
 
     return (soft, hard)
```
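The `set_max_open_files()` hunk above is a style cleanup of the retry/fallback logic around `resource.setrlimit()`. The underlying pattern is the standard rlimit dance from the Python standard library; a minimal standalone sketch (the target value is arbitrary, and `resource` is Unix-only):

```python
import resource

wanted = 4096  # hypothetical number of file handles to ask for

soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)

try:
    # Ask for more open files, but never above the hard limit.
    resource.setrlimit(resource.RLIMIT_NOFILE, (min(wanted, hard), hard))
except (ValueError, resource.error):
    pass  # the OS refused; keep whatever limits are currently in place

soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
print(f"open file limit: soft={soft}, hard={hard}")
```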
**pdfalyzer/helpers/rich_text_helper.py** (+5 -5)

```diff
@@ -20,11 +20,11 @@ def print_highlighted(msg: Union[str, Text], **kwargs) -> None:
 
 
 def quoted_text(
-
-
-
-
-
+    _string: str,
+    style: str = '',
+    quote_char_style: str = 'white',
+    quote_char: str = "'"
+) -> Text:
     """Wrap _string in 'quote_char'. Style 'quote_char' with 'quote_char_style'."""
     quote_char_txt = Text(quote_char, style=quote_char_style)
     txt = quote_char_txt + Text(_string, style=style) + quote_char_txt
```
**pdfalyzer/output/character_mapping.py** (+3 -2)

```diff
@@ -8,6 +8,7 @@ from yaralyzer.helpers.bytes_helper import print_bytes
 from yaralyzer.output.rich_console import console
 from yaralyzer.util.logging import log
 
+# from pdfalyzer.font_info import FontInfo  # Causes circular import
 from pdfalyzer.helpers.rich_text_helper import quoted_text
 from pdfalyzer.helpers.string_helper import pp
 from pdfalyzer.output.layout import print_headline_panel, subheading_width
@@ -17,7 +18,7 @@ CHARMAP_TITLE_PADDING = (1, 0, 0, 2)
 CHARMAP_PADDING = (0, 2, 0, 10)
 
 
-def print_character_mapping(font: 'FontInfo') -> None:
+def print_character_mapping(font: 'FontInfo') -> None:  # noqa: F821
     """Prints the character mapping extracted by PyPDF._charmap in tidy columns"""
     if font.character_mapping is None or len(font.character_mapping) == 0:
         log.info(f"No character map found in {font}")
@@ -37,7 +38,7 @@ def print_character_mapping(font: 'FontInfo') -> None:
     console.line()
 
 
-def print_prepared_charmap(font: 'FontInfo'):
+def print_prepared_charmap(font: 'FontInfo'):  # noqa: F821
     """Prints the prepared_charmap returned by PyPDF."""
     if font.prepared_char_map is None:
         log.info(f"No prepared_charmap found in {font}")
```
**pdfalyzer/output/pdfalyzer_presenter.py** (+5 -3)

```diff
@@ -27,7 +27,9 @@ from pdfalyzer.output.tables.decoding_stats_table import build_decoding_stats_ta
 from pdfalyzer.output.tables.pdf_node_rich_table import generate_rich_tree, get_symlink_representation
 from pdfalyzer.output.tables.stream_objects_table import stream_objects_table
 from pdfalyzer.pdfalyzer import Pdfalyzer
-from pdfalyzer.util.adobe_strings import *
+# from pdfalyzer.util.adobe_strings import *
+
+INTERNAL_YARA_ERROR_MSG = "Internal YARA error! YARA's error codes can be checked here: https://github.com/VirusTotal/yara/blob/master/libyara/include/yara/error.h"  # noqa: E501
 
 
 class PdfalyzerPresenter:
@@ -130,9 +132,9 @@ class PdfalyzerPresenter:
 
         try:
             self.yaralyzer.yaralyze()
-        except yara.Error
+        except yara.Error:
             console.print_exception()
-            print_fatal_error_panel(
+            print_fatal_error_panel(INTERNAL_YARA_ERROR_MSG)
             return
 
         YaralyzerConfig.args.standalone_mode = False
```
**pdfalyzer/output/tables/decoding_stats_table.py** (+2 -1)

```diff
@@ -7,6 +7,7 @@ from rich.table import Table
 from rich.text import Text
 from yaralyzer.helpers.rich_text_helper import CENTER, na_txt, prefix_with_plain_text_obj
 
+from pdfalyzer.binary.binary_scanner import BinaryScanner
 from pdfalyzer.helpers.rich_text_helper import pct_txt
 from pdfalyzer.output.layout import generate_subtable, half_width, pad_header
 
@@ -17,7 +18,7 @@ REGEX_SUBTABLE_COLS = ['Metric', 'Value']
 DECODES_SUBTABLE_COLS = ['Encoding', '#', 'Decoded', '#', 'Forced', '#', 'Failed']
 
 
-def build_decoding_stats_table(scanner:
+def build_decoding_stats_table(scanner: BinaryScanner) -> Table:
     """Diplay aggregate results on the decoding attempts we made on subsets of scanner.bytes"""
     stats_table = _new_decoding_stats_table(scanner.label.plain if scanner.label else '')
     regexes_not_found_in_stream = []
```
**pdfalyzer/pdf_object_relationship.py** (+12 -12)

```diff
@@ -14,12 +14,12 @@ INCOMPARABLE_PROPS = ['from_obj', 'to_obj']
 
 class PdfObjectRelationship:
     def __init__(
-
-
-
-
-
-
+        self,
+        from_node: 'PdfTreeNode',
+        to_obj: IndirectObject,
+        reference_key: str,
+        address: str
+    ) -> None:
         """
         In the case of easy key/value pairs the reference_key and the address are the same but
         for more complicated references the address will be the reference_key plus sub references.
@@ -53,12 +53,12 @@ class PdfObjectRelationship:
 
     @classmethod
     def build_node_references(
-
-
-
-
-
-
+        cls,
+        from_node: 'PdfTreeObject',
+        from_obj: Optional[PdfObject] = None,
+        ref_key: Optional[Union[str, int]] = None,
+        address: Optional[str] = None
+    ) -> List['PdfObjectRelationship']:
         """
         Builds list of relationships 'from_node.obj' contains referencing other PDF objects.
         Initially called with single arg from_node. Other args are employed when recursable
```
**pdfalyzer/pdfalyzer.py** (+4 -5)

```diff
@@ -77,7 +77,7 @@ class Pdfalyzer:
         nodes_to_walk_next = [self._add_relationship_to_pdf_tree(r) for r in node.references_to_other_nodes()]
         node.all_references_processed = True
 
-        for next_node in [n for n in nodes_to_walk_next if not (n is None or n.all_references_processed)
+        for next_node in [n for n in nodes_to_walk_next if not (n is None or n.all_references_processed)]:
             if not next_node.all_references_processed:
                 self.walk_node(next_node)
 
@@ -105,7 +105,7 @@ class Pdfalyzer:
 
     def stream_nodes(self) -> List[PdfTreeNode]:
         """List of actual nodes (not SymlinkNodes) containing streams sorted by PDF object ID"""
-        stream_filter = lambda node: node.contains_stream() and not isinstance(node, SymlinkNode)
+        stream_filter = lambda node: node.contains_stream() and not isinstance(node, SymlinkNode)  # noqa: E731
         return sorted(findall(self.pdf_tree, stream_filter), key=lambda r: r.idnum)
 
     def _add_relationship_to_pdf_tree(self, relationship: PdfObjectRelationship) -> Optional[PdfTreeNode]:
@@ -114,7 +114,7 @@ class Pdfalyzer:
         placed in the PDF node processing queue.
         """
         log.info(f'Assessing relationship {relationship}...')
-        was_seen_before = (relationship.to_obj.idnum in self.nodes_encountered)
+        was_seen_before = (relationship.to_obj.idnum in self.nodes_encountered)  # Must come before _build_or_find()
         from_node = relationship.from_node
         to_node = self._build_or_find_node(relationship.to_obj, relationship.address)
         self.max_generation = max([self.max_generation, relationship.to_obj.generation or 0])
@@ -133,7 +133,7 @@ class Pdfalyzer:
             from_node.set_parent(to_node)
         elif to_node.parent is not None:
             # Some StructElem nodes I have seen use /P or /K despire not being the real parent/child
-            if relationship.from_node.type.startswith(STRUCT_ELEM)
+            if relationship.from_node.type.startswith(STRUCT_ELEM):
                 log.info(f"{relationship} fail: {to_node} parent is already {to_node.parent}")
             else:
                 log.warning(f"{relationship} fail: {to_node} parent is already {to_node.parent}")
@@ -173,7 +173,6 @@ class Pdfalyzer:
 
     def _resolve_indeterminate_nodes(self) -> None:
         """Place all indeterminate nodes in the tree."""
-        #set_log_level('INFO')
         indeterminate_nodes = [self.nodes_encountered[idnum] for idnum in self.indeterminate_ids]
         indeterminate_nodes_string = "\n ".join([f"{node}" for node in indeterminate_nodes])
         log.info(f"Resolving {len(indeterminate_nodes)} indeterminate nodes: {indeterminate_nodes_string}")
```
**pdfalyzer/util/adobe_strings.py** (+4 -4)

```diff
@@ -116,20 +116,20 @@ NON_TREE_REFERENCES = [
 
 # Some PdfObjects can't be properly placed in the tree until the entire tree is parsed
 INDETERMINATE_REF_KEYS = [
-    ANNOTS,
+    ANNOTS,       # At least when it appears in a page
     COLOR_SPACE,
     D,
     DEST,
     EXT_G_STATE,
-    FIELDS,
+    FIELDS,       # At least for /AcroForm
     FIRST,
     FONT,
     NAMES,
     OPEN_ACTION,
-    P,
+    P,            # At least for widgets...
     RESOURCES,
     XOBJECT,
-    UNLABELED,
+    UNLABELED,    # TODO: this might be wrong? maybe this is where the /Resources actually live?
 ]
 
 INDETERMINATE_PREFIXES = [p for p in INDETERMINATE_REF_KEYS if len(p) > 2]
```
**pdfalyzer/util/argument_parser.py** (+7 -7)

```diff
@@ -92,9 +92,9 @@ select.add_argument('-c', '--counts', action='store_true',
                     help='show counts of some of the properties of the objects in the PDF')
 
 select.add_argument('-s', '--streams',
-                    help="scan all the PDF's decoded/decrypted streams for sus content as well as any YARA rule matches. " +
-                         "brute force is involved; output is verbose. a single OBJ_ID can be optionally provided to " +
-                         "limit the output to a single internal object. try '-s -- [OTHERARGS]' if you run into an " +
+                    help="scan all the PDF's decoded/decrypted streams for sus content as well as any YARA rule matches. " +
+                         "brute force is involved; output is verbose. a single OBJ_ID can be optionally provided to " +
+                         "limit the output to a single internal object. try '-s -- [OTHERARGS]' if you run into an " +
                          "argument position related piccadilly.",
                     nargs='?',
                     const=ALL_STREAMS,
@@ -102,7 +102,7 @@ select.add_argument('-s', '--streams',
                     type=int)
 
 select.add_argument('--extract-quoted',
-                    help="extract and force decode all bytes found between this kind of quotation marks " +
+                    help="extract and force decode all bytes found between this kind of quotation marks " +
                          "(requires --streams. can be specified more than once)",
                     choices=list(QUOTE_PATTERNS.keys()),
                     dest='extract_quoteds',
@@ -147,7 +147,7 @@ def parse_arguments():
         args.output_dir = args.output_dir or getcwd()
         file_prefix = (args.file_prefix + '__') if args.file_prefix else ''
         args.file_suffix = ('_' + args.file_suffix) if args.file_suffix else ''
-        args.output_basename =
+        args.output_basename = f"{file_prefix}{path.basename(args.file_to_scan_path)}"
     elif args.output_dir:
         log.warning('--output-dir provided but no export option was chosen')
 
@@ -203,8 +203,8 @@ MAX_QUALITY = 10
 
 combine_pdfs_parser = ArgumentParser(
     description="Combine multiple PDFs into one.",
-    epilog="If all PDFs end in a number (e.g. 'xyz_1.pdf', 'xyz_2.pdf', etc. sort the files as if those were"
-           " page
+    epilog="If all PDFs end in a number (e.g. 'xyz_1.pdf', 'xyz_2.pdf', etc. sort the files as if those were" +
+           " page numbers prior to merging.",
     formatter_class=RichHelpFormatterPlus)
 
 combine_pdfs_parser.add_argument('pdfs',
```
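The corrected `combine_pdfs` epilog describes sorting input files by their trailing number before merging. As a rough sketch of that behavior (the `page_number()` helper is hypothetical, not pdfalyzer's actual implementation):

```python
import re

def page_number(filename: str) -> int:
    """Pull the trailing digits out of names like 'xyz_2.pdf'; 0 if there are none."""
    match = re.search(r"(\d+)\.pdf$", filename, re.IGNORECASE)
    return int(match.group(1)) if match else 0

pdfs = ["xyz_10.pdf", "xyz_2.pdf", "xyz_1.pdf"]
print(sorted(pdfs, key=page_number))  # ['xyz_1.pdf', 'xyz_2.pdf', 'xyz_10.pdf']
```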
**pyproject.toml** (+49 -29)

```diff
@@ -1,13 +1,35 @@
 [tool.poetry]
 name = "pdfalyzer"
-version = "1.16.10"
-description = "
+version = "1.16.12"
+description = "PDF analysis tool. Scan a PDF with YARA rules, visualize its inner tree-like data structure in living color (lots of colors), force decodes of suspicious font binaries, and more."
 authors = ["Michel de Cryptadamus <michel@cryptadamus.com>"]
 license = "GPL-3.0-or-later"
 readme = "README.md"
+documentation = "https://github.com/michelcrypt4d4mus/pdfalyzer"
 homepage = "https://github.com/michelcrypt4d4mus/pdfalyzer"
 repository = "https://github.com/michelcrypt4d4mus/pdfalyzer"
-
+
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "Environment :: Console",
+    "Intended Audience :: Information Technology",
+    "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Artistic Software",
+    "Topic :: Security",
+    "Topic :: Scientific/Engineering :: Visualization",
+]
+
+include = [
+    "CHANGELOG.md",
+    "LICENSE",
+    ".pdfalyzer.example"
+]
 
 keywords = [
     "ascii art",
@@ -25,65 +47,63 @@ keywords = [
     "pdf",
     "pdfs",
     "pdf analysis",
+    "pypdf",
     "threat assessment",
+    "threat hunting",
+    "threat intelligence",
+    "threat research",
+    "threatintel",
     "visualization",
     "yara"
 ]
 
-classifiers = [
-    "Development Status :: 5 - Production/Stable",
-    "Environment :: Console",
-    "Intended Audience :: Information Technology",
-    "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)",
-    "Programming Language :: Python",
-    "Topic :: Artistic Software",
-    "Topic :: Security",
-    "Topic :: Scientific/Engineering :: Visualization",
-]
-
-include = [
-    "CHANGELOG.md",
-    "LICENSE",
-    ".pdfalyzer.example"
-]
-
 packages = [
     { include = "pdfalyzer" }
 ]
 
 
-
+#####################
+#   Dependencies    #
+#####################
 [tool.poetry.dependencies]
-python = "^3.9.2"
+python = "^3.9,>=3.9.2"
 anytree = "~=2.13"
-pypdf = "^
+pypdf = "^6.0.0"
 yaralyzer = "^1.0.4"
 
-# Dev dependencies
 [tool.poetry.group.dev.dependencies]
+flake8 = "^7.3.0"
 pytest = "^7.1.2"
 pytest-skip-slow = "^0.0.3"
 
 
-
+#############
+#  Scripts  #
+#############
 [tool.poetry.scripts]
 combine_pdfs = 'pdfalyzer:combine_pdfs'
 pdfalyze = 'pdfalyzer:pdfalyze'
 pdfalyzer_show_color_theme = 'pdfalyzer:pdfalyzer_show_color_theme'
 
 
-
+#####################
+#     PyPi URLs     #
+#####################
 [tool.poetry.urls]
 Changelog = "https://github.com/michelcrypt4d4mus/pdfalyzer/blob/master/CHANGELOG.md"
 
 
-
+###############################
+#     Poetry build system     #
+###############################
 [build-system]
-requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
+requires = ["poetry-core>=1.0.0"]
 
 
-
+##################
+#     pytest     #
+##################
 [tool.pytest.ini_options]
 addopts = [
     "--import-mode=importlib",
```
The remaining files listed above with `+0 -0` are unchanged between 1.16.10 and 1.16.12.