pdfalyzer 1.16.9__py3-none-any.whl → 1.16.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pdfalyzer might be problematic. Click here for more details.

.pdfalyzer.example ADDED
@@ -0,0 +1,66 @@
1
+ # If you place a file called '.pdfalyzer' in your home dir or the current dir, environment variables specified
2
+ # in that .pdfalyzer file will be added to the environment each time pdfalyzer is invoked. (See the `dotenv`
3
+ # package for more details.) This file contains environment variables you can place in .pdfalyzer to configure
4
+ # the application above and beyond providing command line options. Useful if you want to permanently
5
+ # configure options you tend to reuse (e.g. '--maximize-width') so you can stop remembering to type them.
6
+ #
7
+ # Almost all of the yaralyzer (yes, you read that right - The Pdfalyzer uses The Yaralyzer for all
8
+ # kinds of backend functionality) command line options can be configured in this file by capitalizing them and
9
+ # prefixing them with 'YARALYZER_'. E.g. to configure the --maximize-width option for every invocation, you would set:
10
+ # YARALYZER_MAXIMIZE_WIDTH=True
11
+ #
12
+ # Note that many of these options are actually configuring the yaralyzer, which is a separate tool leveraged
13
+ # by the Pdfalyzer to actually do the work of finding patterns. More info can be found at
14
+ # https://github.com/michelcrypt4d4mus/yaralyzer
15
+
16
+
17
+
18
+ # Expand the width of the output to fit the display window (same as the --maximize-width option)
19
+ # YARALYZER_MAXIMIZE_WIDTH=True
20
+
21
+ # yara-python internal options passed through to yara.set_config() as the stack_size and max_match_data arguments
22
+ # YARALYZER_STACK_SIZE=10485760
23
+ # YARALYZER_MAX_MATCH_LENGTH=10737418240
24
+
25
+ # Suppress all PDF binary regex matching/scanning/etc
26
+ # YARALYZER_SUPPRESS_DECODES_TABLE=False
27
+
28
+ # Suppress the display of the table showing the encoding assessments given by `chardet.detect()`
29
+ # about a particular chunk of binary data. (The most important data in the chardet confidence table is
30
+ # redundant anyway. Only the low likelihood encodings are hidden from the user.)
31
+ # YARALYZER_SUPPRESS_CHARDET_TABLE=False
32
+ # Minimum confidence to display an encoding in the chardet results table
33
+ # YARALYZER_MIN_CHARDET_CONFIDENCE=2.0
34
+
35
+ # Configure how many bytes before and after any binary data should be included in scans and visualizations
36
+ # YARALYZER_SURROUNDING_BYTES=64
37
+
38
+ # Size thresholds (in bytes) under/over which pdfalyzer will NOT make attempts to decode a match.
39
+ # Longer byte sequences are for obvious reasons slower to decode by force.
40
+ # It may feel counterintuitive but larger chunks of random binary are also harder to examine and
41
+ # (in my experience) less likely to be meaningful. Consider it - two frontslash characters 20,000 lines apart
42
+ # are more likely to be random than those same frontslashes when placed nearer to each other and
43
+ # in the vicinity of a lot of computerized sigils of internet power like `.`, `+bacd*?`, and other regexes.
44
+ # Keeping the max value low will do more to affect the speed of the app than anything else you
45
+ # can easily configure.
46
+ #
47
+ # YARALYZER_MIN_DECODE_LENGTH=1
48
+ # YARALYZER_MAX_DECODE_LENGTH=256
49
+
50
+ # Directory to write application logs to. Must be an absolute path, not a relative one.
51
+ # These logs are not normally written to a file and the default log level means that the standard behavior
52
+ # is to more or less discard them. Be aware that if you configure this variable a few things will change:
53
+ #
54
+ # 1. Logs WILL NOT be written to STDOUT. They will stream ONLY to files in the configured directory.
55
+ # This is true even with the -D option.
56
+ # 2. The default log_level will be decreased from WARN (extremely spartan) to INFO (fairly verbose).
57
+ # The -D option, which sets the log level to DEBUG, will be respected whether or not
58
+ # YARALYZER_LOG_DIR is configured.
59
+ #
60
+ # YARALYZER_LOG_DIR=/path/to/pdfalyzer/log_dir/
61
+
62
+ # Log level
63
+ # YARALYZER_LOG_LEVEL='INFO'
64
+
65
+ # Path to directory containing Didier Stevens's pdf-parser.py. Only required for extracting binary streams to files.
66
+ # PDFALYZER_PDF_PARSER_PY_PATH=/path/to/pdfparserdotpy/
CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
1
1
  # NEXT RELEASE
2
2
 
3
+ ### 1.16.11
4
+ * Fix typo in `combine_pdfs` help
5
+ * Add some more PyPI classifiers
6
+ * Add a `.flake8` config and fix a bunch of style issues
7
+
8
+ ### 1.16.10
9
+ * Add `Environment :: Console` and `Programming Language :: Python` to PyPI classifiers
10
+ * Add `.pdfalyzer.example` to PyPI package
11
+
3
12
  ### 1.16.9
4
13
  * Add `Development Status :: 5 - Production/Stable` to PyPI classifiers
5
14
 
pdfalyzer/__init__.py CHANGED
@@ -1,7 +1,6 @@
1
1
  import code
2
2
  import sys
3
3
  from os import environ, getcwd, path
4
- from pathlib import Path
5
4
 
6
5
  from dotenv import load_dotenv
7
6
  from pypdf import PdfWriter
@@ -23,7 +22,7 @@ from rich.text import Text
23
22
  from yaralyzer.helpers.rich_text_helper import prefix_with_plain_text_obj
24
23
  from yaralyzer.output.file_export import invoke_rich_export
25
24
  from yaralyzer.output.rich_console import console
26
- from yaralyzer.util.logging import log, log_and_print
25
+ from yaralyzer.util.logging import log_and_print
27
26
 
28
27
  from pdfalyzer.helpers.filesystem_helper import file_size_in_mb, set_max_open_files
29
28
  from pdfalyzer.helpers.rich_text_helper import print_highlighted
@@ -51,8 +50,8 @@ def pdfalyze():
51
50
  log_and_print(f"Binary stream extraction complete, files written to '{args.output_dir}'.\nExiting.\n")
52
51
  sys.exit()
53
52
 
54
- # The method that gets called is related to the argument name. See 'possible_output_sections' list in argument_parser.py
55
- # Analysis exports wrap themselves around the methods that actually generate the analyses
53
+ # The method that gets called is related to the argument name. See 'possible_output_sections' list in
54
+ # argument_parser.py. Analysis exports wrap themselves around the methods that actually generate the analyses.
56
55
  for (arg, method) in output_sections(args, pdfalyzer):
57
56
  if args.output_dir:
58
57
  output_basepath = PdfalyzerConfig.get_output_basepath(method)
@@ -89,7 +88,7 @@ def pdfalyzer_show_color_theme() -> None:
89
88
  if name not in ['reset', 'repr_url']
90
89
  ]
91
90
 
92
- console.print(Columns(colors, column_first=True, padding=(0,3)))
91
+ console.print(Columns(colors, column_first=True, padding=(0, 3)))
93
92
 
94
93
 
95
94
  def combine_pdfs():
@@ -114,7 +113,11 @@ def combine_pdfs():
114
113
  for i, page in enumerate(merger.pages):
115
114
  if args.image_quality < MAX_QUALITY:
116
115
  for j, img in enumerate(page.images):
117
- print_highlighted(f" -> Reducing image #{j + 1} quality on page {i + 1} to {args.image_quality}...", style='dim')
116
+ print_highlighted(
117
+ f" -> Reducing image #{j + 1} quality on page {i + 1} to {args.image_quality}...",
118
+ style='dim'
119
+ )
120
+
118
121
  img.replace(img.image, quality=args.image_quality)
119
122
 
120
123
  print_highlighted(f" -> Compressing page {i + 1}...", style='dim')
@@ -12,8 +12,8 @@ from yaralyzer.decoding.bytes_decoder import BytesDecoder
12
12
  from yaralyzer.encoding_detection.character_encodings import BOMS
13
13
  from yaralyzer.helpers.bytes_helper import hex_string, print_bytes
14
14
  from yaralyzer.helpers.string_helper import escape_yara_pattern
15
- from yaralyzer.output.rich_console import BYTES_NO_DIM, console, console_width
16
15
  from yaralyzer.output.regex_match_metrics import RegexMatchMetrics
16
+ from yaralyzer.output.rich_console import BYTES_NO_DIM, console, console_width
17
17
  from yaralyzer.yara.yara_rule_builder import HEX, REGEX, safe_label
18
18
  from yaralyzer.yaralyzer import Yaralyzer
19
19
  from yaralyzer.util.logging import log
@@ -21,7 +21,7 @@ from yaralyzer.util.logging import log
21
21
  from pdfalyzer.config import PdfalyzerConfig
22
22
  from pdfalyzer.decorators.pdf_tree_node import PdfTreeNode
23
23
  from pdfalyzer.detection.constants.binary_regexes import (BACKTICK, DANGEROUS_PDF_KEYS_TO_HUNT_ONLY_IN_FONTS,
24
- DANGEROUS_PDF_KEYS_TO_HUNT_ONLY_IN_FONTS, DANGEROUS_STRINGS, FRONTSLASH, GUILLEMET, QUOTE_PATTERNS)
24
+ DANGEROUS_STRINGS, FRONTSLASH, GUILLEMET, QUOTE_PATTERNS)
25
25
  from pdfalyzer.helpers.string_helper import generate_hyphen_line
26
26
  from pdfalyzer.output.layout import print_headline_panel, print_section_sub_subheader
27
27
  from pdfalyzer.util.adobe_strings import CONTENTS, CURRENTFILE_EEXEC, FONT_FILE_KEYS
@@ -36,7 +36,7 @@ class BinaryScanner:
36
36
  self.stream_length = len(_bytes)
37
37
 
38
38
  if label is None and isinstance(owner, PdfTreeNode):
39
- self.label = owner.__rich__()
39
+ self.label = owner.__rich__()
40
40
 
41
41
  self.suppression_notice_queue = []
42
42
  self.regex_extraction_stats = defaultdict(lambda: RegexMatchMetrics())
@@ -86,8 +86,12 @@ class BinaryScanner:
86
86
  print_headline_panel(msg, style='dim')
87
87
  continue
88
88
 
89
+ print_section_sub_subheader(
90
+ f"Forcing Decode of {quote_type.capitalize()} Quoted Strings",
91
+ style=BYTES_NO_DIM
92
+ )
93
+
89
94
  quote_pattern = QUOTE_PATTERNS[quote_type]
90
- print_section_sub_subheader(f"Forcing Decode of {quote_type.capitalize()} Quoted Strings", style=BYTES_NO_DIM)
91
95
  yaralyzer = self._quote_yaralyzer(quote_pattern, quote_type)
92
96
  self.process_yara_matches(yaralyzer, f"{quote_type}_quoted")
93
97
 
@@ -135,7 +139,7 @@ class BinaryScanner:
135
139
  def process_yara_matches(self, yaralyzer: Yaralyzer, pattern: str, force: bool = False) -> None:
136
140
  """Decide whether to attempt to decode the matched bytes, track stats. force param ignores min/max length."""
137
141
  for bytes_match, decoder in yaralyzer.match_iterator():
138
- log.debug(f"Trackings stats for match: {pattern}, bytes_match: {bytes_match}, is_decodable: {bytes_match.is_decodable()}")
142
+ log.debug(f"Trackings match stats for {pattern}, bytes_match: {bytes_match}, is_decodable: {bytes_match.is_decodable()}") # noqa: E501
139
143
 
140
144
  # Send suppressed decodes to a queue and track the reason for the suppression in the stats
141
145
  if not (bytes_match.is_decodable() or force):
@@ -145,7 +149,7 @@ class BinaryScanner:
145
149
  # Print out any queued suppressed notices before printing non suppressed matches
146
150
  self._print_suppression_notices()
147
151
  console.print(decoder)
148
- self.regex_extraction_stats[pattern].tally_match(decoder) # TODO: This call must come after print(decoder)
152
+ self.regex_extraction_stats[pattern].tally_match(decoder) # TODO: This call must come after print(decoder)
149
153
 
150
154
  self._print_suppression_notices()
151
155
 
@@ -167,12 +171,12 @@ class BinaryScanner:
167
171
  return self._pattern_yaralyzer(quote_pattern, REGEX, label, label)
168
172
 
169
173
  def _pattern_yaralyzer(
170
- self,
171
- pattern: str,
172
- pattern_type: str,
173
- rules_label: Optional[str] = None,
174
- pattern_label: Optional[str] = None
175
- ) -> Yaralyzer:
174
+ self,
175
+ pattern: str,
176
+ pattern_type: str,
177
+ rules_label: Optional[str] = None,
178
+ pattern_label: Optional[str] = None
179
+ ) -> Yaralyzer:
176
180
  """Build a yaralyzer to scan self.bytes"""
177
181
  return Yaralyzer.for_patterns(
178
182
  patterns=[escape_yara_pattern(pattern)],
pdfalyzer/config.py CHANGED
@@ -1,3 +1,7 @@
1
+ """
2
+ PdfalyzerConfig object holds the unification of configuration options parsed from the command line
3
+ as well as those set by environment variables and/or a .pdfalyzer file.
4
+ """
1
5
  import importlib.resources
2
6
  from argparse import Namespace
3
7
  from os import environ, pardir, path
@@ -16,13 +16,14 @@ from pdfalyzer.util.adobe_strings import *
16
16
 
17
17
  class PdfObjectProperties:
18
18
  """Simple class to extract critical features of a PdfObject."""
19
+
19
20
  def __init__(
20
- self,
21
- pdf_object: PdfObject,
22
- address: str,
23
- idnum: int,
24
- indirect_object: Optional[IndirectObject] = None
25
- ):
21
+ self,
22
+ pdf_object: PdfObject,
23
+ address: str,
24
+ idnum: int,
25
+ indirect_object: Optional[IndirectObject] = None
26
+ ):
26
27
  self.idnum = idnum
27
28
  self.obj = pdf_object
28
29
  self.indirect_object = indirect_object
@@ -57,7 +58,7 @@ class PdfObjectProperties:
57
58
  else:
58
59
  self.first_address = address
59
60
 
60
- log.debug(f"Node ID: {self.idnum}, type: {self.type}, subtype: {self.sub_type}, " + \
61
+ log.debug(f"Node ID: {self.idnum}, type: {self.type}, subtype: {self.sub_type}, " +
61
62
  f"label: {self.label}, first_address: {self.first_address}")
62
63
 
63
64
  @classmethod
@@ -80,11 +81,11 @@ class PdfObjectProperties:
80
81
 
81
82
  @classmethod
82
83
  def to_table_row(
83
- cls,
84
- reference_key: str,
85
- obj: PdfObject,
86
- is_single_row_table: bool = False
87
- ) -> List[Union[Text, str]]:
84
+ cls,
85
+ reference_key: str,
86
+ obj: PdfObject,
87
+ is_single_row_table: bool = False
88
+ ) -> List[Union[Text, str]]:
88
89
  """PDF object property at reference_key becomes a formatted 3-tuple for use in Rich tables."""
89
90
  with_resolved_refs = cls.resolve_references(reference_key, obj)
90
91
 
@@ -6,7 +6,7 @@ Child/parent relationships should be set using the add_child() and set_parent()
6
6
  methods and not set directly. (TODO: this could be done better with anytree
7
7
  hooks)
8
8
  """
9
- from typing import Callable, List, Optional, Set
9
+ from typing import Callable, List, Optional
10
10
 
11
11
  from anytree import NodeMixin, SymlinkNode
12
12
  from pypdf.errors import PdfReadError
@@ -163,11 +163,14 @@ class PdfTreeNode(NodeMixin, PdfObjectProperties):
163
163
  return None
164
164
  else:
165
165
  address = refs_to_this_node[0].address
166
+
166
167
  # If other node's label doesn't start with a NON_STANDARD_ADDRESS string
167
- # and any of the relationships pointing at this node use something other than a
168
- # NON_STANDARD_ADDRESS_NODES string to refer here, print a warning about multiple refs.
169
- if not (is_prefixed_by_any(from_node.label, NON_STANDARD_ADDRESS_NODES) or \
170
- all(ref.address in NON_STANDARD_ADDRESS_NODES for ref in refs_to_this_node)):
168
+ # AND any of the relationships pointing at this node use something other than a
169
+ # NON_STANDARD_ADDRESS_NODES string to refer here,
170
+ # then print a warning about multiple refs.
171
+ if not (is_prefixed_by_any(from_node.label, NON_STANDARD_ADDRESS_NODES)
172
+ or
173
+ all(ref.address in NON_STANDARD_ADDRESS_NODES for ref in refs_to_this_node)):
171
174
  refs_to_this_node_str = "\n ".join([f"{i + 1}. {r}" for i, r in enumerate(refs_to_this_node)])
172
175
  msg = f"Multiple refs from {from_node} to {self}:\n {refs_to_this_node_str}"
173
176
  log.warning(msg + f"\nCommon address of refs: {address}")
@@ -37,7 +37,10 @@ class PdfTreeVerifier:
37
37
  log.warning(f"Methodd doesn't check revisions but this doc is generation {self.pdfalyzer.max_generation}")
38
38
 
39
39
  # We expect to see all ordinals up to the number of nodes /Trailer claims exist as obj. IDs.
40
- missing_node_ids = [i for i in range(1, self.pdfalyzer.pdf_size) if self.pdfalyzer.find_node_by_idnum(i) is None]
40
+ missing_node_ids = [
41
+ i for i in range(1, self.pdfalyzer.pdf_size)
42
+ if self.pdfalyzer.find_node_by_idnum(i) is None
43
+ ]
41
44
 
42
45
  for idnum in missing_node_ids:
43
46
  ref = IndirectObject(idnum, self.pdfalyzer.max_generation, self.pdfalyzer.pdf_reader)
@@ -57,13 +60,13 @@ class PdfTreeVerifier:
57
60
  log.error(f"Cannot find ref {ref} in PDF!")
58
61
  continue
59
62
  elif isinstance(obj, (NumberObject, NameObject)):
60
- log.info(f"Obj {idnum} is a {type(obj)} w/value {obj}; if relationshipd by /Length etc. this is a nonissue but maybe worth doublechecking")
63
+ log.info(f"Obj {idnum} is a {type(obj)} w/value {obj}; if relationshipd by /Length etc. this is a nonissue but maybe worth doublechecking") # noqa: E501
61
64
  continue
62
65
  elif not isinstance(obj, dict):
63
- log.error(f"Obj {idnum} ({obj}) of type {type(obj)} isn't dict, cannot determine if it should be in tree")
66
+ log.error(f"Obj {idnum} ({obj}) of type {type(obj)} isn't dict, cannot determine if it should be in tree") # noqa: E501
64
67
  continue
65
68
  elif TYPE not in obj:
66
- msg = f"Obj {idnum} has no {TYPE} and is not in tree. Either a loose node w/no data or an error in pdfalyzer."
69
+ msg = f"Obj {idnum} has no {TYPE} and is not in tree. Either a loose node w/no data or an error in pdfalyzer." # noqa: E501
67
70
  msg += f"\nHere's the contents for you to assess:\n{obj}"
68
71
  log.warning(msg)
69
72
  continue
@@ -36,13 +36,13 @@ PARENTHESES = 'parentheses'
36
36
 
37
37
  QUOTE_PATTERNS = {
38
38
  BACKTICK: '`.+`',
39
- BRACKET: '\\[.+\\]', # { 91 [-] 93 }
40
- CURLY_BRACKET: '{.+}', # { 123 [-] 125 }
41
- DOUBLE_LESS_THAN: '<<.+>>', # Hex { 60 60 [-] 62 62 }
39
+ BRACKET: '\\[.+\\]', # { 91 [-] 93 }
40
+ CURLY_BRACKET: '{.+}', # { 123 [-] 125 }
41
+ DOUBLE_LESS_THAN: '<<.+>>', # Hex { 60 60 [-] 62 62 }
42
42
  ESCAPED_SINGLE: "\\'.+\\'",
43
43
  ESCAPED_DOUBLE: '\\".+\\"',
44
- FRONTSLASH: '/.+/', # { 47 [-] 47 }
45
- GUILLEMET: 'AB [-] BB', # Guillemet quotes are not ANSI so require byte pattern
46
- LESS_THAN: '<.+>', # Hex { 60 [-] 62 }
47
- PARENTHESES: '\\(.+\\)', # Hex { 28 [-] 29 }
44
+ FRONTSLASH: '/.+/', # { 47 [-] 47 }
45
+ GUILLEMET: 'AB [-] BB', # Guillemet quotes are not ANSI so require byte pattern
46
+ LESS_THAN: '<.+>', # Hex { 60 [-] 62 }
47
+ PARENTHESES: '\\(.+\\)', # Hex { 28 [-] 29 }
48
48
  }
@@ -8,8 +8,6 @@ from typing import Optional, Union
8
8
  from yaralyzer.config import YaralyzerConfig
9
9
  from yaralyzer.yaralyzer import Yaralyzer
10
10
 
11
- from pdfalyzer.config import PdfalyzerConfig
12
-
13
11
  YARA_RULES_DIR = files('pdfalyzer').joinpath('yara_rules')
14
12
 
15
13
  YARA_RULES_FILES = [
@@ -38,7 +36,7 @@ def _build_yaralyzer(scannable: Union[bytes, str], label: Optional[str] = None)
38
36
  with as_file(YARA_RULES_DIR.joinpath(YARA_RULES_FILES[2])) as yara2:
39
37
  with as_file(YARA_RULES_DIR.joinpath(YARA_RULES_FILES[3])) as yara3:
40
38
  with as_file(YARA_RULES_DIR.joinpath(YARA_RULES_FILES[4])) as yara4:
41
- # If there is a custom yara_rules argument file use that instead of the files in the yara_rules/ dir
39
+ # If there is a custom yara_rules arg, use that instead of the files in the yara_rules/ dir
42
40
  rules_paths = YaralyzerConfig.args.yara_rules_files or []
43
41
 
44
42
  if not YaralyzerConfig.args.no_default_yara_rules:
pdfalyzer/font_info.py CHANGED
@@ -2,7 +2,6 @@
2
2
  Unify font information spread across a bunch of PdfObjects (Font, FontDescriptor,
3
3
  and FontFile) into a single class.
4
4
  """
5
-
6
5
  from pypdf._cmap import build_char_map, prepare_cm
7
6
  from pypdf.generic import IndirectObject, PdfObject
8
7
  from rich.text import Text
@@ -11,9 +10,9 @@ from yaralyzer.util.logging import log
11
10
 
12
11
  from pdfalyzer.binary.binary_scanner import BinaryScanner
13
12
  from pdfalyzer.output.character_mapping import print_character_mapping, print_prepared_charmap
14
- from pdfalyzer.output.tables.font_summary_table import font_summary_table
15
13
  from pdfalyzer.output.layout import print_section_subheader
16
14
  from pdfalyzer.output.styles.node_colors import get_label_style
15
+ from pdfalyzer.output.tables.font_summary_table import font_summary_table
17
16
  from pdfalyzer.util.adobe_strings import (FONT, FONT_DESCRIPTOR, FONT_FILE, FONT_LENGTHS, RESOURCES,
18
17
  SUBTYPE, TO_UNICODE, TYPE, W, WIDTHS)
19
18
 
@@ -143,19 +142,19 @@ class FontInfo:
143
142
  console.line()
144
143
 
145
144
  # TODO: currently unused
146
- def preview_bytes_at_advertised_lengths(self):
147
- """Show the bytes at the boundaries provided by /Length1, /Length2, and /Length3, if they exist"""
148
- lengths = self.lengths or []
145
+ # def preview_bytes_at_advertised_lengths(self):
146
+ # """Show the bytes at the boundaries provided by /Length1, /Length2, and /Length3, if they exist"""
147
+ # lengths = self.lengths or []
149
148
 
150
- if self.lengths is None or len(lengths) <= 1:
151
- console.print("No length demarcations to preview.", style='grey.dark')
149
+ # if self.lengths is None or len(lengths) <= 1:
150
+ # console.print("No length demarcations to preview.", style='grey.dark')
152
151
 
153
- for i, demarcation in enumerate(lengths[1:]):
154
- console.print(f"{self.font_file} at /Length{i} ({demarcation}):")
155
- print(f"\n Stream before: {self.stream_data[demarcation - FONT_SECTION_PREVIEW_LEN:demarcation + 1]}")
156
- print(f"\n Stream after: {self.stream_data[demarcation:demarcation + FONT_SECTION_PREVIEW_LEN]}")
152
+ # for i, demarcation in enumerate(lengths[1:]):
153
+ # console.print(f"{self.font_file} at /Length{i} ({demarcation}):")
154
+ # print(f"\n Stream before: {self.stream_data[demarcation - FONT_SECTION_PREVIEW_LEN:demarcation + 1]}")
155
+ # print(f"\n Stream after: {self.stream_data[demarcation:demarcation + FONT_SECTION_PREVIEW_LEN]}")
157
156
 
158
- print(f"\nfinal bytes back from {self.stream_data.lengths[2]} + 10: {self.stream_data[-10 - -f.lengths[2]:]}")
157
+ # print(f"\nfinal bytes back from {self.stream_data.lengths[2]} + 10: {self.stream_data[-10 - -f.lengths[2]:]}")
159
158
 
160
159
  def __str__(self) -> str:
161
160
  return self.display_title
@@ -15,7 +15,7 @@ OPEN_FILES_BUFFER = 30 # we might have some files open already so we need
15
15
  PDF_EXT = '.pdf'
16
16
 
17
17
  # TODO: this kind of type alias is not supported until Python 3.12
18
- #type StrOrPath = Union[str, Path]
18
+ # type StrOrPath = Union[str, Path]
19
19
 
20
20
 
21
21
  def with_pdf_extension(file_path: Union[str, Path]) -> str:
@@ -92,11 +92,11 @@ def set_max_open_files(num_filehandles: int = DEFAULT_MAX_OPEN_FILES) -> tuple[O
92
92
  resource.setrlimit(resource.RLIMIT_NOFILE, (soft, hard))
93
93
  except (ValueError, resource.error):
94
94
  try:
95
- hard = soft
96
- print_highlighted(f"Retrying setting max open files (soft, hard)=({soft}, {hard})", style='yellow')
97
- resource.setrlimit(resource.RLIMIT_NOFILE, (soft, hard))
95
+ hard = soft
96
+ print_highlighted(f"Retrying setting max open files (soft, hard)=({soft}, {hard})", style='yellow')
97
+ resource.setrlimit(resource.RLIMIT_NOFILE, (soft, hard))
98
98
  except Exception:
99
- print_highlighted('Failed to set max open files / ulimit, giving up!', style='error')
100
- soft,hard = resource.getrlimit(resource.RLIMIT_NOFILE)
99
+ print_highlighted('Failed to set max open files / ulimit, giving up!', style='error')
100
+ soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
101
101
 
102
102
  return (soft, hard)
@@ -6,7 +6,6 @@ from typing import List, Optional
6
6
  from pypdf.generic import IndirectObject, PdfObject
7
7
 
8
8
  from pdfalyzer.pdf_object_relationship import PdfObjectRelationship
9
- from pdfalyzer.util.adobe_strings import *
10
9
 
11
10
 
12
11
  def pdf_object_id(pdf_object) -> Optional[int]:
@@ -20,6 +19,7 @@ def does_list_have_any_references(_list) -> bool:
20
19
 
21
20
 
22
21
  def _sort_pdf_object_refs(refs: List[PdfObjectRelationship]) -> List[PdfObjectRelationship]:
22
+ """Sort a list of PdfObjectRelationship objects by their to_obj's idnum. Only used by pytest."""
23
23
  return sorted(refs, key=lambda ref: ref.to_obj.idnum)
24
24
 
25
25
 
@@ -1,13 +1,11 @@
1
1
  """
2
- Functions for miscellaneous Rich text/string operations.
2
+ Functions for miscellaneous Rich text/string pretty printing operations.
3
3
  """
4
4
  from typing import List, Union
5
5
 
6
6
  from pypdf.generic import PdfObject
7
7
  from rich.console import Console
8
- from rich.highlighter import RegexHighlighter, JSONHighlighter
9
8
  from rich.text import Text
10
- from yaralyzer.output.rich_console import console
11
9
 
12
10
  from pdfalyzer.helpers.pdf_object_helper import pypdf_class_name
13
11
  from pdfalyzer.output.styles.node_colors import get_label_style, get_class_style_italic
@@ -22,11 +20,11 @@ def print_highlighted(msg: Union[str, Text], **kwargs) -> None:
22
20
 
23
21
 
24
22
  def quoted_text(
25
- _string: str,
26
- style: str = '',
27
- quote_char_style: str = 'white',
28
- quote_char: str = "'"
29
- ) -> Text:
23
+ _string: str,
24
+ style: str = '',
25
+ quote_char_style: str = 'white',
26
+ quote_char: str = "'"
27
+ ) -> Text:
30
28
  """Wrap _string in 'quote_char'. Style 'quote_char' with 'quote_char_style'."""
31
29
  quote_char_txt = Text(quote_char, style=quote_char_style)
32
30
  txt = quote_char_txt + Text(_string, style=style) + quote_char_txt
@@ -8,6 +8,7 @@ from yaralyzer.helpers.bytes_helper import print_bytes
8
8
  from yaralyzer.output.rich_console import console
9
9
  from yaralyzer.util.logging import log
10
10
 
11
+ # from pdfalyzer.font_info import FontInfo # Causes circular import
11
12
  from pdfalyzer.helpers.rich_text_helper import quoted_text
12
13
  from pdfalyzer.helpers.string_helper import pp
13
14
  from pdfalyzer.output.layout import print_headline_panel, subheading_width
@@ -17,7 +18,7 @@ CHARMAP_TITLE_PADDING = (1, 0, 0, 2)
17
18
  CHARMAP_PADDING = (0, 2, 0, 10)
18
19
 
19
20
 
20
- def print_character_mapping(font: 'FontInfo') -> None:
21
+ def print_character_mapping(font: 'FontInfo') -> None: # noqa: F821
21
22
  """Prints the character mapping extracted by PyPDF._charmap in tidy columns"""
22
23
  if font.character_mapping is None or len(font.character_mapping) == 0:
23
24
  log.info(f"No character map found in {font}")
@@ -37,7 +38,7 @@ def print_character_mapping(font: 'FontInfo') -> None:
37
38
  console.line()
38
39
 
39
40
 
40
- def print_prepared_charmap(font: 'FontInfo'):
41
+ def print_prepared_charmap(font: 'FontInfo'): # noqa: F821
41
42
  """Prints the prepared_charmap returned by PyPDF."""
42
43
  if font.prepared_char_map is None:
43
44
  log.info(f"No prepared_charmap found in {font}")
@@ -1,5 +1,5 @@
1
1
  """
2
- Methods to help with the design of the output
2
+ Methods to help with the formatting of the output tables, headers, panels, etc.
3
3
  """
4
4
  from rich import box
5
5
  from rich.padding import Padding
@@ -23,11 +23,13 @@ from pdfalyzer.detection.yaralyzer_helper import get_bytes_yaralyzer, get_file_y
23
23
  from pdfalyzer.helpers.string_helper import pp
24
24
  from pdfalyzer.output.layout import (print_fatal_error_panel, print_section_header, print_section_subheader,
25
25
  print_section_sub_subheader)
26
+ from pdfalyzer.output.tables.decoding_stats_table import build_decoding_stats_table
26
27
  from pdfalyzer.output.tables.pdf_node_rich_table import generate_rich_tree, get_symlink_representation
27
28
  from pdfalyzer.output.tables.stream_objects_table import stream_objects_table
28
- from pdfalyzer.output.tables.decoding_stats_table import build_decoding_stats_table
29
29
  from pdfalyzer.pdfalyzer import Pdfalyzer
30
- from pdfalyzer.util.adobe_strings import *
30
+ # from pdfalyzer.util.adobe_strings import *
31
+
32
+ INTERNAL_YARA_ERROR_MSG = "Internal YARA error! YARA's error codes can be checked here: https://github.com/VirusTotal/yara/blob/master/libyara/include/yara/error.h" # noqa: E501
31
33
 
32
34
 
33
35
  class PdfalyzerPresenter:
@@ -91,7 +93,6 @@ class PdfalyzerPresenter:
91
93
  2. Check for (and force decode) dangerous PDF instructions like /JavaScript and /OpenAction
92
94
  3. Check for (and force decode) any BOMs (byte order marks)
93
95
  4. Check for (and force decode) any sequences of bytes between quotes
94
-
95
96
  """
96
97
  print_section_header(f'Binary Stream Analysis / Extraction')
97
98
  console.print(self._stream_objects_table())
@@ -109,6 +110,7 @@ class PdfalyzerPresenter:
109
110
  log.warning(msg)
110
111
  node_stream_bytes = node_stream_bytes.encode()
111
112
 
113
+ console.line()
112
114
  print_section_subheader(f"{escape(str(node))} Summary and Analysis", style=f"{BYTES_HIGHLIGHT} reverse")
113
115
  binary_scanner = BinaryScanner(node_stream_bytes, node)
114
116
  console.print(bytes_hashes_table(binary_scanner.bytes))
@@ -130,9 +132,9 @@ class PdfalyzerPresenter:
130
132
 
131
133
  try:
132
134
  self.yaralyzer.yaralyze()
133
- except yara.Error as e:
135
+ except yara.Error:
134
136
  console.print_exception()
135
- print_fatal_error_panel("Internal YARA error! YARA's error codes can be checked here: https://github.com/VirusTotal/yara/blob/master/libyara/include/yara/error.h")
137
+ print_fatal_error_panel(INTERNAL_YARA_ERROR_MSG)
136
138
  return
137
139
 
138
140
  YaralyzerConfig.args.standalone_mode = False
@@ -1,9 +1,13 @@
1
+ """
2
+ Helper functions for building a table that summarizes the decoding attempts made on binary data.
3
+ """
1
4
  from numbers import Number
2
5
 
3
6
  from rich.table import Table
4
7
  from rich.text import Text
5
8
  from yaralyzer.helpers.rich_text_helper import CENTER, na_txt, prefix_with_plain_text_obj
6
9
 
10
+ from pdfalyzer.binary.binary_scanner import BinaryScanner
7
11
  from pdfalyzer.helpers.rich_text_helper import pct_txt
8
12
  from pdfalyzer.output.layout import generate_subtable, half_width, pad_header
9
13
 
@@ -13,7 +17,8 @@ NOT_FOUND_MSG = Text('(not found)', style='grey.dark_italic')
13
17
  REGEX_SUBTABLE_COLS = ['Metric', 'Value']
14
18
  DECODES_SUBTABLE_COLS = ['Encoding', '#', 'Decoded', '#', 'Forced', '#', 'Failed']
15
19
 
16
- def build_decoding_stats_table(scanner: 'BinaryScanner') -> Table:
20
+
21
+ def build_decoding_stats_table(scanner: BinaryScanner) -> Table:
17
22
  """Diplay aggregate results on the decoding attempts we made on subsets of scanner.bytes"""
18
23
  stats_table = _new_decoding_stats_table(scanner.label.plain if scanner.label else '')
19
24
  regexes_not_found_in_stream = []
@@ -1,7 +1,6 @@
1
1
  """
2
2
  Build a rich table to show the sizes of embedded streams.
3
3
  """
4
-
5
4
  from typing import List
6
5
 
7
6
  from rich.table import Table
@@ -14,12 +14,12 @@ INCOMPARABLE_PROPS = ['from_obj', 'to_obj']
14
14
 
15
15
  class PdfObjectRelationship:
16
16
  def __init__(
17
- self,
18
- from_node: 'PdfTreeNode',
19
- to_obj: IndirectObject,
20
- reference_key: str,
21
- address: str
22
- ) -> None:
17
+ self,
18
+ from_node: 'PdfTreeNode',
19
+ to_obj: IndirectObject,
20
+ reference_key: str,
21
+ address: str
22
+ ) -> None:
23
23
  """
24
24
  In the case of easy key/value pairs the reference_key and the address are the same but
25
25
  for more complicated references the address will be the reference_key plus sub references.
@@ -53,12 +53,12 @@ class PdfObjectRelationship:
53
53
 
54
54
  @classmethod
55
55
  def build_node_references(
56
- cls,
57
- from_node: 'PdfTreeObject',
58
- from_obj: Optional[PdfObject] = None,
59
- ref_key: Optional[Union[str, int]] = None,
60
- address: Optional[str] = None
61
- ) -> List['PdfObjectRelationship']:
56
+ cls,
57
+ from_node: 'PdfTreeObject',
58
+ from_obj: Optional[PdfObject] = None,
59
+ ref_key: Optional[Union[str, int]] = None,
60
+ address: Optional[str] = None
61
+ ) -> List['PdfObjectRelationship']:
62
62
  """
63
63
  Builds list of relationships 'from_node.obj' contains referencing other PDF objects.
64
64
  Initially called with single arg from_node. Other args are employed when recursable
pdfalyzer/pdfalyzer.py CHANGED
@@ -77,7 +77,7 @@ class Pdfalyzer:
77
77
  nodes_to_walk_next = [self._add_relationship_to_pdf_tree(r) for r in node.references_to_other_nodes()]
78
78
  node.all_references_processed = True
79
79
 
80
- for next_node in [n for n in nodes_to_walk_next if not (n is None or n.all_references_processed) ]:
80
+ for next_node in [n for n in nodes_to_walk_next if not (n is None or n.all_references_processed)]:
81
81
  if not next_node.all_references_processed:
82
82
  self.walk_node(next_node)
83
83
 
@@ -105,7 +105,7 @@ class Pdfalyzer:
105
105
 
106
106
  def stream_nodes(self) -> List[PdfTreeNode]:
107
107
  """List of actual nodes (not SymlinkNodes) containing streams sorted by PDF object ID"""
108
- stream_filter = lambda node: node.contains_stream() and not isinstance(node, SymlinkNode)
108
+ stream_filter = lambda node: node.contains_stream() and not isinstance(node, SymlinkNode) # noqa: E731
109
109
  return sorted(findall(self.pdf_tree, stream_filter), key=lambda r: r.idnum)
110
110
 
111
111
  def _add_relationship_to_pdf_tree(self, relationship: PdfObjectRelationship) -> Optional[PdfTreeNode]:
@@ -114,7 +114,7 @@ class Pdfalyzer:
114
114
  placed in the PDF node processing queue.
115
115
  """
116
116
  log.info(f'Assessing relationship {relationship}...')
117
- was_seen_before = (relationship.to_obj.idnum in self.nodes_encountered) # Must come before _build_or_find()
117
+ was_seen_before = (relationship.to_obj.idnum in self.nodes_encountered) # Must come before _build_or_find()
118
118
  from_node = relationship.from_node
119
119
  to_node = self._build_or_find_node(relationship.to_obj, relationship.address)
120
120
  self.max_generation = max([self.max_generation, relationship.to_obj.generation or 0])
@@ -133,7 +133,7 @@ class Pdfalyzer:
133
133
  from_node.set_parent(to_node)
134
134
  elif to_node.parent is not None:
135
135
  # Some StructElem nodes I have seen use /P or /K despire not being the real parent/child
136
- if relationship.from_node.type.startswith(STRUCT_ELEM):# reference_key != relationship.address:
136
+ if relationship.from_node.type.startswith(STRUCT_ELEM):
137
137
  log.info(f"{relationship} fail: {to_node} parent is already {to_node.parent}")
138
138
  else:
139
139
  log.warning(f"{relationship} fail: {to_node} parent is already {to_node.parent}")
@@ -173,7 +173,6 @@ class Pdfalyzer:
173
173
 
174
174
  def _resolve_indeterminate_nodes(self) -> None:
175
175
  """Place all indeterminate nodes in the tree."""
176
- #set_log_level('INFO')
177
176
  indeterminate_nodes = [self.nodes_encountered[idnum] for idnum in self.indeterminate_ids]
178
177
  indeterminate_nodes_string = "\n ".join([f"{node}" for node in indeterminate_nodes])
179
178
  log.info(f"Resolving {len(indeterminate_nodes)} indeterminate nodes: {indeterminate_nodes_string}")
@@ -1,7 +1,6 @@
1
1
  """
2
2
  String constants specified in the Adobe specs for PDFs, fonts, etc.
3
3
  """
4
-
5
4
  from pypdf.constants import (CatalogDictionary, ImageAttributes, PageAttributes,
6
5
  PagesAttributes, Resources)
7
6
 
@@ -117,20 +116,20 @@ NON_TREE_REFERENCES = [
117
116
 
118
117
  # Some PdfObjects can't be properly placed in the tree until the entire tree is parsed
119
118
  INDETERMINATE_REF_KEYS = [
120
- ANNOTS, # At least when it appears in a page
119
+ ANNOTS, # At least when it appears in a page
121
120
  COLOR_SPACE,
122
121
  D,
123
122
  DEST,
124
123
  EXT_G_STATE,
125
- FIELDS, # At least for /AcroForm
124
+ FIELDS, # At least for /AcroForm
126
125
  FIRST,
127
126
  FONT,
128
127
  NAMES,
129
128
  OPEN_ACTION,
130
- P, # At least for widgets...
129
+ P, # At least for widgets...
131
130
  RESOURCES,
132
131
  XOBJECT,
133
- UNLABELED, # TODO: this might be wrong? maybe this is where the /Resources actually live?
132
+ UNLABELED, # TODO: this might be wrong? maybe this is where the /Resources actually live?
134
133
  ]
135
134
 
136
135
  INDETERMINATE_PREFIXES = [p for p in INDETERMINATE_REF_KEYS if len(p) > 2]
@@ -1,5 +1,8 @@
1
+ """
2
+ Parse command line arguments for pdfalyzer and construct the PdfalyzerConfig object.
3
+ """
1
4
  import sys
2
- from argparse import ArgumentError, ArgumentParser, Namespace
5
+ from argparse import ArgumentParser, Namespace
3
6
  from collections import namedtuple
4
7
  from functools import partial, update_wrapper
5
8
  from importlib.metadata import version
@@ -89,9 +92,9 @@ select.add_argument('-c', '--counts', action='store_true',
89
92
  help='show counts of some of the properties of the objects in the PDF')
90
93
 
91
94
  select.add_argument('-s', '--streams',
92
- help="scan all the PDF's decoded/decrypted streams for sus content as well as any YARA rule matches. " + \
93
- "brute force is involved; output is verbose. a single OBJ_ID can be optionally provided to " + \
94
- "limit the output to a single internal object. try '-s -- [OTHERARGS]' if you run into an " + \
95
+ help="scan all the PDF's decoded/decrypted streams for sus content as well as any YARA rule matches. " +
96
+ "brute force is involved; output is verbose. a single OBJ_ID can be optionally provided to " +
97
+ "limit the output to a single internal object. try '-s -- [OTHERARGS]' if you run into an " +
95
98
  "argument position related piccadilly.",
96
99
  nargs='?',
97
100
  const=ALL_STREAMS,
@@ -99,7 +102,7 @@ select.add_argument('-s', '--streams',
99
102
  type=int)
100
103
 
101
104
  select.add_argument('--extract-quoted',
102
- help="extract and force decode all bytes found between this kind of quotation marks " + \
105
+ help="extract and force decode all bytes found between this kind of quotation marks " +
103
106
  "(requires --streams. can be specified more than once)",
104
107
  choices=list(QUOTE_PATTERNS.keys()),
105
108
  dest='extract_quoteds',
@@ -144,7 +147,7 @@ def parse_arguments():
144
147
  args.output_dir = args.output_dir or getcwd()
145
148
  file_prefix = (args.file_prefix + '__') if args.file_prefix else ''
146
149
  args.file_suffix = ('_' + args.file_suffix) if args.file_suffix else ''
147
- args.output_basename = f"{file_prefix}{path.basename(args.file_to_scan_path)}"
150
+ args.output_basename = f"{file_prefix}{path.basename(args.file_to_scan_path)}"
148
151
  elif args.output_dir:
149
152
  log.warning('--output-dir provided but no export option was chosen')
150
153
 
@@ -200,8 +203,8 @@ MAX_QUALITY = 10
200
203
 
201
204
  combine_pdfs_parser = ArgumentParser(
202
205
  description="Combine multiple PDFs into one.",
203
- epilog="If all PDFs end in a number (e.g. 'xyz_1.pdf', 'xyz_2.pdf', etc. sort the files as if those were" \
204
- " page numebrs prior to merging.",
206
+ epilog="If all PDFs end in a number (e.g. 'xyz_1.pdf', 'xyz_2.pdf', etc. sort the files as if those were" +
207
+ " page numbers prior to merging.",
205
208
  formatter_class=RichHelpFormatterPlus)
206
209
 
207
210
  combine_pdfs_parser.add_argument('pdfs',
@@ -1,7 +1,7 @@
1
+
1
2
  import logging
2
3
 
3
4
 
4
5
  # Starting pdb.set_trace() this way kind of sucks because yr locals are messed up
5
6
  def debugger():
6
7
  import pdb; pdb.set_trace(locals())
7
-
@@ -1,19 +1,24 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pdfalyzer
3
- Version: 1.16.9
4
- Summary: A PDF analysis toolkit. Scan a PDF with relevant YARA rules, visualize its inner tree-like data structure in living color (lots of colors), force decodes of suspicious font binaries, and more.
3
+ Version: 1.16.11
4
+ Summary: PDF analysis tool. Scan a PDF with YARA rules, visualize its inner tree-like data structure in living color (lots of colors), force decodes of suspicious font binaries, and more.
5
5
  Home-page: https://github.com/michelcrypt4d4mus/pdfalyzer
6
6
  License: GPL-3.0-or-later
7
- Keywords: ascii art,binary,color,cybersecurity,DFIR,encoding,font,infosec,maldoc,malicious pdf,malware,malware analysis,pdf,pdfs,pdf analysis,threat assessment,visualization,yara
7
+ Keywords: ascii art,binary,color,cybersecurity,DFIR,encoding,font,infosec,maldoc,malicious pdf,malware,malware analysis,pdf,pdfs,pdf analysis,pypdf,threat assessment,visualization,yara
8
8
  Author: Michel de Cryptadamus
9
9
  Author-email: michel@cryptadamus.com
10
10
  Requires-Python: >=3.9.2,<4.0.0
11
11
  Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Environment :: Console
12
13
  Classifier: Intended Audience :: Information Technology
13
14
  Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
15
+ Classifier: Programming Language :: Python
14
16
  Classifier: Programming Language :: Python :: 3
15
17
  Classifier: Programming Language :: Python :: 3.10
16
18
  Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Programming Language :: Python :: 3.9
17
22
  Classifier: Topic :: Artistic Software
18
23
  Classifier: Topic :: Scientific/Engineering :: Visualization
19
24
  Classifier: Topic :: Security
@@ -304,12 +309,6 @@ These are the naming conventions at play in The Pdfalyzer code base:
304
309
  * [`PyPDF` documentation](https://pypdf.readthedocs.io/en/stable/) (latest is 4.x or something so these are the relevant docs for `pdfalyze`)
305
310
 
306
311
 
307
- # TODO
308
- * Highlight decodes with a lot of Javascript keywords
309
- * https://github.com/mandiant/flare-floss (https://github.com/mandiant/flare-floss/releases/download/v2.1.0/floss-v2.1.0-linux.zip)
310
- * https://github.com/1Project/Scanr/blob/master/emulator/emulator.py
311
-
312
-
313
312
  [^1]: The official Adobe PDF specification calls this tree the PDF's "logical structure", which is a good example of nomenclature that does not help those who see it understand anything about what is being described. I can forgive them given that they named this thing back in the 80s, though it's a good example of why picking good names for things at the beginning is so important.
314
313
 
315
314
  [^2]: An exception will be raised if there's any issue placing a node while parsing or if there are any nodes not reachable from the root of the tree at the end of parsing. If there are no exceptions then all internal PDF objects are guaranteed to exist in the tree except in these situations when warnings will be printed:
@@ -0,0 +1,50 @@
1
+ .pdfalyzer.example,sha256=sh_qkUBw4hfJia_Dx2wB-fsqJInhx2sSgA7WJz3MHYo,3917
2
+ CHANGELOG.md,sha256=ueR9OA8xj4EBFYitrcAJ6yGs1MXw7rnsCcpOdxrog0Q,12588
3
+ LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
4
+ pdfalyzer/__init__.py,sha256=Z7KzZamL44xFAE9_t6jX1-KhWAIn5f-voTo5BK8tlg8,5394
5
+ pdfalyzer/__main__.py,sha256=Ko_AoAyYMLIe_cmhiUSl6twheLZrGyT8aOSJ2CP7EZY,43
6
+ pdfalyzer/binary/binary_scanner.py,sha256=h_qcflLWn4pu5NH9F84BuVSYemWQjA3kNxsmXVdz3fk,10211
7
+ pdfalyzer/config.py,sha256=4YMDZu3-t5RSGckjN9bT5LzXyhwHXcxi4QjzVQ4-N6U,2097
8
+ pdfalyzer/decorators/document_model_printer.py,sha256=2tjJItZltukmOD2wjGvl2IsBM7Gug59wMHGLpzGDbV0,2682
9
+ pdfalyzer/decorators/indeterminate_node.py,sha256=ivB6dX5aN8W9m0ksXhmUcixnjYjnuE7DARalH-nMjxY,6616
10
+ pdfalyzer/decorators/pdf_object_properties.py,sha256=D7WNZy2s03iijAf7066HN6W_R4wKwX8juo6TivQrCB0,5477
11
+ pdfalyzer/decorators/pdf_tree_node.py,sha256=Esmdox2Z9VXUieFhC-JgHeOXzWy0ju_OsPkOJvhep9A,10952
12
+ pdfalyzer/decorators/pdf_tree_verifier.py,sha256=I8Sd8cOEwJQrWe30n-hGVF1oZPqQD2xhckAwr69f7P8,4617
13
+ pdfalyzer/detection/constants/binary_regexes.py,sha256=s69S7uq1v4vBy3ZkKKKt3ClNuFCuQ0ztootUxzlgfFw,1632
14
+ pdfalyzer/detection/constants/javascript_reserved_keywords.py,sha256=CXXdWskdQa0Hs5wCci2RBVvipgZg34_cLfmkWG4Xcmg,991
15
+ pdfalyzer/detection/javascript_hunter.py,sha256=_wT2vkKTMlm_RGCjYsmwcmV-ag1qep3EpkHmUw0nWcQ,711
16
+ pdfalyzer/detection/yaralyzer_helper.py,sha256=VLokcqffcw1RPriWPWA9QSMbefuHm0ACPmvG8biHIl0,2287
17
+ pdfalyzer/font_info.py,sha256=2R85iETY_1eKCeRrkqeIxfPDqXZyWfCNcHx_-aTyF0s,6682
18
+ pdfalyzer/helpers/dict_helper.py,sha256=2TP0_EJBouaWD6jfnAekrEZ4M5eHKL8Tm61FgXZtBAg,303
19
+ pdfalyzer/helpers/filesystem_helper.py,sha256=yAeZ8VllSdO9eudGllwd_7odUHsfoM9SseQHDGvU59k,4117
20
+ pdfalyzer/helpers/number_helper.py,sha256=8IlRmaOVLJsUV18VLvWRZU8SzRxL0XZjrY3sjmk2Ro4,292
21
+ pdfalyzer/helpers/pdf_object_helper.py,sha256=Ab4gKhhuEO-nKYdMYlFVo2g0ygebJsN_ZZ1tCFkVdhM,1160
22
+ pdfalyzer/helpers/rich_text_helper.py,sha256=wTucWbrf8OuS2wVuudIH4v078NTUPQbCqU9ZJ4bDzdE,2122
23
+ pdfalyzer/helpers/string_helper.py,sha256=75EDEFw3UWHvWF32WtvZVBbqYY3ozO4y30dtH2qVMX0,2278
24
+ pdfalyzer/output/character_mapping.py,sha256=RWqIu1kGnB4bQS1E4ZcjMWtUqR9ehvTth7U9Nq99_zk,2189
25
+ pdfalyzer/output/layout.py,sha256=lAJQiu76E-_5MRghpRK7zuXqkhWI7ZjsptfadXXZQF8,2183
26
+ pdfalyzer/output/pdfalyzer_presenter.py,sha256=TUsMc2GTUDjFzIGk7Ep5ZASfXcKX_WNtZzZKbQTHcfY,8580
27
+ pdfalyzer/output/styles/node_colors.py,sha256=rfsTAUF43K_buw21SZoP6L5c_cLy7S-xA4GUiWJsDkc,3986
28
+ pdfalyzer/output/styles/rich_theme.py,sha256=Y8QmuINlyZNIHvf3oD0CV3w2dC49NNKtvOChvudDCT8,1983
29
+ pdfalyzer/output/tables/decoding_stats_table.py,sha256=LJAqbaL4nUlbQD0tB65SRaTUAhF86n0QqBMYWK2-sxU,3623
30
+ pdfalyzer/output/tables/font_summary_table.py,sha256=xfTqC7BlQd0agQf6nDDhkcJno7hru6mf9_xY1f5IDcw,2065
31
+ pdfalyzer/output/tables/pdf_node_rich_table.py,sha256=7G-FLb_EUP50kZmYCTbo8Q6taU4xKp2QIGNOnQtYbNg,5908
32
+ pdfalyzer/output/tables/stream_objects_table.py,sha256=PgQj8oTtW5_X8SMQb3FvCWDS-d4Zl6QiE44Qhiv7lTY,706
33
+ pdfalyzer/pdf_object_relationship.py,sha256=tOJTp73m82oNZJB7NxvTLv167kqthH8PRbVnev_4uEk,5291
34
+ pdfalyzer/pdfalyzer.py,sha256=Sdld8l3Eg8f9asiyD-fhwlU6dC4_tP6h8zXTKw-cUug,10956
35
+ pdfalyzer/util/adobe_strings.py,sha256=eF4K1RhhR_qgMBT58MzCxWkqQj17OWLCNmG3SjZ9BUs,5045
36
+ pdfalyzer/util/argument_parser.py,sha256=yfkZTQX3t5G7y0xk2x9Ef0_2V2MuukXexiclDrf_8So,11987
37
+ pdfalyzer/util/debugging.py,sha256=hjYGxptJmal9TTaAUkkoo0oNu2tdx6ZYSyC0WjvzHh0,156
38
+ pdfalyzer/util/exceptions.py,sha256=XLFFTdx1n6i_VCmvuzvIOCa-djJvGEitfo9lhy3zq0k,98
39
+ pdfalyzer/util/pdf_parser_manager.py,sha256=FVRYAYsCd0y5MAm--qvXnwCZnDtB3x85FdJtb-gpyw4,3109
40
+ pdfalyzer/yara_rules/PDF.yara,sha256=70JzPq5F6AS8F46Seu6u0j5GS1JHxkS42r7g7PVSpRg,81489
41
+ pdfalyzer/yara_rules/PDF_binary_stream.yara,sha256=Qt0Wd7RFXYiHaT9YxTCrhC68ccmFcEG1XMNC3p5IwcI,821
42
+ pdfalyzer/yara_rules/__init.py__,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
+ pdfalyzer/yara_rules/didier_stevens.yara,sha256=4XhqafU09xzYUP7LCygHHBXOpAXUblJf6Tkn37MUy0w,7253
44
+ pdfalyzer/yara_rules/lprat.static_file_analysis.yara,sha256=i0CwRH8pBx_QshKFTQtr1CP5n378EZelsF2FxMY2y5A,21859
45
+ pdfalyzer/yara_rules/pdf_malware.yara,sha256=jDqSTP5BQSi2I_1xZiFZdy68I4oVWDat2j08-qdfbto,91063
46
+ pdfalyzer-1.16.11.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
47
+ pdfalyzer-1.16.11.dist-info/METADATA,sha256=Xa0oDRfBu_NyzjAiI4cG2tqrDxJZRx0mPErknDQcceA,26187
48
+ pdfalyzer-1.16.11.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
49
+ pdfalyzer-1.16.11.dist-info/entry_points.txt,sha256=aZurgt-Xg3pojS7oTRI4hNLpK1hO4kTfChf0x2eQoD8,147
50
+ pdfalyzer-1.16.11.dist-info/RECORD,,
@@ -1,49 +0,0 @@
1
- CHANGELOG.md,sha256=EvbWGXHL4_rqkwzObtVhY0QL58O4gP4uE0Z9JaqSAEA,12307
2
- LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
3
- pdfalyzer/__init__.py,sha256=q8qSdGdyUYmTYGOp_d2bRCCFASnlVt4wa-DlBikD5-M,5362
4
- pdfalyzer/__main__.py,sha256=Ko_AoAyYMLIe_cmhiUSl6twheLZrGyT8aOSJ2CP7EZY,43
5
- pdfalyzer/binary/binary_scanner.py,sha256=7NrXx8GB2gpb04oR2bcZJKkOXOlzn2hWpcGlcYMqSfs,10217
6
- pdfalyzer/config.py,sha256=oN-pVR037lt3giRsnsm4c8ku5hCW8ChFqYFi9V7w1qU,1918
7
- pdfalyzer/decorators/document_model_printer.py,sha256=2tjJItZltukmOD2wjGvl2IsBM7Gug59wMHGLpzGDbV0,2682
8
- pdfalyzer/decorators/indeterminate_node.py,sha256=ivB6dX5aN8W9m0ksXhmUcixnjYjnuE7DARalH-nMjxY,6616
9
- pdfalyzer/decorators/pdf_object_properties.py,sha256=I7kix5hXNguAH2VW2uINIZRHJ8xYS4JGfc6Aiakyh4c,5522
10
- pdfalyzer/decorators/pdf_tree_node.py,sha256=sd3a4uQMu_KQ_wvo0pjwQ8K1HI7xGgsGd47eI2IWybY,10927
11
- pdfalyzer/decorators/pdf_tree_verifier.py,sha256=YC56SQxp5o2zMYgsBPCzX89pCkUHdZ-MCFNIPD9XKRc,4541
12
- pdfalyzer/detection/constants/binary_regexes.py,sha256=eFx1VVAOzxKmlacbGgicDCp1fcKgOkQkkzeduGjqLBQ,1594
13
- pdfalyzer/detection/constants/javascript_reserved_keywords.py,sha256=CXXdWskdQa0Hs5wCci2RBVvipgZg34_cLfmkWG4Xcmg,991
14
- pdfalyzer/detection/javascript_hunter.py,sha256=_wT2vkKTMlm_RGCjYsmwcmV-ag1qep3EpkHmUw0nWcQ,711
15
- pdfalyzer/detection/yaralyzer_helper.py,sha256=KLGhX9qDB7eeuBbdl6mPRP1GivKkMZa79DPMTzq7b1c,2342
16
- pdfalyzer/font_info.py,sha256=0NQ6g4q3pTdirwGjJhur8HkXQlC732cR7IhilO33g2A,6663
17
- pdfalyzer/helpers/dict_helper.py,sha256=2TP0_EJBouaWD6jfnAekrEZ4M5eHKL8Tm61FgXZtBAg,303
18
- pdfalyzer/helpers/filesystem_helper.py,sha256=1clV0mqKFJUJC4xU2q_ApklpHCqCclxJAVJwRp93OF0,4110
19
- pdfalyzer/helpers/number_helper.py,sha256=8IlRmaOVLJsUV18VLvWRZU8SzRxL0XZjrY3sjmk2Ro4,292
20
- pdfalyzer/helpers/pdf_object_helper.py,sha256=Ija6cWKfFQRXCfZv2ezU1V2v0KFDn9f4ayeX8eG9GmI,1102
21
- pdfalyzer/helpers/rich_text_helper.py,sha256=j4zs41T94vjkr9e86ZVQRKYiUBjoZyZMdVJ4d4uiON8,2239
22
- pdfalyzer/helpers/string_helper.py,sha256=75EDEFw3UWHvWF32WtvZVBbqYY3ozO4y30dtH2qVMX0,2278
23
- pdfalyzer/output/character_mapping.py,sha256=MtC3jKdtMaugi5038fne0T_SFSo9QU4lZl_s7bW7gzI,2092
24
- pdfalyzer/output/layout.py,sha256=E58T9Tl6BYZTDsj6ouMr1J5SSUiXa7timUNxnOI2IzI,2149
25
- pdfalyzer/output/pdfalyzer_presenter.py,sha256=CSboSnYFlkgOfwMf3TcoTTJY6FLXJ9OulI9UieSTJeE,8492
26
- pdfalyzer/output/styles/node_colors.py,sha256=rfsTAUF43K_buw21SZoP6L5c_cLy7S-xA4GUiWJsDkc,3986
27
- pdfalyzer/output/styles/rich_theme.py,sha256=Y8QmuINlyZNIHvf3oD0CV3w2dC49NNKtvOChvudDCT8,1983
28
- pdfalyzer/output/tables/decoding_stats_table.py,sha256=mhQOiWhmovaC4sop38WcxStv_bIdAlQWUysAz5fW4MU,3461
29
- pdfalyzer/output/tables/font_summary_table.py,sha256=xfTqC7BlQd0agQf6nDDhkcJno7hru6mf9_xY1f5IDcw,2065
30
- pdfalyzer/output/tables/pdf_node_rich_table.py,sha256=7G-FLb_EUP50kZmYCTbo8Q6taU4xKp2QIGNOnQtYbNg,5908
31
- pdfalyzer/output/tables/stream_objects_table.py,sha256=nzCTci8Kqs8Pyghad3L5KWHDdIWRSrKCRNW8geA_rMo,707
32
- pdfalyzer/pdf_object_relationship.py,sha256=ug-338eoXFdD4YtDWPdzcfxP2fQDQa-GE8I3m3a01TA,5339
33
- pdfalyzer/pdfalyzer.py,sha256=6JflqQJb2crXXaVA6DHHgWB45w2MBFB3pqE3AlZO5WI,11013
34
- pdfalyzer/util/adobe_strings.py,sha256=F1MOBtSyIuF5HPmzWDr8MgnLyVodOsZSy4AFFCMHq_Y,5033
35
- pdfalyzer/util/argument_parser.py,sha256=hC0CLZPIXerP9Z0WZYE4Vj8wEPLwo3KpA-iRio6VKm0,11918
36
- pdfalyzer/util/debugging.py,sha256=nE64VUQbdu2OQRC8w8-AJkMtBOy8Kf3mjozuFslfWsw,156
37
- pdfalyzer/util/exceptions.py,sha256=XLFFTdx1n6i_VCmvuzvIOCa-djJvGEitfo9lhy3zq0k,98
38
- pdfalyzer/util/pdf_parser_manager.py,sha256=FVRYAYsCd0y5MAm--qvXnwCZnDtB3x85FdJtb-gpyw4,3109
39
- pdfalyzer/yara_rules/PDF.yara,sha256=70JzPq5F6AS8F46Seu6u0j5GS1JHxkS42r7g7PVSpRg,81489
40
- pdfalyzer/yara_rules/PDF_binary_stream.yara,sha256=Qt0Wd7RFXYiHaT9YxTCrhC68ccmFcEG1XMNC3p5IwcI,821
41
- pdfalyzer/yara_rules/__init.py__,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- pdfalyzer/yara_rules/didier_stevens.yara,sha256=4XhqafU09xzYUP7LCygHHBXOpAXUblJf6Tkn37MUy0w,7253
43
- pdfalyzer/yara_rules/lprat.static_file_analysis.yara,sha256=i0CwRH8pBx_QshKFTQtr1CP5n378EZelsF2FxMY2y5A,21859
44
- pdfalyzer/yara_rules/pdf_malware.yara,sha256=jDqSTP5BQSi2I_1xZiFZdy68I4oVWDat2j08-qdfbto,91063
45
- pdfalyzer-1.16.9.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
46
- pdfalyzer-1.16.9.dist-info/METADATA,sha256=KfbRZuBph56FwH16RaEEfjuUf6Kxsfmb2P5QQF4rYjk,26228
47
- pdfalyzer-1.16.9.dist-info/WHEEL,sha256=d2fvjOD7sXsVzChCqf0Ty0JbHKBaLYwDbGQDwQTnJ50,88
48
- pdfalyzer-1.16.9.dist-info/entry_points.txt,sha256=aZurgt-Xg3pojS7oTRI4hNLpK1hO4kTfChf0x2eQoD8,147
49
- pdfalyzer-1.16.9.dist-info/RECORD,,