epstein-files 1.1.0__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epstein_files/__init__.py +16 -27
- epstein_files/documents/communication.py +10 -14
- epstein_files/documents/document.py +1 -1
- epstein_files/documents/email.py +152 -75
- epstein_files/documents/imessage/text_message.py +42 -25
- epstein_files/documents/messenger_log.py +31 -12
- epstein_files/documents/other_file.py +13 -12
- epstein_files/epstein_files.py +20 -81
- epstein_files/util/constant/common_words.py +3 -3
- epstein_files/util/constant/html.py +4 -5
- epstein_files/util/constant/names.py +18 -6
- epstein_files/util/constant/strings.py +6 -2
- epstein_files/util/constant/urls.py +1 -1
- epstein_files/util/constants.py +19 -23
- epstein_files/util/env.py +55 -36
- epstein_files/util/file_helper.py +1 -2
- epstein_files/util/highlighted_group.py +1019 -189
- epstein_files/util/logging.py +8 -1
- epstein_files/util/output.py +183 -89
- epstein_files/util/rich.py +35 -69
- epstein_files/util/timer.py +1 -1
- epstein_files/util/word_count.py +3 -4
- {epstein_files-1.1.0.dist-info → epstein_files-1.1.3.dist-info}/METADATA +4 -1
- epstein_files-1.1.3.dist-info/RECORD +33 -0
- epstein_files-1.1.0.dist-info/RECORD +0 -33
- {epstein_files-1.1.0.dist-info → epstein_files-1.1.3.dist-info}/LICENSE +0 -0
- {epstein_files-1.1.0.dist-info → epstein_files-1.1.3.dist-info}/WHEEL +0 -0
- {epstein_files-1.1.0.dist-info → epstein_files-1.1.3.dist-info}/entry_points.txt +0 -0
epstein_files/util/env.py
CHANGED
|
@@ -2,14 +2,29 @@ import logging
|
|
|
2
2
|
from argparse import ArgumentParser
|
|
3
3
|
from os import environ
|
|
4
4
|
from pathlib import Path
|
|
5
|
-
from sys import argv, exit
|
|
6
5
|
|
|
7
6
|
from rich_argparse_plus import RichHelpFormatterPlus
|
|
8
7
|
|
|
9
|
-
from epstein_files.util.
|
|
8
|
+
from epstein_files.util.constant.output_files import ALL_EMAILS_PATH, CHRONOLOGICAL_EMAILS_PATH, TEXT_MSGS_HTML_PATH
|
|
9
|
+
from epstein_files.util.logging import env_log_level, exit_with_error, logger
|
|
10
10
|
|
|
11
11
|
DEFAULT_WIDTH = 145
|
|
12
|
-
|
|
12
|
+
DEFAULT_FILE = 'default_file'
|
|
13
|
+
EPSTEIN_GENERATE = 'epstein_generate'
|
|
14
|
+
HTML_SCRIPTS = [EPSTEIN_GENERATE, 'epstein_word_count']
|
|
15
|
+
|
|
16
|
+
# Verify Epstein docs dir exists
|
|
17
|
+
EPSTEIN_DOCS_DIR_ENV_VAR_NAME = 'EPSTEIN_DOCS_DIR'
|
|
18
|
+
DOCS_DIR_ENV = environ.get(EPSTEIN_DOCS_DIR_ENV_VAR_NAME)
|
|
19
|
+
DOCS_DIR = Path(DOCS_DIR_ENV or '').resolve()
|
|
20
|
+
|
|
21
|
+
if not DOCS_DIR_ENV:
|
|
22
|
+
exit_with_error(f"{EPSTEIN_DOCS_DIR_ENV_VAR_NAME} env var not set!\n")
|
|
23
|
+
elif not DOCS_DIR.exists():
|
|
24
|
+
exit_with_error(f"{EPSTEIN_DOCS_DIR_ENV_VAR_NAME}='{DOCS_DIR}' does not exist!\n")
|
|
25
|
+
|
|
26
|
+
is_env_var_set = lambda s: len(environ.get(s) or '') > 0
|
|
27
|
+
is_output_arg = lambda arg: any([arg.startswith(pfx) for pfx in ['colors_only', 'json', 'make_clean', 'output']])
|
|
13
28
|
|
|
14
29
|
|
|
15
30
|
RichHelpFormatterPlus.choose_theme('morning_glory')
|
|
@@ -21,14 +36,14 @@ parser.add_argument('--overwrite-pickle', '-op', action='store_true', help='re-p
|
|
|
21
36
|
output = parser.add_argument_group('OUTPUT', 'Options used by epstein_generate.')
|
|
22
37
|
output.add_argument('--all-emails', '-ae', action='store_true', help='all the emails instead of just the interesting ones')
|
|
23
38
|
output.add_argument('--all-other-files', '-ao', action='store_true', help='all the non-email, non-text msg files instead of just the interesting ones')
|
|
24
|
-
|
|
39
|
+
parser.add_argument('--build', '-b', nargs="?", default=None, const=DEFAULT_FILE, help='write output to HTML file')
|
|
25
40
|
output.add_argument('--email-timeline', action='store_true', help='print a table of all emails in chronological order')
|
|
26
41
|
output.add_argument('--json-files', action='store_true', help='pretty print all the raw JSON data files in the collection and exit')
|
|
27
42
|
output.add_argument('--json-metadata', action='store_true', help='dump JSON metadata for all files and exit')
|
|
28
43
|
output.add_argument('--output-emails', '-oe', action='store_true', help='generate emails section')
|
|
29
44
|
output.add_argument('--output-other', '-oo', action='store_true', help='generate other files section')
|
|
30
45
|
output.add_argument('--output-texts', '-ot', action='store_true', help='generate text messages section')
|
|
31
|
-
output.add_argument('--sort-alphabetical', action='store_true', help='sort
|
|
46
|
+
output.add_argument('--sort-alphabetical', action='store_true', help='sort tables alphabetically intead of by count')
|
|
32
47
|
output.add_argument('--suppress-output', action='store_true', help='no output to terminal (use with --build)')
|
|
33
48
|
output.add_argument('--uninteresting', action='store_true', help='only output uninteresting other files')
|
|
34
49
|
output.add_argument('--width', '-w', type=int, default=DEFAULT_WIDTH, help='screen width to use (in characters)')
|
|
@@ -36,7 +51,7 @@ output.add_argument('--width', '-w', type=int, default=DEFAULT_WIDTH, help='scre
|
|
|
36
51
|
scripts = parser.add_argument_group('SCRIPTS', 'Options used by epstein_search, epstein_show, and epstein_diff.')
|
|
37
52
|
scripts.add_argument('positional_args', nargs='*', help='strings to searchs for, file IDs to show or diff, etc.')
|
|
38
53
|
scripts.add_argument('--raw', '-r', action='store_true', help='show raw contents of file (used by epstein_show)')
|
|
39
|
-
scripts.add_argument('--whole-file', '-wf', action='store_true', help='print whole
|
|
54
|
+
scripts.add_argument('--whole-file', '-wf', action='store_true', help='print whole files')
|
|
40
55
|
|
|
41
56
|
debug = parser.add_argument_group('DEBUG')
|
|
42
57
|
debug.add_argument('--colors-only', '-c', action='store_true', help='print header with color key table and links and exit')
|
|
@@ -45,24 +60,11 @@ debug.add_argument('--deep-debug', '-dd', action='store_true', help='set debug l
|
|
|
45
60
|
debug.add_argument('--json-stats', '-j', action='store_true', help='print JSON formatted stats about the files')
|
|
46
61
|
debug.add_argument('--skip-other-files', '-sof', action='store_true', help='skip parsing non email/text files')
|
|
47
62
|
debug.add_argument('--suppress-logs', '-sl', action='store_true', help='set debug level to FATAL')
|
|
48
|
-
args = parser.parse_args()
|
|
49
63
|
|
|
50
64
|
|
|
51
|
-
#
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
DOCS_DIR = Path(DOCS_DIR_ENV or '').resolve()
|
|
55
|
-
|
|
56
|
-
if not DOCS_DIR_ENV:
|
|
57
|
-
print(f"\n ERROR: {EPSTEIN_DOCS_DIR_ENV_VAR_NAME} env var not set!\n")
|
|
58
|
-
exit(1)
|
|
59
|
-
elif not DOCS_DIR.exists():
|
|
60
|
-
print(f"\n ERROR: {EPSTEIN_DOCS_DIR_ENV_VAR_NAME}='{DOCS_DIR}' does not exist!\n")
|
|
61
|
-
exit(1)
|
|
62
|
-
|
|
63
|
-
current_script = Path(argv[0]).name
|
|
64
|
-
is_env_var_set = lambda s: len(environ.get(s) or '') > 0
|
|
65
|
-
is_html_script = current_script in HTML_SCRIPTS
|
|
65
|
+
# Parse args
|
|
66
|
+
args = parser.parse_args()
|
|
67
|
+
is_html_script = parser.prog in HTML_SCRIPTS
|
|
66
68
|
|
|
67
69
|
args.debug = args.deep_debug or args.debug or is_env_var_set('DEBUG')
|
|
68
70
|
args.names = [None if n == 'None' else n for n in (args.names or [])]
|
|
@@ -70,8 +72,33 @@ args.output_emails = args.output_emails or args.all_emails
|
|
|
70
72
|
args.output_other = args.output_other or args.all_other_files or args.uninteresting
|
|
71
73
|
args.overwrite_pickle = args.overwrite_pickle or (is_env_var_set('OVERWRITE_PICKLE') and not is_env_var_set('PICKLED'))
|
|
72
74
|
args.width = args.width if is_html_script else None
|
|
73
|
-
|
|
74
|
-
|
|
75
|
+
|
|
76
|
+
if is_html_script:
|
|
77
|
+
if args.positional_args:
|
|
78
|
+
exit_with_error(f"{parser.prog} does not accept positional arguments (receeived {args.positional_args})")
|
|
79
|
+
|
|
80
|
+
if parser.prog == EPSTEIN_GENERATE:
|
|
81
|
+
if any([is_output_arg(arg) and val for arg, val in vars(args).items()]):
|
|
82
|
+
if args.email_timeline:
|
|
83
|
+
exit_with_error(f"--email-timeline option is mutually exlusive with other output options")
|
|
84
|
+
elif not args.email_timeline:
|
|
85
|
+
logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
|
|
86
|
+
args.output_texts = args.output_emails = args.output_other = True
|
|
87
|
+
|
|
88
|
+
if args.build == DEFAULT_FILE:
|
|
89
|
+
if args.all_emails:
|
|
90
|
+
args.build = ALL_EMAILS_PATH
|
|
91
|
+
elif args.email_timeline:
|
|
92
|
+
args.build = CHRONOLOGICAL_EMAILS_PATH
|
|
93
|
+
else:
|
|
94
|
+
args.build = TEXT_MSGS_HTML_PATH
|
|
95
|
+
elif parser.prog.startswith('epstein_') and not args.positional_args:
|
|
96
|
+
exit_with_error(f"{parser.prog} requires positional arguments but got none!")
|
|
97
|
+
|
|
98
|
+
if args.names:
|
|
99
|
+
logger.warning(f"Output restricted to {args.names}")
|
|
100
|
+
args.output_other = False
|
|
101
|
+
|
|
75
102
|
|
|
76
103
|
# Log level args
|
|
77
104
|
if args.deep_debug:
|
|
@@ -83,15 +110,7 @@ elif args.suppress_logs:
|
|
|
83
110
|
elif not env_log_level:
|
|
84
111
|
logger.setLevel(logging.WARNING)
|
|
85
112
|
|
|
86
|
-
logger.
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
logger.warning(f"No output section chosen; outputting default selection of texts, selected emails, and other files...")
|
|
91
|
-
args.output_texts = args.output_emails = args.output_other = True
|
|
92
|
-
|
|
93
|
-
if args.debug:
|
|
94
|
-
logger.warning(f"Invocation args:\ncurrent_script={current_script}\nis_html_script={is_html_script},\nis_output_selected={is_any_output_selected},\nargs={args}")
|
|
95
|
-
|
|
96
|
-
if args.names:
|
|
97
|
-
logger.warning(f"Output restricted to {args.names}")
|
|
113
|
+
logger.debug(f'Log level set to {logger.level}...')
|
|
114
|
+
args_str = ',\n'.join([f"{k}={v}" for k, v in vars(args).items() if v])
|
|
115
|
+
logger.info(f"'{parser.prog}' script invoked\n{args_str}")
|
|
116
|
+
logger.debug(f"Reading Epstein documents from '{DOCS_DIR}'...")
|
|
@@ -11,11 +11,10 @@ FILENAME_LENGTH = len(HOUSE_OVERSIGHT_PREFIX) + 6
|
|
|
11
11
|
KB = 1024
|
|
12
12
|
MB = KB * KB
|
|
13
13
|
|
|
14
|
-
file_size = lambda file_path: Path(file_path).stat().st_size
|
|
15
|
-
|
|
16
14
|
# Coerce methods handle both string and int arguments.
|
|
17
15
|
coerce_file_name = lambda filename_or_id: coerce_file_stem(filename_or_id) + '.txt'
|
|
18
16
|
coerce_file_path = lambda filename_or_id: DOCS_DIR.joinpath(coerce_file_name(filename_or_id))
|
|
17
|
+
file_size = lambda file_path: Path(file_path).stat().st_size
|
|
19
18
|
id_str = lambda id: f"{int(id):06d}"
|
|
20
19
|
|
|
21
20
|
|