atomicshop 2.6.8__py3-none-any.whl → 2.6.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of atomicshop might be problematic. Click here for more details.
- atomicshop/__init__.py +1 -1
- atomicshop/addons/mains/FACT/update_extract.py +7 -0
- atomicshop/archiver/_search_in_zip.py +189 -0
- atomicshop/archiver/archiver.py +34 -0
- atomicshop/archiver/search_in_archive.py +234 -0
- atomicshop/archiver/sevenz.py +16 -0
- atomicshop/archiver/zip.py +283 -0
- atomicshop/basics/bytes_arrays.py +56 -12
- atomicshop/file_io/jsons.py +11 -0
- atomicshop/file_io/xmls.py +20 -9
- atomicshop/web.py +2 -2
- atomicshop/wrappers/configparserw.py +24 -11
- atomicshop/wrappers/factw/config_install.py +2 -0
- atomicshop/wrappers/githubw.py +2 -2
- {atomicshop-2.6.8.dist-info → atomicshop-2.6.10.dist-info}/METADATA +3 -2
- {atomicshop-2.6.8.dist-info → atomicshop-2.6.10.dist-info}/RECORD +19 -15
- atomicshop/archiver.py +0 -152
- {atomicshop-2.6.8.dist-info → atomicshop-2.6.10.dist-info}/LICENSE.txt +0 -0
- {atomicshop-2.6.8.dist-info → atomicshop-2.6.10.dist-info}/WHEEL +0 -0
- {atomicshop-2.6.8.dist-info → atomicshop-2.6.10.dist-info}/top_level.txt +0 -0
atomicshop/__init__.py
CHANGED
|
@@ -1,8 +1,15 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
1
3
|
from atomicshop.wrappers.factw.fact_extractor import get_extractor
|
|
4
|
+
from atomicshop.wrappers.factw import config_install
|
|
5
|
+
from atomicshop import permissions, filesystem
|
|
2
6
|
|
|
3
7
|
|
|
4
8
|
def main():
    """
    Run 'get_extractor.get_extractor_script()' and then set the executable permission on the extractor file.

    The file is expected in the working directory reported by 'filesystem.get_working_directory()', under the
    name stored in 'config_install.FACT_EXTRACTOR_FILE_NAME'.
    """

    get_extractor.get_extractor_script()

    working_directory = filesystem.get_working_directory()
    fact_extractor_executable_path: str = os.sep.join(
        [working_directory, config_install.FACT_EXTRACTOR_FILE_NAME])

    permissions.set_executable_permission(fact_extractor_executable_path)
|
|
6
13
|
|
|
7
14
|
|
|
8
15
|
if __name__ == '__main__':
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# This module was written before 'search_in_archive'. Currently 'search_in_archive' is still in test mode.
|
|
2
|
+
# So probably this will go away eventually.
|
|
3
|
+
import os
|
|
4
|
+
import zipfile
|
|
5
|
+
from io import BytesIO
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _get_unique_filename(directory, filename):
|
|
9
|
+
"""
|
|
10
|
+
Generates a unique filename by appending a number if the file already exists.
|
|
11
|
+
"""
|
|
12
|
+
name, ext = os.path.splitext(filename)
|
|
13
|
+
counter = 1
|
|
14
|
+
unique_filename = filename
|
|
15
|
+
while os.path.exists(os.path.join(directory, unique_filename)):
|
|
16
|
+
unique_filename = f"{name}_{counter}{ext}"
|
|
17
|
+
counter += 1
|
|
18
|
+
return unique_filename
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _is_zip_file(file, zip_obj):
|
|
22
|
+
try:
|
|
23
|
+
with zip_obj.open(file) as file_data:
|
|
24
|
+
with zipfile.ZipFile(BytesIO(file_data.read())) as zip_file:
|
|
25
|
+
if zip_file.testzip() is None: # No errors found
|
|
26
|
+
return True
|
|
27
|
+
except zipfile.BadZipFile:
|
|
28
|
+
return False
|
|
29
|
+
return False
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _match_file_name(target, current, case_sensitive):
|
|
33
|
+
if case_sensitive:
|
|
34
|
+
return current.endswith(target)
|
|
35
|
+
else:
|
|
36
|
+
return current.lower().endswith(target.lower())
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _handle_nested_zip(
|
|
40
|
+
zip_obj, item, archived_file_bytes, file_names, results, found_set, recursive, return_first_only,
|
|
41
|
+
case_sensitive, callback_functions, extract_file_to_path):
|
|
42
|
+
|
|
43
|
+
if recursive and _is_zip_file(item.filename, zip_obj):
|
|
44
|
+
nested_zip_bytes = BytesIO(archived_file_bytes)
|
|
45
|
+
with zipfile.ZipFile(nested_zip_bytes) as nested_zip:
|
|
46
|
+
_search_in_zip(
|
|
47
|
+
nested_zip, file_names, results, found_set, case_sensitive, return_first_only, recursive,
|
|
48
|
+
callback_functions, extract_file_to_path)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _handle_file_extraction(item, extract_file_to_path, archived_file_bytes):
|
|
52
|
+
if extract_file_to_path:
|
|
53
|
+
unique_filename = _get_unique_filename(extract_file_to_path, os.path.basename(item.filename))
|
|
54
|
+
with open(os.path.join(extract_file_to_path, unique_filename), 'wb') as f:
|
|
55
|
+
f.write(archived_file_bytes)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _handle_callback_matching(item, archived_file_bytes, callback_functions, results, found_set, return_first_only):
|
|
59
|
+
for callback in callback_functions:
|
|
60
|
+
callback_result = callback(archived_file_bytes)
|
|
61
|
+
if callback_result:
|
|
62
|
+
# Initialize key for callback function name if not present
|
|
63
|
+
if callback.__name__ not in results:
|
|
64
|
+
results[callback.__name__] = []
|
|
65
|
+
file_info = {
|
|
66
|
+
'bytes': archived_file_bytes,
|
|
67
|
+
'name': item.filename,
|
|
68
|
+
'size': item.file_size,
|
|
69
|
+
'modified_time': item.date_time
|
|
70
|
+
}
|
|
71
|
+
results[callback.__name__].append(file_info)
|
|
72
|
+
if return_first_only:
|
|
73
|
+
found_set.add(item.filename)
|
|
74
|
+
return True
|
|
75
|
+
return False
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _handle_name_matching(item, archived_file_bytes, file_names, case_sensitive, results, found_set, return_first_only):
|
|
79
|
+
if any(_match_file_name(file_name, item.filename, case_sensitive) for file_name in file_names):
|
|
80
|
+
if item.filename not in results:
|
|
81
|
+
results[item.filename] = []
|
|
82
|
+
file_info = {
|
|
83
|
+
'bytes': archived_file_bytes,
|
|
84
|
+
'name': item.filename,
|
|
85
|
+
'size': item.file_size,
|
|
86
|
+
'modified_time': item.date_time
|
|
87
|
+
}
|
|
88
|
+
results[item.filename].append(file_info)
|
|
89
|
+
if return_first_only:
|
|
90
|
+
found_set.add(item.filename)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _search_in_zip(
|
|
94
|
+
zip_obj, file_names, results, found_set, case_sensitive, return_first_only, recursive, callback_functions,
|
|
95
|
+
extract_file_to_path):
|
|
96
|
+
|
|
97
|
+
for item in zip_obj.infolist():
|
|
98
|
+
if item.filename.endswith('/'): # Skip directories
|
|
99
|
+
continue
|
|
100
|
+
|
|
101
|
+
with zip_obj.open(item) as file_data:
|
|
102
|
+
archived_file_bytes = file_data.read()
|
|
103
|
+
|
|
104
|
+
callback_matched = False
|
|
105
|
+
if callback_functions:
|
|
106
|
+
callback_matched = _handle_callback_matching(
|
|
107
|
+
item, archived_file_bytes, callback_functions, results, found_set, return_first_only)
|
|
108
|
+
|
|
109
|
+
if callback_matched:
|
|
110
|
+
_handle_file_extraction(item, extract_file_to_path, archived_file_bytes)
|
|
111
|
+
else:
|
|
112
|
+
_handle_nested_zip(
|
|
113
|
+
zip_obj, item, archived_file_bytes, file_names, results, found_set, recursive, return_first_only,
|
|
114
|
+
case_sensitive, callback_functions, extract_file_to_path)
|
|
115
|
+
if file_names and not callback_matched:
|
|
116
|
+
_handle_name_matching(
|
|
117
|
+
item, archived_file_bytes, file_names, case_sensitive, results, found_set, return_first_only)
|
|
118
|
+
|
|
119
|
+
if file_names is not None and len(found_set) == len(file_names):
|
|
120
|
+
break # All files found, stop searching
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _initialize_results(callback_functions):
|
|
124
|
+
if callback_functions:
|
|
125
|
+
return {callback.__name__: [] for callback in callback_functions}
|
|
126
|
+
else:
|
|
127
|
+
return {}
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def _search_zip_content(
|
|
131
|
+
file_path, file_bytes, file_names_to_search, results, found_set, case_sensitive, return_first_only, recursive,
|
|
132
|
+
callback_functions, extract_file_to_path):
|
|
133
|
+
if file_bytes is not None:
|
|
134
|
+
with zipfile.ZipFile(BytesIO(file_bytes), 'r') as zip_ref:
|
|
135
|
+
_search_in_zip(zip_ref, file_names_to_search, results, found_set, case_sensitive, return_first_only,
|
|
136
|
+
recursive, callback_functions, extract_file_to_path)
|
|
137
|
+
elif file_path is not None:
|
|
138
|
+
with zipfile.ZipFile(file_path, 'r') as zip_ref:
|
|
139
|
+
_search_in_zip(zip_ref, file_names_to_search, results, found_set, case_sensitive, return_first_only,
|
|
140
|
+
recursive, callback_functions, extract_file_to_path)
|
|
141
|
+
else:
|
|
142
|
+
raise ValueError("Either file_path or file_bytes must be provided.")
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def search_file_in_zip(
        file_path: str = None,
        file_bytes: bytes = None,
        file_names_to_search: list[str] = None,
        case_sensitive: bool = True,
        return_first_only: bool = False,
        return_empty_list_per_file_name: bool = False,
        recursive: bool = False,
        callback_functions: list = None,
        extract_file_to_path: str = None
) -> dict[str, list[bytes]]:
    """
    Search a zip file for members by name and/or by callback and return what was found.

    Members found by a callback are stored under the callback's '__name__'; members found by name are stored
    under the member's own path inside the archive. Each stored value is a list of dictionaries with the keys
    'bytes', 'name', 'size' and 'modified_time'.

    :param file_path: string, full path to the zip file.
    :param file_bytes: bytes, the bytes of the zip file. Used instead of 'file_path' when provided.
    :param file_names_to_search: list of strings, the names of the files to search. A member matches when its
        path ends with one of the names.
    :param case_sensitive: boolean, default is 'True'. Determines if file name search should be case sensitive.
    :param return_first_only: boolean, default is 'False'. When True, found member names are tracked so the search
        can stop once as many members were found as names were requested.
    :param return_empty_list_per_file_name: boolean, default is 'False'.
        True: Keep keys whose list is empty in the returned dictionary.
        False: Remove keys whose list is empty from the returned dictionary.
    :param recursive: boolean, default is 'False'. If True, search recursively in nested zip files.
    :param callback_functions: list of callables, default is None. Each function is called with the bytes of a
        member and should return a truthy value when this member is considered 'found'.
    :param extract_file_to_path: string, full path to the directory where members found by a callback should be
        written.
    :return: dictionary of lists of file information dictionaries.
    :raises ValueError: when neither 'file_names_to_search' nor 'callback_functions' is provided.
    """

    if callback_functions is None and file_names_to_search is None:
        raise ValueError("Either file_names_to_search or callback_functions must be provided.")

    # Both containers are filled in place by the search helpers.
    results = _initialize_results(callback_functions)
    found_set = set()

    _search_zip_content(
        file_path, file_bytes, file_names_to_search, results, found_set, case_sensitive, return_first_only, recursive,
        callback_functions, extract_file_to_path)

    if return_empty_list_per_file_name:
        return results

    # Filter out keys with empty lists.
    return {key: found for key, found in results.items() if found}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
3
|
+
|
|
4
|
+
from ..print_api import print_api
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def extract_archive_with_shutil(file_path: str, target_directory: str, **kwargs) -> str:
    """
    Function extracts the archive to target directory.
    Returns full path to extracted directory.
    This function doesn't preserve the original date and time of files from the archive, instead the time of extraction
    will be applied.

    The returned path is 'target_directory' joined with the archive file name without its last extension.
    Errors are reported through 'print_api' and not raised; in that case an empty string is returned.

    :param file_path: Full file path to archived file to extract.
    :param target_directory: The directory on the filesystem to extract the file to.
    :param kwargs: passed through to 'print_api'.
    :return: str, path of the extracted directory, or an empty string when extraction failed.
    """

    print_api(f'Extracting {file_path}', **kwargs)

    extracted_directory: str = str()

    try:
        shutil.unpack_archive(file_path, target_directory)
        # 'os.path.basename' also handles a path without any separator (e.g. 'archive.zip'). Splitting on
        # 'os.sep' raised IndexError there, after the archive had already been unpacked.
        file_name = os.path.basename(file_path)
        file_name_no_extension = file_name.rsplit('.', maxsplit=1)[0]
        extracted_directory = target_directory + os.sep + file_name_no_extension
    except Exception as exception_object:
        # Deliberate best effort: report the problem and fall through with the empty result.
        print_api(f'Error extracting: {file_path}', error_type=True, **kwargs)
        print_api(exception_object, error_type=True, **kwargs)

    print_api(f'Extracted to: {extracted_directory}', **kwargs)
    return extracted_directory
|
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import zipfile
|
|
3
|
+
from io import BytesIO
|
|
4
|
+
|
|
5
|
+
from . import zip, sevenz
|
|
6
|
+
|
|
7
|
+
import py7zr
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def _get_unique_filename(directory, filename):
|
|
11
|
+
"""
|
|
12
|
+
Generates a unique filename by appending a number if the file already exists.
|
|
13
|
+
"""
|
|
14
|
+
name, ext = os.path.splitext(filename)
|
|
15
|
+
counter = 1
|
|
16
|
+
unique_filename = filename
|
|
17
|
+
while os.path.exists(os.path.join(directory, unique_filename)):
|
|
18
|
+
unique_filename = f"{name}_{counter}{ext}"
|
|
19
|
+
counter += 1
|
|
20
|
+
return unique_filename
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def _is_zip_file(file, zip_obj):
|
|
24
|
+
try:
|
|
25
|
+
with zip_obj.open(file) as file_data:
|
|
26
|
+
with zipfile.ZipFile(BytesIO(file_data.read())) as zip_file:
|
|
27
|
+
if zip_file.testzip() is None: # No errors found
|
|
28
|
+
return True
|
|
29
|
+
except zipfile.BadZipFile:
|
|
30
|
+
return False
|
|
31
|
+
return False
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _match_file_name(target, current, case_sensitive):
|
|
35
|
+
if case_sensitive:
|
|
36
|
+
return current.endswith(target)
|
|
37
|
+
else:
|
|
38
|
+
return current.lower().endswith(target.lower())
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _handle_nested_zip(
|
|
42
|
+
zip_obj, item, archived_file_bytes, file_names, results, found_set, recursive, return_first_only,
|
|
43
|
+
case_sensitive, callback_functions, extract_file_to_path):
|
|
44
|
+
|
|
45
|
+
if recursive and _is_zip_file(item.filename, zip_obj):
|
|
46
|
+
nested_zip_bytes = BytesIO(archived_file_bytes)
|
|
47
|
+
with zipfile.ZipFile(nested_zip_bytes) as nested_zip:
|
|
48
|
+
_search_in_archive(
|
|
49
|
+
nested_zip, file_names, results, found_set, case_sensitive, return_first_only, recursive,
|
|
50
|
+
callback_functions, extract_file_to_path)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def _handle_file_extraction(item, extract_file_to_path, archived_file_bytes):
|
|
54
|
+
if extract_file_to_path:
|
|
55
|
+
unique_filename = _get_unique_filename(extract_file_to_path, os.path.basename(item.filename))
|
|
56
|
+
with open(os.path.join(extract_file_to_path, unique_filename), 'wb') as f:
|
|
57
|
+
f.write(archived_file_bytes)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _handle_callback_matching(
|
|
61
|
+
item, archive_type, archived_file_bytes, callback_functions, results, found_set, return_first_only):
|
|
62
|
+
|
|
63
|
+
for callback in callback_functions:
|
|
64
|
+
callback_result = callback(archived_file_bytes)
|
|
65
|
+
if callback_result:
|
|
66
|
+
# Initialize key for callback function name if not present
|
|
67
|
+
if callback.__name__ not in results:
|
|
68
|
+
results[callback.__name__] = []
|
|
69
|
+
|
|
70
|
+
if archive_type == 'zip':
|
|
71
|
+
file_info = {
|
|
72
|
+
'bytes': archived_file_bytes,
|
|
73
|
+
'name': item.filename,
|
|
74
|
+
'size': item.file_size,
|
|
75
|
+
'modified_time': item.date_time
|
|
76
|
+
}
|
|
77
|
+
elif archive_type == '7z':
|
|
78
|
+
file_info = {
|
|
79
|
+
'bytes': archived_file_bytes,
|
|
80
|
+
'name': item.filename,
|
|
81
|
+
'size': item.uncompressed,
|
|
82
|
+
'modified_time': item.creationtime
|
|
83
|
+
}
|
|
84
|
+
results[callback.__name__].append(file_info)
|
|
85
|
+
if return_first_only:
|
|
86
|
+
found_set.add(item.filename)
|
|
87
|
+
return True
|
|
88
|
+
return False
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _handle_name_matching(item, archived_file_bytes, file_names, case_sensitive, results, found_set, return_first_only):
|
|
92
|
+
if any(_match_file_name(file_name, item.filename, case_sensitive) for file_name in file_names):
|
|
93
|
+
if item.filename not in results:
|
|
94
|
+
results[item.filename] = []
|
|
95
|
+
file_info = {
|
|
96
|
+
'bytes': archived_file_bytes,
|
|
97
|
+
'name': item.filename,
|
|
98
|
+
'size': item.file_size,
|
|
99
|
+
'modified_time': item.date_time
|
|
100
|
+
}
|
|
101
|
+
results[item.filename].append(file_info)
|
|
102
|
+
if return_first_only:
|
|
103
|
+
found_set.add(item.filename)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _search_in_archive(
|
|
107
|
+
arch_obj, archive_type, file_names, results, found_set, case_sensitive, return_first_only, recursive,
|
|
108
|
+
callback_functions, extract_file_to_path):
|
|
109
|
+
|
|
110
|
+
file_info_list = None
|
|
111
|
+
if archive_type == 'zip':
|
|
112
|
+
file_info_list = arch_obj.infolist()
|
|
113
|
+
elif archive_type == '7z':
|
|
114
|
+
file_info_list = arch_obj.list()
|
|
115
|
+
|
|
116
|
+
for item in file_info_list:
|
|
117
|
+
if item.filename.endswith('/'): # Skip directories
|
|
118
|
+
continue
|
|
119
|
+
|
|
120
|
+
archived_file_bytes = None
|
|
121
|
+
if archive_type == 'zip':
|
|
122
|
+
with arch_obj.open(item) as file_data:
|
|
123
|
+
archived_file_bytes = file_data.read()
|
|
124
|
+
elif archive_type == '7z':
|
|
125
|
+
file_dict = arch_obj.read(item.filename)
|
|
126
|
+
archived_file_bytes = file_dict[item.filename].read()
|
|
127
|
+
|
|
128
|
+
callback_matched = False
|
|
129
|
+
if callback_functions:
|
|
130
|
+
callback_matched = _handle_callback_matching(
|
|
131
|
+
item, archive_type, archived_file_bytes, callback_functions, results, found_set, return_first_only)
|
|
132
|
+
|
|
133
|
+
if callback_matched:
|
|
134
|
+
_handle_file_extraction(item, extract_file_to_path, archived_file_bytes)
|
|
135
|
+
else:
|
|
136
|
+
_handle_nested_zip(
|
|
137
|
+
arch_obj, item, archived_file_bytes, file_names, results, found_set, recursive, return_first_only,
|
|
138
|
+
case_sensitive, callback_functions, extract_file_to_path)
|
|
139
|
+
if file_names and not callback_matched:
|
|
140
|
+
_handle_name_matching(
|
|
141
|
+
item, archived_file_bytes, file_names, case_sensitive, results, found_set, return_first_only)
|
|
142
|
+
|
|
143
|
+
if file_names is not None and len(found_set) == len(file_names):
|
|
144
|
+
break # All files found, stop searching
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _initialize_results(callback_functions):
|
|
148
|
+
if callback_functions:
|
|
149
|
+
return {callback.__name__: [] for callback in callback_functions}
|
|
150
|
+
else:
|
|
151
|
+
return {}
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _open_archive(archive_type, file_like_object):
|
|
155
|
+
if archive_type == 'zip':
|
|
156
|
+
return zipfile.ZipFile(file_like_object, 'r')
|
|
157
|
+
elif archive_type == '7z':
|
|
158
|
+
return py7zr.SevenZipFile(file_like_object, 'r')
|
|
159
|
+
else:
|
|
160
|
+
raise ValueError("Unsupported archive format.")
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _get_archive_type(file_path, file_bytes) -> tuple:
|
|
164
|
+
if file_bytes is not None:
|
|
165
|
+
file_like_object = BytesIO(file_bytes)
|
|
166
|
+
elif file_path is not None:
|
|
167
|
+
file_like_object = file_path
|
|
168
|
+
else:
|
|
169
|
+
raise ValueError("Either file_path or file_bytes must be provided.")
|
|
170
|
+
|
|
171
|
+
if zip.is_zip_zipfile(file_path=file_like_object):
|
|
172
|
+
return 'zip', file_like_object
|
|
173
|
+
elif sevenz.is_7z(file_path=file_like_object):
|
|
174
|
+
return '7z', file_like_object
|
|
175
|
+
else:
|
|
176
|
+
raise ValueError("Unsupported archive format.")
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _search_archive_content(
        file_path, file_bytes, file_names_to_search, results, found_set, case_sensitive, return_first_only, recursive,
        callback_functions, extract_file_to_path):
    """
    Detect the archive type, open the archive and search it with '_search_in_archive'.

    'results' and 'found_set' are updated in place by the search; nothing is returned.
    Errors raised by '_get_archive_type' and '_open_archive' are not handled here.
    """

    detected_type, archive_source = _get_archive_type(file_path, file_bytes)

    with _open_archive(detected_type, archive_source) as opened_archive:
        _search_in_archive(
            opened_archive, detected_type, file_names_to_search, results, found_set, case_sensitive,
            return_first_only, recursive, callback_functions, extract_file_to_path)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def search_file_in_archive(
        file_path: str = None,
        file_bytes: bytes = None,
        file_names_to_search: list[str] = None,
        case_sensitive: bool = True,
        return_first_only: bool = False,
        return_empty_list_per_file_name: bool = False,
        recursive: bool = False,
        callback_functions: list = None,
        extract_file_to_path: str = None
) -> dict[str, list[bytes]]:
    """
    Search an archive (zip or 7z) for members by name and/or by callback and return what was found.

    Members found by a callback are stored under the callback's '__name__'; members found by name are stored
    under the member's own path inside the archive. Each stored value is a list of dictionaries with the keys
    'bytes', 'name', 'size' and 'modified_time'.

    :param file_path: string, full path to the archive file.
    :param file_bytes: bytes, the bytes of the archive file. Used instead of 'file_path' when provided.
    :param file_names_to_search: list of strings, the names of the files to search. A member matches when its
        path ends with one of the names.
    :param case_sensitive: boolean, default is 'True'. Determines if file name search should be case sensitive.
    :param return_first_only: boolean, default is 'False'. When True, found member names are tracked so the search
        can stop once as many members were found as names were requested.
    :param return_empty_list_per_file_name: boolean, default is 'False'.
        True: Keep keys whose list is empty in the returned dictionary.
        False: Remove keys whose list is empty from the returned dictionary.
    :param recursive: boolean, default is 'False'. If True, search recursively in nested zip files.
    :param callback_functions: list of callables, default is None. Each function is called with the bytes of a
        member and should return a truthy value when this member is considered 'found'.
    :param extract_file_to_path: string, full path to the directory where members found by a callback should be
        written.
    :return: dictionary of lists of file information dictionaries.
    :raises ValueError: when neither 'file_names_to_search' nor 'callback_functions' is provided.
    """

    if callback_functions is None and file_names_to_search is None:
        raise ValueError("Either file_names_to_search or callback_functions must be provided.")

    # Both containers are filled in place by the search helpers.
    results = _initialize_results(callback_functions)
    found_set = set()

    _search_archive_content(
        file_path, file_bytes, file_names_to_search, results, found_set, case_sensitive, return_first_only, recursive,
        callback_functions, extract_file_to_path)

    if return_empty_list_per_file_name:
        return results

    # Filter out keys with empty lists.
    return {key: found for key, found in results.items() if found}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import py7zr
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def is_7z(file_path: str) -> bool:
    """
    Function checks if the file is a 7z file.

    The file is opened with 'py7zr.SevenZipFile' and 'testzip()' is executed. The value returned by 'testzip()'
    is not inspected; only a raised 'py7zr.Bad7zFile' makes the function return False.

    :param file_path: string, full path to the file.
    :return: boolean.
    """

    try:
        with py7zr.SevenZipFile(file_path) as archive:
            archive.testzip()
    except py7zr.Bad7zFile:
        return False
    return True
|