atomicshop 2.6.8__py3-none-any.whl → 2.6.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of atomicshop might be problematic. Click here for more details.

atomicshop/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
1
  """Atomic Basic functions and classes to make developer life easier"""
2
2
 
3
3
  __author__ = "Den Kras"
4
- __version__ = '2.6.8'
4
+ __version__ = '2.6.10'
@@ -1,8 +1,15 @@
1
+ import os
2
+
1
3
  from atomicshop.wrappers.factw.fact_extractor import get_extractor
4
+ from atomicshop.wrappers.factw import config_install
5
+ from atomicshop import permissions, filesystem
2
6
 
3
7
 
4
8
  def main():
5
9
  get_extractor.get_extractor_script()
10
+ fact_extractor_executable_path: str = (
11
+ filesystem.get_working_directory() + os.sep + config_install.FACT_EXTRACTOR_FILE_NAME)
12
+ permissions.set_executable_permission(fact_extractor_executable_path)
6
13
 
7
14
 
8
15
  if __name__ == '__main__':
@@ -0,0 +1,189 @@
1
+ # This module was written before 'search_in_archive'; 'search_in_archive' is currently in test mode.
2
+ # So this module will probably be removed eventually.
3
+ import os
4
+ import zipfile
5
+ from io import BytesIO
6
+
7
+
8
+ def _get_unique_filename(directory, filename):
9
+ """
10
+ Generates a unique filename by appending a number if the file already exists.
11
+ """
12
+ name, ext = os.path.splitext(filename)
13
+ counter = 1
14
+ unique_filename = filename
15
+ while os.path.exists(os.path.join(directory, unique_filename)):
16
+ unique_filename = f"{name}_{counter}{ext}"
17
+ counter += 1
18
+ return unique_filename
19
+
20
+
21
+ def _is_zip_file(file, zip_obj):
22
+ try:
23
+ with zip_obj.open(file) as file_data:
24
+ with zipfile.ZipFile(BytesIO(file_data.read())) as zip_file:
25
+ if zip_file.testzip() is None: # No errors found
26
+ return True
27
+ except zipfile.BadZipFile:
28
+ return False
29
+ return False
30
+
31
+
32
+ def _match_file_name(target, current, case_sensitive):
33
+ if case_sensitive:
34
+ return current.endswith(target)
35
+ else:
36
+ return current.lower().endswith(target.lower())
37
+
38
+
39
+ def _handle_nested_zip(
40
+ zip_obj, item, archived_file_bytes, file_names, results, found_set, recursive, return_first_only,
41
+ case_sensitive, callback_functions, extract_file_to_path):
42
+
43
+ if recursive and _is_zip_file(item.filename, zip_obj):
44
+ nested_zip_bytes = BytesIO(archived_file_bytes)
45
+ with zipfile.ZipFile(nested_zip_bytes) as nested_zip:
46
+ _search_in_zip(
47
+ nested_zip, file_names, results, found_set, case_sensitive, return_first_only, recursive,
48
+ callback_functions, extract_file_to_path)
49
+
50
+
51
+ def _handle_file_extraction(item, extract_file_to_path, archived_file_bytes):
52
+ if extract_file_to_path:
53
+ unique_filename = _get_unique_filename(extract_file_to_path, os.path.basename(item.filename))
54
+ with open(os.path.join(extract_file_to_path, unique_filename), 'wb') as f:
55
+ f.write(archived_file_bytes)
56
+
57
+
58
+ def _handle_callback_matching(item, archived_file_bytes, callback_functions, results, found_set, return_first_only):
59
+ for callback in callback_functions:
60
+ callback_result = callback(archived_file_bytes)
61
+ if callback_result:
62
+ # Initialize key for callback function name if not present
63
+ if callback.__name__ not in results:
64
+ results[callback.__name__] = []
65
+ file_info = {
66
+ 'bytes': archived_file_bytes,
67
+ 'name': item.filename,
68
+ 'size': item.file_size,
69
+ 'modified_time': item.date_time
70
+ }
71
+ results[callback.__name__].append(file_info)
72
+ if return_first_only:
73
+ found_set.add(item.filename)
74
+ return True
75
+ return False
76
+
77
+
78
+ def _handle_name_matching(item, archived_file_bytes, file_names, case_sensitive, results, found_set, return_first_only):
79
+ if any(_match_file_name(file_name, item.filename, case_sensitive) for file_name in file_names):
80
+ if item.filename not in results:
81
+ results[item.filename] = []
82
+ file_info = {
83
+ 'bytes': archived_file_bytes,
84
+ 'name': item.filename,
85
+ 'size': item.file_size,
86
+ 'modified_time': item.date_time
87
+ }
88
+ results[item.filename].append(file_info)
89
+ if return_first_only:
90
+ found_set.add(item.filename)
91
+
92
+
93
+ def _search_in_zip(
94
+ zip_obj, file_names, results, found_set, case_sensitive, return_first_only, recursive, callback_functions,
95
+ extract_file_to_path):
96
+
97
+ for item in zip_obj.infolist():
98
+ if item.filename.endswith('/'): # Skip directories
99
+ continue
100
+
101
+ with zip_obj.open(item) as file_data:
102
+ archived_file_bytes = file_data.read()
103
+
104
+ callback_matched = False
105
+ if callback_functions:
106
+ callback_matched = _handle_callback_matching(
107
+ item, archived_file_bytes, callback_functions, results, found_set, return_first_only)
108
+
109
+ if callback_matched:
110
+ _handle_file_extraction(item, extract_file_to_path, archived_file_bytes)
111
+ else:
112
+ _handle_nested_zip(
113
+ zip_obj, item, archived_file_bytes, file_names, results, found_set, recursive, return_first_only,
114
+ case_sensitive, callback_functions, extract_file_to_path)
115
+ if file_names and not callback_matched:
116
+ _handle_name_matching(
117
+ item, archived_file_bytes, file_names, case_sensitive, results, found_set, return_first_only)
118
+
119
+ if file_names is not None and len(found_set) == len(file_names):
120
+ break # All files found, stop searching
121
+
122
+
123
+ def _initialize_results(callback_functions):
124
+ if callback_functions:
125
+ return {callback.__name__: [] for callback in callback_functions}
126
+ else:
127
+ return {}
128
+
129
+
130
+ def _search_zip_content(
131
+ file_path, file_bytes, file_names_to_search, results, found_set, case_sensitive, return_first_only, recursive,
132
+ callback_functions, extract_file_to_path):
133
+ if file_bytes is not None:
134
+ with zipfile.ZipFile(BytesIO(file_bytes), 'r') as zip_ref:
135
+ _search_in_zip(zip_ref, file_names_to_search, results, found_set, case_sensitive, return_first_only,
136
+ recursive, callback_functions, extract_file_to_path)
137
+ elif file_path is not None:
138
+ with zipfile.ZipFile(file_path, 'r') as zip_ref:
139
+ _search_in_zip(zip_ref, file_names_to_search, results, found_set, case_sensitive, return_first_only,
140
+ recursive, callback_functions, extract_file_to_path)
141
+ else:
142
+ raise ValueError("Either file_path or file_bytes must be provided.")
143
+
144
+
145
def search_file_in_zip(
        file_path: str = None,
        file_bytes: bytes = None,
        file_names_to_search: list[str] = None,
        case_sensitive: bool = True,
        return_first_only: bool = False,
        return_empty_list_per_file_name: bool = False,
        recursive: bool = False,
        callback_functions: list = None,
        extract_file_to_path: str = None
) -> dict[str, list[dict]]:
    """
    Search a zip archive for files by name and/or by content callbacks.

    Every match is stored as a dictionary with the keys 'bytes', 'name', 'size' and 'modified_time'.
    Matches found by a callback are grouped under the callback's '__name__'. Matches found by name are
    grouped under the full name of the archive member.

    :param file_path: string, full path to the zip file. Used only when 'file_bytes' is None.
    :param file_bytes: bytes, the bytes of the zip file. Takes precedence over 'file_path'.
    :param file_names_to_search: list of strings, the names of the files to search. A member matches when its
        name ends with one of these names.
    :param case_sensitive: boolean, default is 'True'. Determines if file name search should be case sensitive.
    :param return_first_only: boolean, default is 'False'. When True, matched member names are counted and the
        search stops once as many members were found as there are names in 'file_names_to_search'.
    :param return_empty_list_per_file_name: boolean, default is 'False'.
        True: Keep keys whose list is empty (the result is pre-seeded with one empty list per callback).
        False: Remove keys whose list is empty.
    :param recursive: boolean, default is 'False'. If True, search recursively in nested zip files.
    :param callback_functions: list of callables, default is None. Each function receives the bytes of an
        archived file and should return a truthy value if this file is 'found'.
    :param extract_file_to_path: string, full path to the directory where files matched by a callback
        should be written.
    :return: dictionary of lists of match dictionaries.
    :raises ValueError: when neither 'file_names_to_search' nor 'callback_functions' is provided, or when
        neither 'file_path' nor 'file_bytes' is provided.
    """

    if file_names_to_search is None and callback_functions is None:
        raise ValueError("Either file_names_to_search or callback_functions must be provided.")

    # Initialize results dictionary.
    results = _initialize_results(callback_functions)
    found_set = set()

    _search_zip_content(
        file_path, file_bytes, file_names_to_search, results, found_set, case_sensitive, return_first_only, recursive,
        callback_functions, extract_file_to_path)

    if not return_empty_list_per_file_name:
        # Filter out keys with empty lists.
        results = {key: value for key, value in results.items() if value}

    return results
@@ -0,0 +1,34 @@
1
+ import os
2
+ import shutil
3
+
4
+ from ..print_api import print_api
5
+
6
+
7
def extract_archive_with_shutil(file_path: str, target_directory: str, **kwargs) -> str:
    """
    Function extracts the archive to target directory.
    Returns full path to extracted directory, or an empty string if the extraction failed.
    This function doesn't preserve the original date and time of files from the archive, instead the time of extraction
    will be applied.

    The returned path is derived from the archive file name (the name without its last extension, placed under
    'target_directory'). It is not checked against what the archive actually contained.

    :param file_path: Full file path to archived file to extract.
    :param target_directory: The directory on the filesystem to extract the file to.
    :param kwargs: passed through unchanged to 'print_api'.
    :return: str.
    """

    print_api(f'Extracting {file_path}', **kwargs)

    try:
        shutil.unpack_archive(file_path, target_directory)
    except Exception as exception_object:
        # Best effort by design: report the failure and return an empty path instead of raising.
        print_api(f'Error extracting: {file_path}', error_type=True, **kwargs)
        print_api(exception_object, error_type=True, **kwargs)
        return str()

    # 'basename' also handles paths without any separator; splitting on 'os.sep' raised IndexError for
    # those after the archive had already been unpacked, so a successful extraction was reported as an error.
    file_name = os.path.basename(file_path)
    file_name_no_extension = file_name.rsplit('.', maxsplit=1)[0]
    extracted_directory: str = target_directory + os.sep + file_name_no_extension

    print_api(f'Extracted to: {extracted_directory}', **kwargs)
    return extracted_directory
@@ -0,0 +1,234 @@
1
+ import os
2
+ import zipfile
3
+ from io import BytesIO
4
+
5
+ from . import zip, sevenz
6
+
7
+ import py7zr
8
+
9
+
10
+ def _get_unique_filename(directory, filename):
11
+ """
12
+ Generates a unique filename by appending a number if the file already exists.
13
+ """
14
+ name, ext = os.path.splitext(filename)
15
+ counter = 1
16
+ unique_filename = filename
17
+ while os.path.exists(os.path.join(directory, unique_filename)):
18
+ unique_filename = f"{name}_{counter}{ext}"
19
+ counter += 1
20
+ return unique_filename
21
+
22
+
23
+ def _is_zip_file(file, zip_obj):
24
+ try:
25
+ with zip_obj.open(file) as file_data:
26
+ with zipfile.ZipFile(BytesIO(file_data.read())) as zip_file:
27
+ if zip_file.testzip() is None: # No errors found
28
+ return True
29
+ except zipfile.BadZipFile:
30
+ return False
31
+ return False
32
+
33
+
34
+ def _match_file_name(target, current, case_sensitive):
35
+ if case_sensitive:
36
+ return current.endswith(target)
37
+ else:
38
+ return current.lower().endswith(target.lower())
39
+
40
+
41
+ def _handle_nested_zip(
42
+ zip_obj, item, archived_file_bytes, file_names, results, found_set, recursive, return_first_only,
43
+ case_sensitive, callback_functions, extract_file_to_path):
44
+
45
+ if recursive and _is_zip_file(item.filename, zip_obj):
46
+ nested_zip_bytes = BytesIO(archived_file_bytes)
47
+ with zipfile.ZipFile(nested_zip_bytes) as nested_zip:
48
+ _search_in_archive(
49
+ nested_zip, file_names, results, found_set, case_sensitive, return_first_only, recursive,
50
+ callback_functions, extract_file_to_path)
51
+
52
+
53
+ def _handle_file_extraction(item, extract_file_to_path, archived_file_bytes):
54
+ if extract_file_to_path:
55
+ unique_filename = _get_unique_filename(extract_file_to_path, os.path.basename(item.filename))
56
+ with open(os.path.join(extract_file_to_path, unique_filename), 'wb') as f:
57
+ f.write(archived_file_bytes)
58
+
59
+
60
+ def _handle_callback_matching(
61
+ item, archive_type, archived_file_bytes, callback_functions, results, found_set, return_first_only):
62
+
63
+ for callback in callback_functions:
64
+ callback_result = callback(archived_file_bytes)
65
+ if callback_result:
66
+ # Initialize key for callback function name if not present
67
+ if callback.__name__ not in results:
68
+ results[callback.__name__] = []
69
+
70
+ if archive_type == 'zip':
71
+ file_info = {
72
+ 'bytes': archived_file_bytes,
73
+ 'name': item.filename,
74
+ 'size': item.file_size,
75
+ 'modified_time': item.date_time
76
+ }
77
+ elif archive_type == '7z':
78
+ file_info = {
79
+ 'bytes': archived_file_bytes,
80
+ 'name': item.filename,
81
+ 'size': item.uncompressed,
82
+ 'modified_time': item.creationtime
83
+ }
84
+ results[callback.__name__].append(file_info)
85
+ if return_first_only:
86
+ found_set.add(item.filename)
87
+ return True
88
+ return False
89
+
90
+
91
+ def _handle_name_matching(item, archived_file_bytes, file_names, case_sensitive, results, found_set, return_first_only):
92
+ if any(_match_file_name(file_name, item.filename, case_sensitive) for file_name in file_names):
93
+ if item.filename not in results:
94
+ results[item.filename] = []
95
+ file_info = {
96
+ 'bytes': archived_file_bytes,
97
+ 'name': item.filename,
98
+ 'size': item.file_size,
99
+ 'modified_time': item.date_time
100
+ }
101
+ results[item.filename].append(file_info)
102
+ if return_first_only:
103
+ found_set.add(item.filename)
104
+
105
+
106
+ def _search_in_archive(
107
+ arch_obj, archive_type, file_names, results, found_set, case_sensitive, return_first_only, recursive,
108
+ callback_functions, extract_file_to_path):
109
+
110
+ file_info_list = None
111
+ if archive_type == 'zip':
112
+ file_info_list = arch_obj.infolist()
113
+ elif archive_type == '7z':
114
+ file_info_list = arch_obj.list()
115
+
116
+ for item in file_info_list:
117
+ if item.filename.endswith('/'): # Skip directories
118
+ continue
119
+
120
+ archived_file_bytes = None
121
+ if archive_type == 'zip':
122
+ with arch_obj.open(item) as file_data:
123
+ archived_file_bytes = file_data.read()
124
+ elif archive_type == '7z':
125
+ file_dict = arch_obj.read(item.filename)
126
+ archived_file_bytes = file_dict[item.filename].read()
127
+
128
+ callback_matched = False
129
+ if callback_functions:
130
+ callback_matched = _handle_callback_matching(
131
+ item, archive_type, archived_file_bytes, callback_functions, results, found_set, return_first_only)
132
+
133
+ if callback_matched:
134
+ _handle_file_extraction(item, extract_file_to_path, archived_file_bytes)
135
+ else:
136
+ _handle_nested_zip(
137
+ arch_obj, item, archived_file_bytes, file_names, results, found_set, recursive, return_first_only,
138
+ case_sensitive, callback_functions, extract_file_to_path)
139
+ if file_names and not callback_matched:
140
+ _handle_name_matching(
141
+ item, archived_file_bytes, file_names, case_sensitive, results, found_set, return_first_only)
142
+
143
+ if file_names is not None and len(found_set) == len(file_names):
144
+ break # All files found, stop searching
145
+
146
+
147
+ def _initialize_results(callback_functions):
148
+ if callback_functions:
149
+ return {callback.__name__: [] for callback in callback_functions}
150
+ else:
151
+ return {}
152
+
153
+
154
+ def _open_archive(archive_type, file_like_object):
155
+ if archive_type == 'zip':
156
+ return zipfile.ZipFile(file_like_object, 'r')
157
+ elif archive_type == '7z':
158
+ return py7zr.SevenZipFile(file_like_object, 'r')
159
+ else:
160
+ raise ValueError("Unsupported archive format.")
161
+
162
+
163
+ def _get_archive_type(file_path, file_bytes) -> tuple:
164
+ if file_bytes is not None:
165
+ file_like_object = BytesIO(file_bytes)
166
+ elif file_path is not None:
167
+ file_like_object = file_path
168
+ else:
169
+ raise ValueError("Either file_path or file_bytes must be provided.")
170
+
171
+ if zip.is_zip_zipfile(file_path=file_like_object):
172
+ return 'zip', file_like_object
173
+ elif sevenz.is_7z(file_path=file_like_object):
174
+ return '7z', file_like_object
175
+ else:
176
+ raise ValueError("Unsupported archive format.")
177
+
178
+
179
def _search_archive_content(
        file_path, file_bytes, file_names_to_search, results, found_set, case_sensitive, return_first_only, recursive,
        callback_functions, extract_file_to_path):
    """
    Detect the archive format, open the archive and run the search over its content.

    Matches are accumulated into 'results' and 'found_set'. The archive is closed when the search ends.
    """
    detected_type, archive_source = _get_archive_type(file_path, file_bytes)

    with _open_archive(detected_type, archive_source) as opened_archive:
        _search_in_archive(
            opened_archive, detected_type, file_names_to_search, results, found_set, case_sensitive,
            return_first_only, recursive, callback_functions, extract_file_to_path)
188
+
189
+
190
def search_file_in_archive(
        file_path: str = None,
        file_bytes: bytes = None,
        file_names_to_search: list[str] = None,
        case_sensitive: bool = True,
        return_first_only: bool = False,
        return_empty_list_per_file_name: bool = False,
        recursive: bool = False,
        callback_functions: list = None,
        extract_file_to_path: str = None
) -> dict[str, list[dict]]:
    """
    Search a zip or 7z archive for files by name and/or by content callbacks.

    Every match is stored as a dictionary with the keys 'bytes', 'name', 'size' and 'modified_time'.
    Matches found by a callback are grouped under the callback's '__name__'. Matches found by name are
    grouped under the full name of the archive entry.

    :param file_path: string, full path to the archive file. Used only when 'file_bytes' is None.
    :param file_bytes: bytes, the bytes of the archive file. Takes precedence over 'file_path'.
    :param file_names_to_search: list of strings, the names of the files to search. An entry matches when its
        name ends with one of these names.
    :param case_sensitive: boolean, default is 'True'. Determines if file name search should be case sensitive.
    :param return_first_only: boolean, default is 'False'. When True, matched entry names are counted and the
        search stops once as many entries were found as there are names in 'file_names_to_search'.
    :param return_empty_list_per_file_name: boolean, default is 'False'.
        True: Keep keys whose list is empty (the result is pre-seeded with one empty list per callback).
        False: Remove keys whose list is empty.
    :param recursive: boolean, default is 'False'. If True, search recursively in nested zip files.
    :param callback_functions: list of callables, default is None. Each function receives the bytes of an
        archived file and should return a truthy value if this file is 'found'.
    :param extract_file_to_path: string, full path to the directory where files matched by a callback
        should be written.
    :return: dictionary of lists of match dictionaries.
    :raises ValueError: when neither 'file_names_to_search' nor 'callback_functions' is provided, when
        neither 'file_path' nor 'file_bytes' is provided, or when the archive format is not supported.
    """

    if file_names_to_search is None and callback_functions is None:
        raise ValueError("Either file_names_to_search or callback_functions must be provided.")

    # Initialize results dictionary.
    results = _initialize_results(callback_functions)
    found_set = set()

    _search_archive_content(
        file_path, file_bytes, file_names_to_search, results, found_set, case_sensitive, return_first_only, recursive,
        callback_functions, extract_file_to_path)

    if not return_empty_list_per_file_name:
        # Filter out keys with empty lists.
        results = {key: value for key, value in results.items() if value}

    return results
@@ -0,0 +1,16 @@
1
+ import py7zr
2
+
3
+
4
def is_7z(file_path: str) -> bool:
    """
    Tell whether the given file can be opened as a 7z archive.

    The archive is opened with py7zr and 'testzip()' is run on it. The result of 'testzip()' is not
    inspected; only a 'Bad7zFile' error makes the function return False.

    :param file_path: string, full path to the file.
    :return: boolean.
    """

    try:
        with py7zr.SevenZipFile(file_path) as candidate:
            candidate.testzip()
    except py7zr.Bad7zFile:
        return False
    else:
        return True