opsci-toolbox 0.0.5__py3-none-any.whl → 0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,12 +15,27 @@ import pyarrow.parquet as pq
15
15
  from datetime import datetime
16
16
  import hashlib
17
17
  import ast
18
+ import subprocess
18
19
 
19
20
  ####################################################################################################
20
21
  # FILE LOADERS
21
22
  ####################################################################################################
22
23
 
23
- def load_file(path, delimiter = ";", decimal ="."):
24
+ def load_file(path: str, delimiter: str = ";", decimal: str = ".") -> pd.DataFrame:
25
+ """
26
+ Load a file into a Pandas DataFrame based on the file extension.
27
+
28
+ Parameters:
29
+ path (str): The file path to load.
30
+ delimiter (str, optional): The delimiter used in CSV/TSV files. Default is ";".
31
+ decimal (str, optional): The character used for decimal points in CSV/TSV files. Default is ".".
32
+
33
+ Returns:
34
+ pd.DataFrame: The loaded data as a Pandas DataFrame.
35
+
36
+ Raises:
37
+ ValueError: If the file extension is not supported.
38
+ """
24
39
  extension = os.path.splitext(os.path.basename(path))[1]
25
40
  if extension == ".parquet":
26
41
  df = load_parquet(path)
@@ -38,9 +53,18 @@ def load_file(path, delimiter = ";", decimal ="."):
38
53
  print("Check your input file. Extension isn't supported : .parquet, .pickle, .json, .jsonl, .csv, .tsv")
39
54
  return df
40
55
 
41
- def load_parquet(path):
56
+ def load_parquet(path: str) -> pd.DataFrame:
42
57
  """
43
- Load a parquet file into a DataFrame
58
+ Load a parquet file into a DataFrame.
59
+
60
+ Parameters:
61
+ path (str): The file path to the parquet file.
62
+
63
+ Returns:
64
+ pd.DataFrame: The loaded data as a Pandas DataFrame.
65
+
66
+ Raises:
67
+ Exception: If there is an error reading the parquet file.
44
68
  """
45
69
  try:
46
70
  table = pq.read_table(path)
@@ -50,78 +74,108 @@ def load_parquet(path):
50
74
  print(e)
51
75
  return df
52
76
 
53
- # def load_pickle(path: str):
54
- # """
55
- # Load a pickle file into a dataframe
56
- # """
57
-
58
- # with open(path, 'rb') as f:
59
- # df=pickle.load(f)
60
- # return df
61
- def load_pickle(path):
62
- return pd.read_pickle(path)
63
-
64
- def write_pickle(data, path, filename):
77
+ def load_pickle(path: str) -> pd.DataFrame:
65
78
  """
66
- Write a dataframe into a pickle file
79
+ Load a pickle file into a DataFrame.
80
+
81
+ Parameters:
82
+ path (str): The file path to the pickle file.
83
+
84
+ Returns:
85
+ pd.DataFrame: The loaded data as a Pandas DataFrame.
67
86
  """
68
- file_path=os.path.join(path, filename+'.pickle')
69
- with open(file_path, 'wb') as f:
70
- pickle.dump(data, f)
71
- return file_path
87
+ return pd.read_pickle(path)
72
88
 
73
89
 
74
- def load_json(path: str):
90
+ def load_json(path: str) -> pd.DataFrame:
75
91
  """
76
- Load a json file into a DataFrame
92
+ Load a JSON file into a DataFrame.
93
+
94
+ Parameters:
95
+ path (str): The file path to the JSON file.
96
+
97
+ Returns:
98
+ pd.DataFrame: The loaded data as a Pandas DataFrame.
99
+
100
+ Raises:
101
+ Exception: If there is an error reading the JSON file.
77
102
  """
78
- df=pd.DataFrame()
103
+ df = pd.DataFrame()
79
104
  try:
80
105
  with open(path, 'r') as json_file:
81
106
  data = json.load(json_file)
82
- df=pd.json_normalize(data)
83
-
107
+ df = pd.json_normalize(data)
84
108
  except Exception as e:
85
- pass
86
- print(e)
109
+ print(f"Error reading the JSON file: {e}")
110
+ raise
87
111
  return df
88
112
 
89
- def load_jsonl(path: str):
113
+ def load_jsonl(path: str) -> pd.DataFrame:
90
114
  """
91
- Load a jsonl file into a dataframe
115
+ Load a JSON Lines (jsonl) file into a DataFrame.
116
+
117
+ Parameters:
118
+ path (str): The file path to the jsonl file.
119
+
120
+ Returns:
121
+ pd.DataFrame: The loaded data as a Pandas DataFrame.
122
+
123
+ Raises:
124
+ Exception: If there is an error reading the jsonl file.
92
125
  """
93
126
  df = pd.DataFrame()
94
127
  try:
95
128
  data = []
96
129
  with open(path, 'r') as json_file:
97
- for line in tqdm(json_file):
130
+ for line in tqdm(json_file, desc="Loading JSON Lines"):
98
131
  try:
99
132
  data.append(json.loads(line))
100
- except:
101
- pass
133
+ except json.JSONDecodeError as line_error:
134
+ print(f"Error decoding line: {line_error}")
102
135
 
103
136
  df = pd.json_normalize(data)
104
137
  except Exception as e:
105
- pass
106
- print(e)
138
+ print(f"Error reading the jsonl file: {e}")
139
+ raise
107
140
  return df
108
141
 
109
142
 
110
- def load_csv(path: str, delimiter: str =";", decimal:str ="."):
143
+ def load_csv(path: str, delimiter: str = ";", decimal: str = ".") -> pd.DataFrame:
111
144
  """
112
- Load a csv file into a dataframe
145
+ Load a CSV file into a DataFrame.
146
+
147
+ Parameters:
148
+ path (str): The file path to the CSV file.
149
+ delimiter (str, optional): The delimiter used in the CSV file. Default is ";".
150
+ decimal (str, optional): The character used for decimal points in the CSV file. Default is ".".
151
+
152
+ Returns:
153
+ pd.DataFrame: The loaded data as a Pandas DataFrame.
154
+
155
+ Raises:
156
+ Exception: If there is an error reading the CSV file.
113
157
  """
114
- df= pd.DataFrame()
158
+ df = pd.DataFrame()
115
159
  try:
116
160
  df = pd.read_csv(path, delimiter=delimiter, encoding="utf-8", decimal=decimal)
117
161
  except Exception as e:
118
- pass
119
- print(e)
162
+ print(f"Error reading the CSV file: {e}")
163
+ raise
120
164
  return df
121
165
 
122
- def read_txt_to_list(file_path: str):
166
+ def read_txt_to_list(file_path: str) -> list[str]:
123
167
  """
124
- Read a text file line by line and append to a Python list
168
+ Read a text file line by line and append to a Python list.
169
+
170
+ Parameters:
171
+ file_path (str): The file path to the text file.
172
+
173
+ Returns:
174
+ list[str]: A list of lines read from the text file.
175
+
176
+ Raises:
177
+ FileNotFoundError: If the file does not exist.
178
+ Exception: If any other error occurs during file reading.
125
179
  """
126
180
 
127
181
  # Initialize an empty list to store the lines
@@ -136,12 +190,22 @@ def read_txt_to_list(file_path: str):
136
190
  print(f"File not found: {file_path}")
137
191
  except Exception as e:
138
192
  print(f"An error occurred: {e}")
193
+ raise
139
194
  return lines
140
195
 
141
-
142
- def read_json(path: str):
196
+ def read_json(path: str) -> dict:
143
197
  """
144
- Read a json file and return a dict
198
+ Read a JSON file and return a dictionary.
199
+
200
+ Parameters:
201
+ path (str): The file path to the JSON file.
202
+
203
+ Returns:
204
+ dict: The data read from the JSON file as a dictionary.
205
+
206
+ Raises:
207
+ FileNotFoundError: If the file does not exist.
208
+ Exception: If there is an error reading the JSON file.
145
209
  """
146
210
  with open(path, 'r') as json_file:
147
211
  data = json.load(json_file)
@@ -149,25 +213,55 @@ def read_json(path: str):
149
213
 
150
214
  def read_txt_file(file_path: str) -> str:
151
215
  """
152
- Read a text file
216
+ Read the content of a text file and return it as a string.
217
+
218
+ Parameters:
219
+ file_path (str): The file path to the text file.
220
+
221
+ Returns:
222
+ str: The content of the text file as a string.
223
+
224
+ Raises:
225
+ FileNotFoundError: If the file does not exist.
226
+ Exception: If there is an error reading the text file.
153
227
  """
154
- with open(file_path, 'r') as file:
155
- content = file.read()
228
+ try:
229
+ with open(file_path, 'r') as file:
230
+ content = file.read()
231
+ except FileNotFoundError:
232
+ print(f"File not found: {file_path}")
233
+ raise
234
+ except Exception as e:
235
+ print(f"An error occurred while reading the file: {e}")
236
+ raise
156
237
  return content
157
238
 
158
- def read_jsonl(path: str):
239
+ def read_jsonl(path: str) -> list[dict]:
159
240
  """
160
- Load a jsonl file into a dataframe
241
+ Load a JSON Lines (jsonl) file into a list of dictionaries.
242
+
243
+ Parameters:
244
+ path (str): The file path to the jsonl file.
245
+
246
+ Returns:
247
+ list[dict]: A list of dictionaries containing the data read from the JSON Lines file.
248
+
249
+ Raises:
250
+ FileNotFoundError: If the file does not exist.
251
+ Exception: If there is an error reading the jsonl file.
161
252
  """
162
253
  json_data = []
163
- with open(path, 'r') as json_file:
164
- for line in tqdm(json_file):
165
- try:
166
- json_data.append(json.loads(line))
167
- except Exception as e:
168
- pass
169
- print(e)
170
-
254
+ try:
255
+ with open(path, 'r') as json_file:
256
+ for line in tqdm(json_file, desc="Reading JSON Lines"):
257
+ try:
258
+ json_data.append(json.loads(line))
259
+ except Exception as e:
260
+ print(f"Error decoding line: {e}")
261
+ raise
262
+ except FileNotFoundError:
263
+ print(f"File not found: {path}")
264
+ raise
171
265
  return json_data
172
266
 
173
267
 
@@ -176,37 +270,55 @@ def read_jsonl(path: str):
176
270
  #########################################################################################
177
271
 
178
272
 
179
- # def write_pickle(df: pd.DataFrame, path: str, name: str):
180
- # """
181
- # Write a dataframe into a pickle file
182
- # """
183
- # file_path=os.path.join(path, name+'.pickle')
273
+ def write_pickle(data: pd.DataFrame, path: str, filename: str) -> str:
274
+ """
275
+ Write a DataFrame into a pickle file.
276
+
277
+ Parameters:
278
+ data (pd.DataFrame): The DataFrame to be written to the pickle file.
279
+ path (str): The directory where the pickle file will be saved.
280
+ filename (str): The name of the pickle file (without the extension).
184
281
 
185
- # with open(file_path, 'wb') as f:
186
- # pickle.dump(df, f)
187
- # return file_path
282
+ Returns:
283
+ str: The full path to the saved pickle file.
284
+ """
285
+ file_path = os.path.join(path, filename + '.pickle')
286
+ with open(file_path, 'wb') as f:
287
+ pickle.dump(data, f)
288
+ return file_path
188
289
 
189
290
 
190
- def write_list_to_txt(input_list: list, path: str, name: str):
291
+ def write_list_to_txt(input_list: list, path: str, name: str) -> str:
191
292
  """
192
293
  Write a list to a text file, with each item on a new line.
193
294
 
194
295
  Parameters:
195
- - file_path (str): The path to the text file.
196
296
  - input_list (list): The list to be written to the text file.
297
+ - path (str): The directory path where the text file will be saved.
298
+ - name (str): The name of the text file (without the extension).
299
+
300
+ Returns:
301
+ str: The full path to the saved text file.
197
302
  """
198
- file_path=os.path.join(path, name+'.txt')
303
+ file_path = os.path.join(path, name + '.txt')
199
304
  with open(file_path, 'w') as file:
200
305
  for item in input_list:
201
306
  file.write(str(item) + '\n')
202
-
203
307
  return file_path
204
308
 
205
- def write_jsonl(data: list, path: str, name: str):
309
+ def write_jsonl(data: list[dict], path: str, name: str) -> str:
206
310
  """
207
- Write a jsonl file. Function takes as input a list of dict.
311
+ Write data to a JSON Lines (jsonl) file. Each dictionary in the list represents a single JSON object.
312
+
313
+ Parameters:
314
+ - data (list[dict]): The list of dictionaries to be written to the JSON Lines file.
315
+ - path (str): The directory path where the JSON Lines file will be saved.
316
+ - name (str): The name of the JSON Lines file (without the extension).
317
+
318
+ Returns:
319
+ str: The full path to the saved JSON Lines file.
208
320
  """
209
- file_path=os.path.join(path, name+'.jsonl')
321
+ file_path = os.path.join(path, name + '.jsonl')
210
322
  with open(file_path, 'w') as file:
211
323
  for entry in data:
212
324
  json.dump(entry, file)
@@ -214,41 +326,67 @@ def write_jsonl(data: list, path: str, name: str):
214
326
  return file_path
215
327
 
216
328
 
217
- def write_json(json_dict: dict, path: str, name: str):
329
+ def write_json(json_dict: dict, path: str, name: str) -> str:
218
330
  """
219
- Write a dict into a json file
331
+ Write a dictionary to a JSON file.
332
+
333
+ Parameters:
334
+ - json_dict (dict): The dictionary to be written to the JSON file.
335
+ - path (str): The directory path where the JSON file will be saved.
336
+ - name (str): The name of the JSON file (without the extension).
337
+
338
+ Returns:
339
+ str: The full path to the saved JSON file.
220
340
  """
221
- file_path=os.path.join(path, name+'.json')
341
+ file_path = os.path.join(path, name + '.json')
222
342
  with open(file_path, 'w') as outfile:
223
343
  json.dump(json_dict, outfile)
224
344
  return file_path
225
345
 
226
346
 
227
- def write_dataframe_to_json(df: pd.DataFrame, path: str, name: str, orient='records'):
347
+ def write_dataframe_to_json(df: pd.DataFrame, path: str, name: str, orient: str = 'records') -> str:
228
348
  """
229
- Write a dataframe into a json file
349
+ Write a DataFrame to a JSON file.
350
+
351
+ Parameters:
352
+ - df (pd.DataFrame): The DataFrame to be written to the JSON file.
353
+ - path (str): The directory path where the JSON file will be saved.
354
+ - name (str): The name of the JSON file (without the extension).
355
+ - orient (str, optional): The format of the JSON file. Default is 'records'.
356
+
357
+ Returns:
358
+ str: The full path to the saved JSON file.
230
359
  """
231
- file_path=os.path.join(path, name+".json")
360
+ file_path = os.path.join(path, name + ".json")
232
361
  df.to_json(file_path, orient=orient, lines=True)
362
+ return file_path
233
363
 
234
364
 
235
- def save_dataframe_excel(df: pd.DataFrame, path: str, name :str, sheet_name:str):
365
+ def save_dataframe_excel(df: pd.DataFrame, path: str, name: str, sheet_name: str) -> str:
236
366
  """
237
- Write a dataframe into a XLSX file
367
+ Write a DataFrame to an Excel file.
368
+
369
+ Parameters:
370
+ - df (pd.DataFrame): The DataFrame to be written to the Excel file.
371
+ - path (str): The directory path where the Excel file will be saved.
372
+ - name (str): The name of the Excel file (without the extension).
373
+ - sheet_name (str): The name of the Excel sheet.
374
+
375
+ Returns:
376
+ str: The full path to the saved Excel file.
238
377
  """
239
-
240
- file_path=os.path.join(path, f"{name}.xlsx")
378
+ file_path = os.path.join(path, f"{name}.xlsx")
241
379
  df.to_excel(file_path, sheet_name=sheet_name, index=False)
242
380
  print(file_path, "- File created")
243
381
  return file_path
244
382
 
245
- def add_dataframe_to_excel(df: pd.DataFrame, existing_file_path: str, new_sheet_name: str):
383
+ def add_dataframe_to_excel(df: pd.DataFrame, existing_file_path: str, new_sheet_name: str) -> None:
246
384
  """
247
385
  Adds a DataFrame to an existing Excel file as a new sheet.
248
386
 
249
387
  Parameters:
388
+ - df (pd.DataFrame): The DataFrame to be added.
250
389
  - existing_file_path (str): Path to the existing Excel file.
251
- - dataframe (pd.DataFrame): The DataFrame to be added.
252
390
  - new_sheet_name (str): Name of the new sheet in the Excel file.
253
391
 
254
392
  Returns:
@@ -257,7 +395,7 @@ def add_dataframe_to_excel(df: pd.DataFrame, existing_file_path: str, new_sheet_
257
395
  # Read existing Excel file into a dictionary of DataFrames
258
396
  excel_file = pd.read_excel(existing_file_path, sheet_name=None)
259
397
 
260
- # Add the new DataFrame to the dictionary with the specified sheet aname
398
+ # Add the new DataFrame to the dictionary with the specified sheet name
261
399
  excel_file[new_sheet_name] = df
262
400
 
263
401
  # Write the updated dictionary of DataFrames back to the Excel file
@@ -265,46 +403,62 @@ def add_dataframe_to_excel(df: pd.DataFrame, existing_file_path: str, new_sheet_
265
403
  for sheet_name, df in excel_file.items():
266
404
  df.to_excel(writer, sheet_name=sheet_name, index=False)
267
405
 
268
- def save_dataframe_csv(df: pd.DataFrame, path: str, name: str):
406
+ def save_dataframe_csv(df: pd.DataFrame, path: str, name: str) -> str:
269
407
  """
270
- This function saves a DataFrame to a CSV file within a project directory.
271
-
272
- :param df: The DataFrame to be saved.
273
- :type df: pandas.DataFrame
274
-
275
- :param dir_csv: The directory where the CSV file will be saved.
276
- :type dir_csv: str
277
-
278
- :param name: The desired name for the CSV file (without extension).
279
- :type name: str
408
+ Save a DataFrame to a CSV file within a specified directory.
409
+
410
+ Parameters:
411
+ - df (pd.DataFrame): The DataFrame to be saved.
412
+ - path (str): The directory where the CSV file will be saved.
413
+ - name (str): The desired name for the CSV file (without extension).
414
+
415
+ Returns:
416
+ str: The full path to the saved CSV file.
280
417
  """
281
- names = df.columns
418
+ file_path = os.path.join(path, f"{name}.csv")
282
419
  df.to_csv(
283
- os.path.join(path, f"{name}.csv"),
284
- header=names,
420
+ file_path,
285
421
  sep=";",
286
422
  encoding="utf-8",
287
423
  index=False,
288
424
  decimal=",",
289
425
  )
290
- print("FILE SAVED: ", os.path.join(path, f"{name}.csv"))
426
+ print("File saved:", file_path)
427
+ return file_path
291
428
 
292
- def write_txt_file(data: str, path: str, name: str):
429
+ def write_txt_file(data: str, path: str, name: str) -> str:
293
430
  """
294
- Write a text file
431
+ Write a string to a text file.
432
+
433
+ Parameters:
434
+ - data (str): The string to be written to the text file.
435
+ - path (str): The directory path where the text file will be saved.
436
+ - name (str): The name of the text file (without the extension).
437
+
438
+ Returns:
439
+ str: The full path to the saved text file.
295
440
  """
296
- file_path=os.path.join(path, name+'.txt')
441
+ file_path = os.path.join(path, name + '.txt')
297
442
  with open(file_path, "w") as file:
298
443
  file.write(data)
299
444
  return file_path
300
445
 
301
- def split_df_into_chunks(df, path, name, chunk_size = 10000):
446
+ def split_df_into_chunks(df: pd.DataFrame, path: str, name: str, chunk_size: int = 10000) -> list[str]:
302
447
  """
303
- Split a dataframe into n pickle files
448
+ Split a DataFrame into multiple pickle files with a specified chunk size.
449
+
450
+ Parameters:
451
+ - df (pd.DataFrame): The DataFrame to be split.
452
+ - path (str): The directory path where the pickle files will be saved.
453
+ - name (str): The base name for the pickle files.
454
+ - chunk_size (int, optional): The size of each chunk. Default is 10000.
455
+
456
+ Returns:
457
+ list[str]: A list of file paths to the saved pickle files.
304
458
  """
305
459
  num_chunks = -(-len(df) // chunk_size) # Calculate the number of chunks using ceil division
306
460
 
307
- file_paths=[]
461
+ file_paths = []
308
462
 
309
463
  # create smaller datasets of chunk_size each
310
464
  for i in range(num_chunks):
@@ -317,16 +471,19 @@ def split_df_into_chunks(df, path, name, chunk_size = 10000):
317
471
 
318
472
  return file_paths
319
473
 
320
-
321
-
322
474
  ###################################################################################################
323
475
  # FOLDERS / FILES HELPERS
324
476
  ###################################################################################################
325
477
 
326
- def create_dir(path:str):
478
+ def create_dir(path: str) -> str:
327
479
  """
328
- Create a local directory
480
+ Create a local directory if it doesn't exist.
481
+
482
+ Parameters:
483
+ - path (str): The directory path to be created.
329
484
 
485
+ Returns:
486
+ str: The path of the created directory.
330
487
  """
331
488
  if not os.path.exists(path):
332
489
  os.makedirs(path)
@@ -334,18 +491,31 @@ def create_dir(path:str):
334
491
  return path
335
492
 
336
493
 
337
- def list_files_in_dir(path: str, filetype:str ='*.json'):
494
+ def list_files_in_dir(path: str, filetype: str = '*.json') -> list[str]:
338
495
  """
339
- List files of a specific format in a directory
496
+ List files of a specific format in a directory.
497
+
498
+ Parameters:
499
+ - path (str): The directory path to search for files.
500
+ - filetype (str, optional): The file type pattern to search for. Default is '*.json'.
501
+
502
+ Returns:
503
+ list[str]: A list of file paths matching the specified file type pattern.
340
504
  """
341
505
  pattern = os.path.join(path, filetype)
342
506
  files = glob.glob(pattern)
343
507
  return files
344
508
 
345
509
 
346
- def list_subdirectories(root_directory: str):
510
+ def list_subdirectories(root_directory: str) -> list[str]:
347
511
  """
348
- List subdirectories in a root directory
512
+ List subdirectories in a root directory.
513
+
514
+ Parameters:
515
+ - root_directory (str): The root directory path.
516
+
517
+ Returns:
518
+ list[str]: A list of subdirectory names.
349
519
  """
350
520
  subdirectories = []
351
521
  for entry in os.scandir(root_directory):
@@ -354,9 +524,15 @@ def list_subdirectories(root_directory: str):
354
524
  return subdirectories
355
525
 
356
526
 
357
- def list_recursive_subdirectories(root_directory: str):
527
+ def list_recursive_subdirectories(root_directory: str) -> list[str]:
358
528
  """
359
- List recursively all subdirectories from a root directory
529
+ List recursively all subdirectories from a root directory.
530
+
531
+ Parameters:
532
+ - root_directory (str): The root directory path.
533
+
534
+ Returns:
535
+ list[str]: A list of subdirectory paths.
360
536
  """
361
537
  subdirectories = []
362
538
  for root, dirs, files in os.walk(root_directory):
@@ -364,9 +540,16 @@ def list_recursive_subdirectories(root_directory: str):
364
540
  return subdirectories
365
541
 
366
542
 
367
- def list_files_in_subdirectories(path:str, filetype:str='*.json'):
543
+ def list_files_in_subdirectories(path: str, filetype: str = '*.json') -> list[str]:
368
544
  """
369
- Walk through subdirectories of a root directory to list files of a specific format
545
+ Walk through subdirectories of a root directory to list files of a specific format.
546
+
547
+ Parameters:
548
+ - path (str): The root directory path.
549
+ - filetype (str, optional): The file type pattern to search for. Default is '*.json'.
550
+
551
+ Returns:
552
+ list[str]: A list of file paths matching the specified file type pattern in subdirectories.
370
553
  """
371
554
  files = []
372
555
 
@@ -381,21 +564,36 @@ def list_files_in_subdirectories(path:str, filetype:str='*.json'):
381
564
 
382
565
  return files
383
566
 
384
- def copy_file(source_path: str, destination_path: str, new_filename:str):
567
+ def copy_file(source_path: str, destination_path: str, new_filename: str = '') -> str:
385
568
  """
386
- Function to copy a file to another path
569
+ Copy a file from a source path to a destination path.
570
+
571
+ Parameters:
572
+ - source_path (str): The path of the source file.
573
+ - destination_path (str): The path of the destination directory.
574
+ - new_filename (str, optional): The new filename. If not provided, the original filename is used.
575
+
576
+ Returns:
577
+ str: The path of the copied file.
387
578
  """
388
579
  if new_filename:
389
- file_path=os.path.join(destination_path, new_filename)
580
+ file_path = os.path.join(destination_path, new_filename)
390
581
  else:
391
- filename=os.path.basename(source_path)
392
- file_path=os.path.join(destination_path,filename)
582
+ filename = os.path.basename(source_path)
583
+ file_path = os.path.join(destination_path, filename)
584
+
393
585
  shutil.copy(source_path, file_path)
394
586
  return file_path
395
587
 
396
- def remove_file(file_path):
588
+ def remove_file(file_path: str) -> None:
397
589
  """
398
- Remove a single file
590
+ Remove a single file.
591
+
592
+ Parameters:
593
+ - file_path (str): The path of the file to be removed.
594
+
595
+ Returns:
596
+ None
399
597
  """
400
598
  try:
401
599
  os.remove(file_path)
@@ -403,20 +601,33 @@ def remove_file(file_path):
403
601
  except OSError as e:
404
602
  print(f"Error removing file {file_path}: {e}")
405
603
 
406
- def remove_folder(folder_path):
604
+ def remove_folder(folder_path: str) -> None:
407
605
  """
408
- Remove a folder and all the files inside
606
+ Remove a folder and all its contents.
607
+
608
+ Parameters:
609
+ - folder_path (str): The path of the folder to be removed.
610
+
611
+ Returns:
612
+ None
409
613
  """
410
614
  try:
411
615
  shutil.rmtree(folder_path)
412
616
  print(f"Folder {folder_path} and its contents removed successfully.")
413
617
  except OSError as e:
414
- print(f"Error removing folder {folder_path}: {e}")
618
+ print(f"Error removing folder {folder_path}: {e}")
415
619
 
416
620
 
417
- def get_file_size(file_path):
621
+ def get_file_size(file_path: str) -> tuple[int, str]:
418
622
  """
419
- Get a single file size in a readable format (KB, MB, GB)
623
+ Get the size of a single file in a readable format (KB, MB, GB).
624
+
625
+ Parameters:
626
+ - file_path (str): The path of the file.
627
+
628
+ Returns:
629
+ tuple[int, str]: A tuple containing the size of the file in bytes and its formatted size.
630
+ If the file is not found, returns None.
420
631
  """
421
632
  try:
422
633
  size = os.path.getsize(file_path)
@@ -439,9 +650,16 @@ def get_file_size(file_path):
439
650
  print(f"File not found: {file_path}")
440
651
  return None
441
652
 
442
- def get_folder_size(folder_path):
653
+ def get_folder_size(folder_path: str) -> tuple[int, str]:
443
654
  """
444
- Get size of all files contained in a folder in a readable format (KB, MB, GB)
655
+ Get the size of all files contained in a folder in a readable format (KB, MB, GB).
656
+
657
+ Parameters:
658
+ - folder_path (str): The path of the folder.
659
+
660
+ Returns:
661
+ tuple[int, str]: A tuple containing the total size of all files in bytes and its formatted size.
662
+ If the folder is not found, returns None.
445
663
  """
446
664
  total_size = 0
447
665
 
@@ -469,9 +687,16 @@ def get_folder_size(folder_path):
469
687
  print(f"Folder not found: {folder_path}")
470
688
  return None
471
689
 
472
- def file_creation_date(file_path):
690
+ def file_creation_date(file_path: str) -> datetime:
473
691
  """
474
- Return the last update timestamp
692
+ Return the last update timestamp of a file.
693
+
694
+ Parameters:
695
+ - file_path (str): The path of the file.
696
+
697
+ Returns:
698
+ datetime: The last update timestamp as a datetime object.
699
+ If the file does not exist, returns None.
475
700
  """
476
701
  # Check if the file exists
477
702
  if os.path.exists(file_path):
@@ -488,27 +713,34 @@ def file_creation_date(file_path):
488
713
  ############################################################################
489
714
 
490
715
 
491
- def transform_to_n_items_list(input_list : list, n: int):
716
+ def transform_to_n_items_list(lst: list, n: int) -> list[list]:
492
717
  """
493
718
  Transform a list into a list of n-items sublists.
494
719
 
495
720
  Parameters:
496
- - input_list: The input list to be transformed.
497
- - n: The number of items in each sublist.
721
+ - lst (list): The input list to be transformed.
722
+ - n (int): The number of items in each sublist.
498
723
 
499
724
  Returns:
500
- A list of n-items sublists.
725
+ list[list]: A list of n-items sublists.
501
726
  """
502
- return [input_list[i:i + n] for i in range(0, len(input_list), n)]
727
+ return [lst[i:i + n] for i in range(0, len(lst), n)]
503
728
 
504
- def unduplicate_list(lst):
729
+
730
+ def unduplicate_list(lst: list) -> list:
505
731
  """
506
- Unduplicate elements of a list
732
+ Remove duplicate elements from a list.
733
+
734
+ Parameters:
735
+ - lst (list): The input list with possible duplicate elements.
736
+
737
+ Returns:
738
+ list: A list with duplicate elements removed.
507
739
  """
508
740
  return list(set(lst))
509
741
 
510
742
 
511
- def sort_list(lst, reverse=False):
743
+ def sort_list(lst: list, reverse: bool = False) -> list:
512
744
  """
513
745
  Sort the list in ascending or descending order.
514
746
 
@@ -518,12 +750,12 @@ def sort_list(lst, reverse=False):
518
750
  If False (default), sort the list in ascending order.
519
751
 
520
752
  Returns:
521
- - list: A new list sorted based on the specified order.
753
+ list: A new list sorted based on the specified order.
522
754
  """
523
755
  return sorted(lst, reverse=reverse)
524
756
 
525
757
 
526
- def map_list(lst, function):
758
+ def map_list(lst: list, function: callable) -> list:
527
759
  """
528
760
  Apply a function to each element of the list.
529
761
 
@@ -532,12 +764,12 @@ def map_list(lst, function):
532
764
  - function (callable): The function to apply to each element.
533
765
 
534
766
  Returns:
535
- - list: A new list with the function applied to each element.
767
+ list: A new list with the function applied to each element.
536
768
  """
537
769
  return [function(element) for element in lst]
538
770
 
539
771
 
540
- def flatten_list(lst):
772
+ def flatten_list(lst: list) -> list:
541
773
  """
542
774
  Flatten a nested list into a single list.
543
775
 
@@ -545,7 +777,7 @@ def flatten_list(lst):
545
777
  - lst (list): The input nested list.
546
778
 
547
779
  Returns:
548
- - list: A new list with all nested elements flattened.
780
+ list: A new list with all nested elements flattened.
549
781
  """
550
782
  flattened_list = []
551
783
 
@@ -560,7 +792,7 @@ def flatten_list(lst):
560
792
  return flattened_list
561
793
 
562
794
 
563
- def find_occurrences(lst, element):
795
+ def find_occurrences(lst: list, element) -> int:
564
796
  """
565
797
  Find the occurrences of a specific element in the list.
566
798
 
@@ -569,12 +801,12 @@ def find_occurrences(lst, element):
569
801
  - element: The element to find occurrences of.
570
802
 
571
803
  Returns:
572
- - int: The number of occurrences of the specified element in the list.
804
+ int: The number of occurrences of the specified element in the list.
573
805
  """
574
806
  return lst.count(element)
575
807
 
576
808
 
577
- def is_subset(subset, superset):
809
+ def is_subset(subset: list, superset: list) -> bool:
578
810
  """
579
811
  Check if one list is a subset of another.
580
812
 
@@ -583,11 +815,11 @@ def is_subset(subset, superset):
583
815
  - superset (list): The superset list.
584
816
 
585
817
  Returns:
586
- - bool: True if the subset is a subset of the superset, False otherwise.
818
+ bool: True if the subset is a subset of the superset, False otherwise.
587
819
  """
588
820
  return all(element in superset for element in subset)
589
821
 
590
- def common_elements(list1, list2):
822
+ def common_elements(list1: list, list2: list) -> list:
591
823
  """
592
824
  Find the common elements between two lists.
593
825
 
@@ -596,12 +828,12 @@ def common_elements(list1, list2):
596
828
  - list2 (list): The second list.
597
829
 
598
830
  Returns:
599
- - list: A new list containing the common elements between list1 and list2.
831
+ list: A new list containing the common elements between list1 and list2.
600
832
  """
601
833
  return list(set(list1) & set(list2))
602
834
 
603
835
 
604
def shuffle_list(lst: list) -> list:
    """
    Shuffle the elements of the list randomly.

    Parameters:
    - lst (list): The input list.

    Returns:
    list: A new list with the elements shuffled randomly.
    """
    # Work on a fresh copy so the caller's list is left untouched.
    result = list(lst)
    random.shuffle(result)
    return result
- def sample_list(lst, sample_size):
851
+ def sample_list(lst: list, sample_size) -> list:
620
852
  """
621
853
  Sample a list based on an integer or a float representing the sample size.
622
854
 
@@ -626,7 +858,11 @@ def sample_list(lst, sample_size):
626
858
  If a float, the percentage of elements to keep.
627
859
 
628
860
  Returns:
629
- - list: A new list containing the sampled elements.
861
+ list: A new list containing the sampled elements.
862
+
863
+ Raises:
864
+ - ValueError: If the sample size is invalid (negative integer or float outside [0, 1]).
865
+ - TypeError: If the sample size is neither an integer nor a float.
630
866
  """
631
867
  if isinstance(sample_size, int):
632
868
  if sample_size < 0:
@@ -640,7 +876,7 @@ def sample_list(lst, sample_size):
640
876
  else:
641
877
  raise TypeError("Sample size must be an integer or a float.")
642
878
 
643
def count_elements(lst: list) -> dict:
    """
    Count the occurrences of each element in the list.

    Parameters:
    - lst (list): The input list.

    Returns:
    dict: A dictionary where keys are unique elements from the list, and values are their counts.
    """
    # Manual tally; keys appear in first-occurrence order, matching
    # dict(Counter(lst)).
    counts = {}
    for item in lst:
        counts[item] = counts.get(item, 0) + 1
    return counts
655
def scale_list(lst: list, min_val: float = 1, max_val: float = 5) -> list:
    """
    Scale the values of a list linearly to the range [min_val, max_val].

    Parameters:
    - lst (list): The input list of numeric values to be scaled.
    - min_val (float): The minimum value of the output range (default is 1).
    - max_val (float): The maximum value of the output range (default is 5).

    Returns:
    list: A new list with values scaled to the specified range. If all input
    values are identical, every output value is min_val.

    Raises:
    ValueError: If the input list is empty (min()/max() of an empty sequence).
    """
    min_w = min(lst)
    max_w = max(lst)
    span = max_w - min_w
    # Degenerate case: all values identical. The previous version caught
    # ZeroDivisionError on every iteration; the condition is loop-invariant,
    # so test it once up front instead.
    if span == 0:
        return [min_val] * len(lst)
    # Same per-item arithmetic order as before, so results are bit-identical.
    return [(x - min_w) / span * (max_val - min_val) + min_val for x in lst]
def df_scale_column(df: pd.DataFrame, col_to_scale: str, col_out: str, min_val: float, max_val: float) -> pd.DataFrame:
    """
    Scale values in a DataFrame column linearly to a specified range.

    Parameters:
    - df (pd.DataFrame): The input DataFrame (modified in place).
    - col_to_scale (str): The name of the column to be scaled.
    - col_out (str): The name of the new column to store scaled values.
    - min_val (float): The minimum value of the output range.
    - max_val (float): The maximum value of the output range.

    Returns:
    pd.DataFrame: The same DataFrame with a new column containing scaled values.
    """
    min_freq = df[col_to_scale].min()
    max_freq = df[col_to_scale].max()
    # Vectorized Series arithmetic replaces the previous row-wise
    # apply(lambda ...); same formula, evaluated in one pass.
    # NOTE(review): if the column is constant, max_freq == min_freq and the
    # division yields inf/NaN — same as the previous implementation.
    df[col_out] = (df[col_to_scale] - min_freq) / (max_freq - min_freq) * (max_val - min_val) + min_val
    return df
675
934
  ############################################################################
676
935
  # ZIP HELPERS
677
936
  ############################################################################
678
937
 
679
- def zip_file(source_file_path, zip_file_path, name):
938
+ def zip_file(source_file_path: str, zip_file_path: str, name: str) -> str:
680
939
  """
681
940
  Zip a single file.
682
941
 
683
- Args:
684
- source_file_path (str): Path to the file to be zipped.
685
- zip_file_path (str): Path for the resulting zip file.
942
+ Parameters:
943
+ - source_file_path (str): Path to the file to be zipped.
944
+ - zip_file_path (str): Path for the resulting zip file.
945
+ - name (str): Name for the resulting zip file (without extension).
686
946
 
687
947
  Returns:
688
- None
948
+ str: Path to the resulting zip file.
689
949
  """
690
- file_path=os.path.join(zip_file_path, name+".zip")
950
+ file_path = os.path.join(zip_file_path, f"{name}.zip")
691
951
 
692
952
  with zipfile.ZipFile(file_path, 'w') as zip_file:
693
953
  # The second argument to `arcname` is used to set the name of the file inside the zip
@@ -695,18 +955,19 @@ def zip_file(source_file_path, zip_file_path, name):
695
955
 
696
956
  return file_path
697
957
 
698
- def zip_folder(source_folder_path, zip_file_path, name):
958
+ def zip_folder(source_folder_path: str, zip_file_path: str, name: str) -> str:
699
959
  """
700
960
  Zip an entire folder.
701
961
 
702
- Args:
703
- source_folder_path (str): Path to the folder to be zipped.
704
- zip_file_path (str): Path for the resulting zip file.
962
+ Parameters:
963
+ - source_folder_path (str): Path to the folder to be zipped.
964
+ - zip_file_path (str): Path for the resulting zip file.
965
+ - name (str): Name for the resulting zip file (without extension).
705
966
 
706
967
  Returns:
707
- None
968
+ str: Path to the resulting zip file.
708
969
  """
709
- file_path=os.path.join(zip_file_path, name+".zip")
970
+ file_path = os.path.join(zip_file_path, f"{name}.zip")
710
971
 
711
972
  with zipfile.ZipFile(file_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
712
973
  for foldername, subfolders, filenames in os.walk(source_folder_path):
@@ -717,13 +978,19 @@ def zip_folder(source_folder_path, zip_file_path, name):
717
978
 
718
979
  return file_path
719
980
 
720
def unzip_file(zip_file_path: str, destination_path: str) -> None:
    """
    Unzip a zip file.

    Parameters:
    - zip_file_path (str): Path to the zip file to be unzipped.
    - destination_path (str): Path where the contents of the zip file will be extracted.

    Returns:
    None
    """
    # Context manager guarantees the archive handle is closed.
    with zipfile.ZipFile(zip_file_path, "r") as archive:
        archive.extractall(destination_path)
728
995
 
729
996
  ############################################################################
@@ -731,19 +998,32 @@ def unzip_file(zip_file_path, destination_path):
731
998
  ############################################################################
732
999
 
733
1000
 
734
def create_google_spreadsheet_client(credentials: str):
    """
    Create a Gspread client to interact with Google Sheets.

    Parameters:
    - credentials (str): Path to the JSON file containing Google Service Account credentials.

    Returns:
    gspread.Client: A client object for interacting with Google Sheets.
    """
    client = gspread.service_account(filename=credentials)
    return client
740
- def read_google_spreadsheet(client, sheet_id: str, worksheet_name: str):
1013
+ def read_google_spreadsheet(client: gspread.Client, sheet_id: str, worksheet_name: str) -> pd.DataFrame:
741
1014
  """
742
- Function to read a Google spreadsheet in a DataFrame
1015
+ Read data from a Google spreadsheet and return it as a DataFrame.
1016
+
1017
+ Parameters:
1018
+ - client (gspread.Client): A Gspread client object authenticated with Google Sheets API.
1019
+ - sheet_id (str): The ID of the Google spreadsheet.
1020
+ - worksheet_name (str): The name of the worksheet within the spreadsheet.
1021
+
1022
+ Returns:
1023
+ pd.DataFrame: A DataFrame containing the data from the specified worksheet.
743
1024
  """
744
1025
  try:
745
-
746
- # Open the Google Spreadsheet by name
1026
+ # Open the Google Spreadsheet by ID
747
1027
  sheet = client.open_by_key(sheet_id)
748
1028
 
749
1029
  # Select a specific worksheet by name
@@ -763,29 +1043,52 @@ def read_google_spreadsheet(client, sheet_id: str, worksheet_name: str):
763
1043
  print(f"An error occurred: {e}")
764
1044
 
765
1045
 
766
def list_google_worksheets(client: gspread.Client, sheet_id: str) -> list:
    """
    Return a list of worksheet names for a spreadsheet ID.

    Parameters:
    - client (gspread.Client): A Gspread client object authenticated with Google Sheets API.
    - sheet_id (str): The ID of the Google spreadsheet.

    Returns:
    list: A list of worksheet names.
    """
    sheet = client.open_by_key(sheet_id)
    # Use a distinct loop variable: the previous version reused the name
    # `sheet` inside the comprehension, shadowing the spreadsheet handle.
    return [worksheet.title for worksheet in sheet.worksheets()]
775
def get_spreadsheet_permissions(client: gspread.Client, sheet_id: str) -> pd.DataFrame:
    """
    Return a DataFrame with the list of user email and type that can access the document.

    Parameters:
    - client (gspread.Client): A Gspread client object authenticated with Google Sheets API.
    - sheet_id (str): The ID of the Google spreadsheet.

    Returns:
    pd.DataFrame: A DataFrame containing the list of user email addresses and their access types.
    """
    sheet = client.open_by_key(sheet_id)
    # Keep only permission entries that carry an email address.
    rows = []
    for perm in sheet.list_permissions():
        address = perm.get("emailAddress")
        if address is not None:
            rows.append((address, perm.get("type")))
    return pd.DataFrame(rows, columns=['email', 'type'])
786
- def create_google_spreadsheet(client, df, filename:str, worksheet_name:str = "Sheet1"):
1080
+ def create_google_spreadsheet(client: gspread.Client, df: pd.DataFrame, filename: str, worksheet_name: str = "Sheet1") -> gspread.Spreadsheet:
787
1081
  """
788
- Load a dataframe in a new spreadsheet
1082
+ Create a new Google spreadsheet and load a DataFrame into it.
1083
+
1084
+ Parameters:
1085
+ - client (gspread.Client): A Gspread client object authenticated with Google Sheets API.
1086
+ - df (pd.DataFrame): The DataFrame to be loaded into the spreadsheet.
1087
+ - filename (str): The desired filename for the new spreadsheet.
1088
+ - worksheet_name (str, optional): The name of the worksheet within the spreadsheet. Defaults to "Sheet1".
1089
+
1090
+ Returns:
1091
+ gspread.Spreadsheet: The created spreadsheet object.
789
1092
  """
790
1093
  spreadsheet = client.create(filename)
791
1094
  worksheet = spreadsheet.sheet1
@@ -795,17 +1098,34 @@ def create_google_spreadsheet(client, df, filename:str, worksheet_name:str = "Sh
795
1098
 
796
1099
  return spreadsheet
797
1100
 
798
def share_google_spreadsheet(spreadsheet: gspread.Spreadsheet, email: str, user_type: str = "user", user_role: str = "writer", notify: bool = False, email_message: str = None, with_link: bool = False) -> gspread.Spreadsheet:
    """
    Share a spreadsheet with a user.

    Parameters:
    - spreadsheet (gspread.Spreadsheet): The Google spreadsheet object to be shared.
    - email (str): The email address of the user with whom the spreadsheet will be shared.
    - user_type (str, optional): The permission type for the user. Defaults to "user".
    - user_role (str, optional): The role assigned to the user. Defaults to "writer".
    - notify (bool, optional): Whether to notify the user about the sharing. Defaults to False.
    - email_message (str, optional): The message to include in the notification email.
    - with_link (bool, optional): Whether to include a link to the shared document in the notification email. Defaults to False.

    Returns:
    gspread.Spreadsheet: The updated spreadsheet object.
    """
    spreadsheet.share(
        email,
        perm_type=user_type,
        role=user_role,
        notify=notify,
        email_message=email_message,
        with_link=with_link,
    )
    return spreadsheet
805
-
806
- def generate_short_id(variables : dict):
1120
+ def generate_short_id(variables: dict) -> tuple[str, str]:
807
1121
  """
808
- Generate a 8 characters ID using a dict as input
1122
+ Generate an 8-character ID using a dictionary as input.
1123
+
1124
+ Parameters:
1125
+ - variables (dict): A dictionary containing the variables to be serialized.
1126
+
1127
+ Returns:
1128
+ tuple: A tuple containing the generated short ID and the serialized variables.
809
1129
  """
810
1130
  # Serialize variables into JSON string
811
1131
  serialized_variables = json.dumps(variables, sort_keys=True)
@@ -815,7 +1135,7 @@ def generate_short_id(variables : dict):
815
1135
  short_id = hash_value[:8]
816
1136
  return short_id, serialized_variables
817
1137
 
818
- def df_transform_column_as_list(column):
1138
+ def df_transform_column_as_list(column: pd.Series) -> pd.Series:
819
1139
  def transform(cell):
820
1140
  if isinstance(cell, str):
821
1141
  # Check if it's a list formatted as string, and convert to list
@@ -824,9 +1144,7 @@ def df_transform_column_as_list(column):
824
1144
  else:
825
1145
  try:
826
1146
  values = ast.literal_eval(cell)
827
-
828
1147
  except Exception as e:
829
- pass
830
1148
  # If it's a single URL as string, make it a list
831
1149
  values = [cell]
832
1150
  elif isinstance(cell, (int, float, bool)):
@@ -844,7 +1162,11 @@ def df_transform_column_as_list(column):
844
1162
  return column.apply(transform)
845
1163
 
846
1164
 
847
- def top_rows_per_category(df, col_to_sort, col_to_gb, cols_to_keep, top_rows) :
1165
+ def top_rows_per_category(df: pd.DataFrame,
1166
+ col_to_sort: str,
1167
+ col_to_gb: str,
1168
+ cols_to_keep: list[str],
1169
+ top_rows: int) -> pd.DataFrame:
848
1170
  """
849
1171
  Select top rows for each category in a dataframe
850
1172
  """
@@ -855,7 +1177,7 @@ def top_rows_per_category(df, col_to_sort, col_to_gb, cols_to_keep, top_rows) :
855
1177
  )[cols_to_keep]
856
1178
  return df_gb
857
1179
 
858
- def format_number(number):
1180
+ def format_number(number: int) -> str:
859
1181
  """
860
1182
  Function to format a number in K, M or B
861
1183
  """
@@ -866,4 +1188,32 @@ def format_number(number):
866
1188
  elif number < 1000000000:
867
1189
  return f"{number / 1000000:.1f}M"
868
1190
  else:
869
- return f"{number / 1000000000:.1f}B"
1191
+ return f"{number / 1000000000:.1f}B"
1192
+
1193
+
1194
+
1195
def unrar_file(rar_file_path: str, output_dir: str) -> None:
    """
    Extracts a .rar file to the specified output directory using the unrar command.

    Parameters:
    rar_file_path (str): The path to the .rar file.
    output_dir (str): The directory where the contents should be extracted.

    Returns:
    None

    Notes:
    Best-effort: failures (missing unrar binary, bad archive) are printed,
    not raised, matching the module's other helpers.
    """
    try:
        # Create the output directory natively instead of shelling out to
        # `mkdir -p` (portable across platforms, no extra process).
        os.makedirs(output_dir, exist_ok=True)

        # Run unrar with a list argv (no shell), capturing output so a
        # failure can be reported.
        result = subprocess.run(['unrar', 'x', '-y', rar_file_path, output_dir],
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

        # unrar returns a non-zero exit code on failure.
        if result.returncode != 0:
            print(f"Extraction failed. Error: {result.stderr}")

    except Exception as e:
        print(f"An error occurred: {e}")