opsci-toolbox 0.0.2__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,12 +15,27 @@ import pyarrow.parquet as pq
15
15
  from datetime import datetime
16
16
  import hashlib
17
17
  import ast
18
+ import subprocess
18
19
 
19
20
  ####################################################################################################
20
21
  # FILE LOADERS
21
22
  ####################################################################################################
22
23
 
23
- def load_file(path, delimiter = ";", decimal ="."):
24
+ def load_file(path: str, delimiter: str = ";", decimal: str = ".") -> pd.DataFrame:
25
+ """
26
+ Load a file into a Pandas DataFrame based on the file extension.
27
+
28
+ Parameters:
29
+ path (str): The file path to load.
30
+ delimiter (str, optional): The delimiter used in CSV/TSV files. Default is ";".
31
+ decimal (str, optional): The character used for decimal points in CSV/TSV files. Default is ".".
32
+
33
+ Returns:
34
+ pd.DataFrame: The loaded data as a Pandas DataFrame.
35
+
36
+ Raises:
37
+ ValueError: If the file extension is not supported.
38
+ """
24
39
  extension = os.path.splitext(os.path.basename(path))[1]
25
40
  if extension == ".parquet":
26
41
  df = load_parquet(path)
@@ -38,9 +53,18 @@ def load_file(path, delimiter = ";", decimal ="."):
38
53
  print("Check your input file. Extension isn't supported : .parquet, .pickle, .json, .jsonl, .csv, .tsv")
39
54
  return df
40
55
 
41
- def load_parquet(path):
56
+ def load_parquet(path: str) -> pd.DataFrame:
42
57
  """
43
- Load a parquet file into a DataFrame
58
+ Load a parquet file into a DataFrame.
59
+
60
+ Parameters:
61
+ path (str): The file path to the parquet file.
62
+
63
+ Returns:
64
+ pd.DataFrame: The loaded data as a Pandas DataFrame.
65
+
66
+ Raises:
67
+ Exception: If there is an error reading the parquet file.
44
68
  """
45
69
  try:
46
70
  table = pq.read_table(path)
@@ -50,66 +74,108 @@ def load_parquet(path):
50
74
  print(e)
51
75
  return df
52
76
 
53
- def load_pickle(path: str):
54
- """
55
- Load a pickle file into a dataframe
77
+ def load_pickle(path: str) -> pd.DataFrame:
56
78
  """
79
+ Load a pickle file into a DataFrame.
57
80
 
58
- with open(path, 'rb') as f:
59
- df=pickle.load(f)
60
- return df
81
+ Parameters:
82
+ path (str): The file path to the pickle file.
83
+
84
+ Returns:
85
+ pd.DataFrame: The loaded data as a Pandas DataFrame.
86
+ """
87
+ return pd.read_pickle(path)
88
+
61
89
 
62
- def load_json(path: str):
90
+ def load_json(path: str) -> pd.DataFrame:
63
91
  """
64
- Load a json file into a DataFrame
92
+ Load a JSON file into a DataFrame.
93
+
94
+ Parameters:
95
+ path (str): The file path to the JSON file.
96
+
97
+ Returns:
98
+ pd.DataFrame: The loaded data as a Pandas DataFrame.
99
+
100
+ Raises:
101
+ Exception: If there is an error reading the JSON file.
65
102
  """
66
- df=pd.DataFrame()
103
+ df = pd.DataFrame()
67
104
  try:
68
105
  with open(path, 'r') as json_file:
69
106
  data = json.load(json_file)
70
- df=pd.json_normalize(data)
71
-
107
+ df = pd.json_normalize(data)
72
108
  except Exception as e:
73
- pass
74
- print(e)
109
+ print(f"Error reading the JSON file: {e}")
110
+ raise
75
111
  return df
76
112
 
77
- def load_jsonl(path: str):
113
+ def load_jsonl(path: str) -> pd.DataFrame:
78
114
  """
79
- Load a jsonl file into a dataframe
115
+ Load a JSON Lines (jsonl) file into a DataFrame.
116
+
117
+ Parameters:
118
+ path (str): The file path to the jsonl file.
119
+
120
+ Returns:
121
+ pd.DataFrame: The loaded data as a Pandas DataFrame.
122
+
123
+ Raises:
124
+ Exception: If there is an error reading the jsonl file.
80
125
  """
81
126
  df = pd.DataFrame()
82
127
  try:
83
128
  data = []
84
129
  with open(path, 'r') as json_file:
85
- for line in tqdm(json_file):
130
+ for line in tqdm(json_file, desc="Loading JSON Lines"):
86
131
  try:
87
132
  data.append(json.loads(line))
88
- except:
89
- pass
133
+ except json.JSONDecodeError as line_error:
134
+ print(f"Error decoding line: {line_error}")
90
135
 
91
136
  df = pd.json_normalize(data)
92
137
  except Exception as e:
93
- pass
94
- print(e)
138
+ print(f"Error reading the jsonl file: {e}")
139
+ raise
95
140
  return df
96
141
 
97
142
 
98
- def load_csv(path: str, delimiter: str =";", decimal:str ="."):
143
+ def load_csv(path: str, delimiter: str = ";", decimal: str = ".") -> pd.DataFrame:
99
144
  """
100
- Load a csv file into a dataframe
145
+ Load a CSV file into a DataFrame.
146
+
147
+ Parameters:
148
+ path (str): The file path to the CSV file.
149
+ delimiter (str, optional): The delimiter used in the CSV file. Default is ";".
150
+ decimal (str, optional): The character used for decimal points in the CSV file. Default is ".".
151
+
152
+ Returns:
153
+ pd.DataFrame: The loaded data as a Pandas DataFrame.
154
+
155
+ Raises:
156
+ Exception: If there is an error reading the CSV file.
101
157
  """
102
- df= pd.DataFrame()
158
+ df = pd.DataFrame()
103
159
  try:
104
160
  df = pd.read_csv(path, delimiter=delimiter, encoding="utf-8", decimal=decimal)
105
161
  except Exception as e:
106
- pass
107
- print(e)
162
+ print(f"Error reading the CSV file: {e}")
163
+ raise
108
164
  return df
109
165
 
110
- def read_txt_to_list(file_path: str):
166
+ def read_txt_to_list(file_path: str) -> list[str]:
111
167
  """
112
- Read a text file line by line and append to a Python list
168
+ Read a text file line by line and append to a Python list.
169
+
170
+ Parameters:
171
+ file_path (str): The file path to the text file.
172
+
173
+ Returns:
174
+ list[str]: A list of lines read from the text file.
175
+
176
+ Raises:
177
+ FileNotFoundError: If the file does not exist.
178
+ Exception: If any other error occurs during file reading.
113
179
  """
114
180
 
115
181
  # Initialize an empty list to store the lines
@@ -124,12 +190,22 @@ def read_txt_to_list(file_path: str):
124
190
  print(f"File not found: {file_path}")
125
191
  except Exception as e:
126
192
  print(f"An error occurred: {e}")
193
+ raise
127
194
  return lines
128
195
 
129
-
130
- def read_json(path: str):
196
+ def read_json(path: str) -> dict:
131
197
  """
132
- Read a json file and return a dict
198
+ Read a JSON file and return a dictionary.
199
+
200
+ Parameters:
201
+ path (str): The file path to the JSON file.
202
+
203
+ Returns:
204
+ dict: The data read from the JSON file as a dictionary.
205
+
206
+ Raises:
207
+ FileNotFoundError: If the file does not exist.
208
+ Exception: If there is an error reading the JSON file.
133
209
  """
134
210
  with open(path, 'r') as json_file:
135
211
  data = json.load(json_file)
@@ -137,25 +213,55 @@ def read_json(path: str):
137
213
 
138
214
  def read_txt_file(file_path: str) -> str:
139
215
  """
140
- Read a text file
216
+ Read the content of a text file and return it as a string.
217
+
218
+ Parameters:
219
+ file_path (str): The file path to the text file.
220
+
221
+ Returns:
222
+ str: The content of the text file as a string.
223
+
224
+ Raises:
225
+ FileNotFoundError: If the file does not exist.
226
+ Exception: If there is an error reading the text file.
141
227
  """
142
- with open(file_path, 'r') as file:
143
- content = file.read()
228
+ try:
229
+ with open(file_path, 'r') as file:
230
+ content = file.read()
231
+ except FileNotFoundError:
232
+ print(f"File not found: {file_path}")
233
+ raise
234
+ except Exception as e:
235
+ print(f"An error occurred while reading the file: {e}")
236
+ raise
144
237
  return content
145
238
 
146
- def read_jsonl(path: str):
239
+ def read_jsonl(path: str) -> list[dict]:
147
240
  """
148
- Load a jsonl file into a dataframe
241
+ Load a JSON Lines (jsonl) file into a list of dictionaries.
242
+
243
+ Parameters:
244
+ path (str): The file path to the jsonl file.
245
+
246
+ Returns:
247
+ list[dict]: A list of dictionaries containing the data read from the JSON Lines file.
248
+
249
+ Raises:
250
+ FileNotFoundError: If the file does not exist.
251
+ Exception: If there is an error reading the jsonl file.
149
252
  """
150
253
  json_data = []
151
- with open(path, 'r') as json_file:
152
- for line in tqdm(json_file):
153
- try:
154
- json_data.append(json.loads(line))
155
- except Exception as e:
156
- pass
157
- print(e)
158
-
254
+ try:
255
+ with open(path, 'r') as json_file:
256
+ for line in tqdm(json_file, desc="Reading JSON Lines"):
257
+ try:
258
+ json_data.append(json.loads(line))
259
+ except Exception as e:
260
+ print(f"Error decoding line: {e}")
261
+ raise
262
+ except FileNotFoundError:
263
+ print(f"File not found: {path}")
264
+ raise
159
265
  return json_data
160
266
 
161
267
 
@@ -164,37 +270,55 @@ def read_jsonl(path: str):
164
270
  #########################################################################################
165
271
 
166
272
 
167
- def write_pickle(df: pd.DataFrame, path: str, name: str):
273
+ def write_pickle(data: pd.DataFrame, path: str, filename: str) -> str:
168
274
  """
169
- Write a dataframe into a pickle file
170
- """
171
- file_path=os.path.join(path, name+'.pickle')
275
+ Write a DataFrame into a pickle file.
276
+
277
+ Parameters:
278
+ data (pd.DataFrame): The DataFrame to be written to the pickle file.
279
+ path (str): The directory where the pickle file will be saved.
280
+ filename (str): The name of the pickle file (without the extension).
172
281
 
282
+ Returns:
283
+ str: The full path to the saved pickle file.
284
+ """
285
+ file_path = os.path.join(path, filename + '.pickle')
173
286
  with open(file_path, 'wb') as f:
174
- pickle.dump(df, f)
287
+ pickle.dump(data, f)
175
288
  return file_path
176
289
 
177
290
 
178
- def write_list_to_txt(input_list: list, path: str, name: str):
291
+ def write_list_to_txt(input_list: list, path: str, name: str) -> str:
179
292
  """
180
293
  Write a list to a text file, with each item on a new line.
181
294
 
182
295
  Parameters:
183
- - file_path (str): The path to the text file.
184
296
  - input_list (list): The list to be written to the text file.
297
+ - path (str): The directory path where the text file will be saved.
298
+ - name (str): The name of the text file (without the extension).
299
+
300
+ Returns:
301
+ str: The full path to the saved text file.
185
302
  """
186
- file_path=os.path.join(path, name+'.txt')
303
+ file_path = os.path.join(path, name + '.txt')
187
304
  with open(file_path, 'w') as file:
188
305
  for item in input_list:
189
306
  file.write(str(item) + '\n')
190
-
191
307
  return file_path
192
308
 
193
- def write_jsonl(data: list, path: str, name: str):
309
+ def write_jsonl(data: list[dict], path: str, name: str) -> str:
194
310
  """
195
- Write a jsonl file. Function takes as input a list of dict.
311
+ Write data to a JSON Lines (jsonl) file. Each dictionary in the list represents a single JSON object.
312
+
313
+ Parameters:
314
+ - data (list[dict]): The list of dictionaries to be written to the JSON Lines file.
315
+ - path (str): The directory path where the JSON Lines file will be saved.
316
+ - name (str): The name of the JSON Lines file (without the extension).
317
+
318
+ Returns:
319
+ str: The full path to the saved JSON Lines file.
196
320
  """
197
- file_path=os.path.join(path, name+'.jsonl')
321
+ file_path = os.path.join(path, name + '.jsonl')
198
322
  with open(file_path, 'w') as file:
199
323
  for entry in data:
200
324
  json.dump(entry, file)
@@ -202,41 +326,67 @@ def write_jsonl(data: list, path: str, name: str):
202
326
  return file_path
203
327
 
204
328
 
205
- def write_json(json_dict: dict, path: str, name: str):
329
+ def write_json(json_dict: dict, path: str, name: str) -> str:
206
330
  """
207
- Write a dict into a json file
331
+ Write a dictionary to a JSON file.
332
+
333
+ Parameters:
334
+ - json_dict (dict): The dictionary to be written to the JSON file.
335
+ - path (str): The directory path where the JSON file will be saved.
336
+ - name (str): The name of the JSON file (without the extension).
337
+
338
+ Returns:
339
+ str: The full path to the saved JSON file.
208
340
  """
209
- file_path=os.path.join(path, name+'.json')
341
+ file_path = os.path.join(path, name + '.json')
210
342
  with open(file_path, 'w') as outfile:
211
343
  json.dump(json_dict, outfile)
212
344
  return file_path
213
345
 
214
346
 
215
- def write_dataframe_to_json(df: pd.DataFrame, path: str, name: str, orient='records'):
347
+ def write_dataframe_to_json(df: pd.DataFrame, path: str, name: str, orient: str = 'records') -> str:
216
348
  """
217
- Write a dataframe into a json file
349
+ Write a DataFrame to a JSON file.
350
+
351
+ Parameters:
352
+ - df (pd.DataFrame): The DataFrame to be written to the JSON file.
353
+ - path (str): The directory path where the JSON file will be saved.
354
+ - name (str): The name of the JSON file (without the extension).
355
+ - orient (str, optional): The format of the JSON file. Default is 'records'.
356
+
357
+ Returns:
358
+ str: The full path to the saved JSON file.
218
359
  """
219
- file_path=os.path.join(path, name+".json")
360
+ file_path = os.path.join(path, name + ".json")
220
361
  df.to_json(file_path, orient=orient, lines=True)
362
+ return file_path
221
363
 
222
364
 
223
- def save_dataframe_excel(df: pd.DataFrame, path: str, name :str, sheet_name:str):
365
+ def save_dataframe_excel(df: pd.DataFrame, path: str, name: str, sheet_name: str) -> str:
224
366
  """
225
- Write a dataframe into a XLSX file
367
+ Write a DataFrame to an Excel file.
368
+
369
+ Parameters:
370
+ - df (pd.DataFrame): The DataFrame to be written to the Excel file.
371
+ - path (str): The directory path where the Excel file will be saved.
372
+ - name (str): The name of the Excel file (without the extension).
373
+ - sheet_name (str): The name of the Excel sheet.
374
+
375
+ Returns:
376
+ str: The full path to the saved Excel file.
226
377
  """
227
-
228
- file_path=os.path.join(path, f"{name}.xlsx")
378
+ file_path = os.path.join(path, f"{name}.xlsx")
229
379
  df.to_excel(file_path, sheet_name=sheet_name, index=False)
230
380
  print(file_path, "- File created")
231
381
  return file_path
232
382
 
233
- def add_dataframe_to_excel(df: pd.DataFrame, existing_file_path: str, new_sheet_name: str):
383
+ def add_dataframe_to_excel(df: pd.DataFrame, existing_file_path: str, new_sheet_name: str) -> None:
234
384
  """
235
385
  Adds a DataFrame to an existing Excel file as a new sheet.
236
386
 
237
387
  Parameters:
388
+ - df (pd.DataFrame): The DataFrame to be added.
238
389
  - existing_file_path (str): Path to the existing Excel file.
239
- - dataframe (pd.DataFrame): The DataFrame to be added.
240
390
  - new_sheet_name (str): Name of the new sheet in the Excel file.
241
391
 
242
392
  Returns:
@@ -245,7 +395,7 @@ def add_dataframe_to_excel(df: pd.DataFrame, existing_file_path: str, new_sheet_
245
395
  # Read existing Excel file into a dictionary of DataFrames
246
396
  excel_file = pd.read_excel(existing_file_path, sheet_name=None)
247
397
 
248
- # Add the new DataFrame to the dictionary with the specified sheet aname
398
+ # Add the new DataFrame to the dictionary with the specified sheet name
249
399
  excel_file[new_sheet_name] = df
250
400
 
251
401
  # Write the updated dictionary of DataFrames back to the Excel file
@@ -253,46 +403,62 @@ def add_dataframe_to_excel(df: pd.DataFrame, existing_file_path: str, new_sheet_
253
403
  for sheet_name, df in excel_file.items():
254
404
  df.to_excel(writer, sheet_name=sheet_name, index=False)
255
405
 
256
- def save_dataframe_csv(df: pd.DataFrame, path: str, name: str):
406
+ def save_dataframe_csv(df: pd.DataFrame, path: str, name: str) -> str:
257
407
  """
258
- This function saves a DataFrame to a CSV file within a project directory.
259
-
260
- :param df: The DataFrame to be saved.
261
- :type df: pandas.DataFrame
262
-
263
- :param dir_csv: The directory where the CSV file will be saved.
264
- :type dir_csv: str
265
-
266
- :param name: The desired name for the CSV file (without extension).
267
- :type name: str
408
+ Save a DataFrame to a CSV file within a specified directory.
409
+
410
+ Parameters:
411
+ - df (pd.DataFrame): The DataFrame to be saved.
412
+ - path (str): The directory where the CSV file will be saved.
413
+ - name (str): The desired name for the CSV file (without extension).
414
+
415
+ Returns:
416
+ str: The full path to the saved CSV file.
268
417
  """
269
- names = df.columns
418
+ file_path = os.path.join(path, f"{name}.csv")
270
419
  df.to_csv(
271
- os.path.join(path, f"{name}.csv"),
272
- header=names,
420
+ file_path,
273
421
  sep=";",
274
422
  encoding="utf-8",
275
423
  index=False,
276
424
  decimal=",",
277
425
  )
278
- print("FILE SAVED: ", os.path.join(path, f"{name}.csv"))
426
+ print("File saved:", file_path)
427
+ return file_path
279
428
 
280
- def write_txt_file(data: str, path: str, name: str):
429
+ def write_txt_file(data: str, path: str, name: str) -> str:
281
430
  """
282
- Write a text file
431
+ Write a string to a text file.
432
+
433
+ Parameters:
434
+ - data (str): The string to be written to the text file.
435
+ - path (str): The directory path where the text file will be saved.
436
+ - name (str): The name of the text file (without the extension).
437
+
438
+ Returns:
439
+ str: The full path to the saved text file.
283
440
  """
284
- file_path=os.path.join(path, name+'.txt')
441
+ file_path = os.path.join(path, name + '.txt')
285
442
  with open(file_path, "w") as file:
286
443
  file.write(data)
287
444
  return file_path
288
445
 
289
- def split_df_into_chunks(df, path, name, chunk_size = 10000):
446
+ def split_df_into_chunks(df: pd.DataFrame, path: str, name: str, chunk_size: int = 10000) -> list[str]:
290
447
  """
291
- Split a dataframe into n pickle files
448
+ Split a DataFrame into multiple pickle files with a specified chunk size.
449
+
450
+ Parameters:
451
+ - df (pd.DataFrame): The DataFrame to be split.
452
+ - path (str): The directory path where the pickle files will be saved.
453
+ - name (str): The base name for the pickle files.
454
+ - chunk_size (int, optional): The size of each chunk. Default is 10000.
455
+
456
+ Returns:
457
+ list[str]: A list of file paths to the saved pickle files.
292
458
  """
293
459
  num_chunks = -(-len(df) // chunk_size) # Calculate the number of chunks using ceil division
294
460
 
295
- file_paths=[]
461
+ file_paths = []
296
462
 
297
463
  # create smaller datasets of chunk_size each
298
464
  for i in range(num_chunks):
@@ -305,16 +471,19 @@ def split_df_into_chunks(df, path, name, chunk_size = 10000):
305
471
 
306
472
  return file_paths
307
473
 
308
-
309
-
310
474
  ###################################################################################################
311
475
  # FOLDERS / FILES HELPERS
312
476
  ###################################################################################################
313
477
 
314
- def create_dir(path:str):
478
+ def create_dir(path: str) -> str:
315
479
  """
316
- Create a local directory
480
+ Create a local directory if it doesn't exist.
481
+
482
+ Parameters:
483
+ - path (str): The directory path to be created.
317
484
 
485
+ Returns:
486
+ str: The path of the created directory.
318
487
  """
319
488
  if not os.path.exists(path):
320
489
  os.makedirs(path)
@@ -322,18 +491,31 @@ def create_dir(path:str):
322
491
  return path
323
492
 
324
493
 
325
- def list_files_in_dir(path: str, filetype:str ='*.json'):
494
+ def list_files_in_dir(path: str, filetype: str = '*.json') -> list[str]:
326
495
  """
327
- List files of a specific format in a directory
496
+ List files of a specific format in a directory.
497
+
498
+ Parameters:
499
+ - path (str): The directory path to search for files.
500
+ - filetype (str, optional): The file type pattern to search for. Default is '*.json'.
501
+
502
+ Returns:
503
+ list[str]: A list of file paths matching the specified file type pattern.
328
504
  """
329
505
  pattern = os.path.join(path, filetype)
330
506
  files = glob.glob(pattern)
331
507
  return files
332
508
 
333
509
 
334
- def list_subdirectories(root_directory: str):
510
+ def list_subdirectories(root_directory: str) -> list[str]:
335
511
  """
336
- List subdirectories in a root directory
512
+ List subdirectories in a root directory.
513
+
514
+ Parameters:
515
+ - root_directory (str): The root directory path.
516
+
517
+ Returns:
518
+ list[str]: A list of subdirectory names.
337
519
  """
338
520
  subdirectories = []
339
521
  for entry in os.scandir(root_directory):
@@ -342,9 +524,15 @@ def list_subdirectories(root_directory: str):
342
524
  return subdirectories
343
525
 
344
526
 
345
- def list_recursive_subdirectories(root_directory: str):
527
+ def list_recursive_subdirectories(root_directory: str) -> list[str]:
346
528
  """
347
- List recursively all subdirectories from a root directory
529
+ List recursively all subdirectories from a root directory.
530
+
531
+ Parameters:
532
+ - root_directory (str): The root directory path.
533
+
534
+ Returns:
535
+ list[str]: A list of subdirectory paths.
348
536
  """
349
537
  subdirectories = []
350
538
  for root, dirs, files in os.walk(root_directory):
@@ -352,9 +540,16 @@ def list_recursive_subdirectories(root_directory: str):
352
540
  return subdirectories
353
541
 
354
542
 
355
- def list_files_in_subdirectories(path:str, filetype:str='*.json'):
543
+ def list_files_in_subdirectories(path: str, filetype: str = '*.json') -> list[str]:
356
544
  """
357
- Walk through subdirectories of a root directory to list files of a specific format
545
+ Walk through subdirectories of a root directory to list files of a specific format.
546
+
547
+ Parameters:
548
+ - path (str): The root directory path.
549
+ - filetype (str, optional): The file type pattern to search for. Default is '*.json'.
550
+
551
+ Returns:
552
+ list[str]: A list of file paths matching the specified file type pattern in subdirectories.
358
553
  """
359
554
  files = []
360
555
 
@@ -369,21 +564,36 @@ def list_files_in_subdirectories(path:str, filetype:str='*.json'):
369
564
 
370
565
  return files
371
566
 
372
- def copy_file(source_path: str, destination_path: str, new_filename:str):
567
+ def copy_file(source_path: str, destination_path: str, new_filename: str = '') -> str:
373
568
  """
374
- Function to copy a file to another path
569
+ Copy a file from a source path to a destination path.
570
+
571
+ Parameters:
572
+ - source_path (str): The path of the source file.
573
+ - destination_path (str): The path of the destination directory.
574
+ - new_filename (str, optional): The new filename. If not provided, the original filename is used.
575
+
576
+ Returns:
577
+ str: The path of the copied file.
375
578
  """
376
579
  if new_filename:
377
- file_path=os.path.join(destination_path, new_filename)
580
+ file_path = os.path.join(destination_path, new_filename)
378
581
  else:
379
- filename=os.path.basename(source_path)
380
- file_path=os.path.join(destination_path,filename)
582
+ filename = os.path.basename(source_path)
583
+ file_path = os.path.join(destination_path, filename)
584
+
381
585
  shutil.copy(source_path, file_path)
382
586
  return file_path
383
587
 
384
- def remove_file(file_path):
588
+ def remove_file(file_path: str) -> None:
385
589
  """
386
- Remove a single file
590
+ Remove a single file.
591
+
592
+ Parameters:
593
+ - file_path (str): The path of the file to be removed.
594
+
595
+ Returns:
596
+ None
387
597
  """
388
598
  try:
389
599
  os.remove(file_path)
@@ -391,20 +601,33 @@ def remove_file(file_path):
391
601
  except OSError as e:
392
602
  print(f"Error removing file {file_path}: {e}")
393
603
 
394
- def remove_folder(folder_path):
604
+ def remove_folder(folder_path: str) -> None:
395
605
  """
396
- Remove a folder and all the files inside
606
+ Remove a folder and all its contents.
607
+
608
+ Parameters:
609
+ - folder_path (str): The path of the folder to be removed.
610
+
611
+ Returns:
612
+ None
397
613
  """
398
614
  try:
399
615
  shutil.rmtree(folder_path)
400
616
  print(f"Folder {folder_path} and its contents removed successfully.")
401
617
  except OSError as e:
402
- print(f"Error removing folder {folder_path}: {e}")
618
+ print(f"Error removing folder {folder_path}: {e}")
403
619
 
404
620
 
405
- def get_file_size(file_path):
621
+ def get_file_size(file_path: str) -> tuple[int, str]:
406
622
  """
407
- Get a single file size in a readable format (KB, MB, GB)
623
+ Get the size of a single file in a readable format (KB, MB, GB).
624
+
625
+ Parameters:
626
+ - file_path (str): The path of the file.
627
+
628
+ Returns:
629
+ tuple[int, str]: A tuple containing the size of the file in bytes and its formatted size.
630
+ If the file is not found, returns None.
408
631
  """
409
632
  try:
410
633
  size = os.path.getsize(file_path)
@@ -427,9 +650,16 @@ def get_file_size(file_path):
427
650
  print(f"File not found: {file_path}")
428
651
  return None
429
652
 
430
- def get_folder_size(folder_path):
653
+ def get_folder_size(folder_path: str) -> tuple[int, str]:
431
654
  """
432
- Get size of all files contained in a folder in a readable format (KB, MB, GB)
655
+ Get the size of all files contained in a folder in a readable format (KB, MB, GB).
656
+
657
+ Parameters:
658
+ - folder_path (str): The path of the folder.
659
+
660
+ Returns:
661
+ tuple[int, str]: A tuple containing the total size of all files in bytes and its formatted size.
662
+ If the folder is not found, returns None.
433
663
  """
434
664
  total_size = 0
435
665
 
@@ -457,9 +687,16 @@ def get_folder_size(folder_path):
457
687
  print(f"Folder not found: {folder_path}")
458
688
  return None
459
689
 
460
- def file_creation_date(file_path):
690
+ def file_creation_date(file_path: str) -> datetime:
461
691
  """
462
- Return the last update timestamp
692
+ Return the last update timestamp of a file.
693
+
694
+ Parameters:
695
+ - file_path (str): The path of the file.
696
+
697
+ Returns:
698
+ datetime: The last update timestamp as a datetime object.
699
+ If the file does not exist, returns None.
463
700
  """
464
701
  # Check if the file exists
465
702
  if os.path.exists(file_path):
@@ -476,27 +713,34 @@ def file_creation_date(file_path):
476
713
  ############################################################################
477
714
 
478
715
 
479
- def transform_to_n_items_list(input_list : list, n: int):
716
+ def transform_to_n_items_list(lst: list, n: int) -> list[list]:
480
717
  """
481
718
  Transform a list into a list of n-items sublists.
482
719
 
483
720
  Parameters:
484
- - input_list: The input list to be transformed.
485
- - n: The number of items in each sublist.
721
+ - lst (list): The input list to be transformed.
722
+ - n (int): The number of items in each sublist.
486
723
 
487
724
  Returns:
488
- A list of n-items sublists.
725
+ list[list]: A list of n-items sublists.
489
726
  """
490
- return [input_list[i:i + n] for i in range(0, len(input_list), n)]
727
+ return [lst[i:i + n] for i in range(0, len(lst), n)]
491
728
 
492
- def unduplicate_list(lst):
729
+
730
+ def unduplicate_list(lst: list) -> list:
493
731
  """
494
- Unduplicate elements of a list
732
+ Remove duplicate elements from a list.
733
+
734
+ Parameters:
735
+ - lst (list): The input list with possible duplicate elements.
736
+
737
+ Returns:
738
+ list: A list with duplicate elements removed.
495
739
  """
496
740
  return list(set(lst))
497
741
 
498
742
 
499
- def sort_list(lst, reverse=False):
743
+ def sort_list(lst: list, reverse: bool = False) -> list:
500
744
  """
501
745
  Sort the list in ascending or descending order.
502
746
 
@@ -506,12 +750,12 @@ def sort_list(lst, reverse=False):
506
750
  If False (default), sort the list in ascending order.
507
751
 
508
752
  Returns:
509
- - list: A new list sorted based on the specified order.
753
+ list: A new list sorted based on the specified order.
510
754
  """
511
755
  return sorted(lst, reverse=reverse)
512
756
 
513
757
 
514
- def map_list(lst, function):
758
+ def map_list(lst: list, function: callable) -> list:
515
759
  """
516
760
  Apply a function to each element of the list.
517
761
 
@@ -520,12 +764,12 @@ def map_list(lst, function):
520
764
  - function (callable): The function to apply to each element.
521
765
 
522
766
  Returns:
523
- - list: A new list with the function applied to each element.
767
+ list: A new list with the function applied to each element.
524
768
  """
525
769
  return [function(element) for element in lst]
526
770
 
527
771
 
528
- def flatten_list(lst):
772
+ def flatten_list(lst: list) -> list:
529
773
  """
530
774
  Flatten a nested list into a single list.
531
775
 
@@ -533,7 +777,7 @@ def flatten_list(lst):
533
777
  - lst (list): The input nested list.
534
778
 
535
779
  Returns:
536
- - list: A new list with all nested elements flattened.
780
+ list: A new list with all nested elements flattened.
537
781
  """
538
782
  flattened_list = []
539
783
 
@@ -548,7 +792,7 @@ def flatten_list(lst):
548
792
  return flattened_list
549
793
 
550
794
 
551
- def find_occurrences(lst, element):
795
+ def find_occurrences(lst: list, element) -> int:
552
796
  """
553
797
  Find the occurrences of a specific element in the list.
554
798
 
@@ -557,12 +801,12 @@ def find_occurrences(lst, element):
557
801
  - element: The element to find occurrences of.
558
802
 
559
803
  Returns:
560
- - int: The number of occurrences of the specified element in the list.
804
+ int: The number of occurrences of the specified element in the list.
561
805
  """
562
806
  return lst.count(element)
563
807
 
564
808
 
565
- def is_subset(subset, superset):
809
+ def is_subset(subset: list, superset: list) -> bool:
566
810
  """
567
811
  Check if one list is a subset of another.
568
812
 
@@ -571,11 +815,11 @@ def is_subset(subset, superset):
571
815
  - superset (list): The superset list.
572
816
 
573
817
  Returns:
574
- - bool: True if the subset is a subset of the superset, False otherwise.
818
+ bool: True if the subset is a subset of the superset, False otherwise.
575
819
  """
576
820
  return all(element in superset for element in subset)
577
821
 
578
- def common_elements(list1, list2):
822
+ def common_elements(list1: list, list2: list) -> list:
579
823
  """
580
824
  Find the common elements between two lists.
581
825
 
@@ -584,12 +828,12 @@ def common_elements(list1, list2):
584
828
  - list2 (list): The second list.
585
829
 
586
830
  Returns:
587
- - list: A new list containing the common elements between list1 and list2.
831
+ list: A new list containing the common elements between list1 and list2.
588
832
  """
589
833
  return list(set(list1) & set(list2))
590
834
 
591
835
 
592
- def shuffle_list(lst):
836
+ def shuffle_list(lst: list) -> list:
593
837
  """
594
838
  Shuffle the elements of the list randomly.
595
839
 
@@ -597,14 +841,14 @@ def shuffle_list(lst):
597
841
  - lst (list): The input list.
598
842
 
599
843
  Returns:
600
- - list: A new list with the elements shuffled randomly.
844
+ list: A new list with the elements shuffled randomly.
601
845
  """
602
846
  shuffled_list = lst.copy()
603
847
  random.shuffle(shuffled_list)
604
848
  return shuffled_list
605
849
 
606
850
 
607
- def sample_list(lst, sample_size):
851
+ def sample_list(lst: list, sample_size) -> list:
608
852
  """
609
853
  Sample a list based on an integer or a float representing the sample size.
610
854
 
@@ -614,7 +858,11 @@ def sample_list(lst, sample_size):
614
858
  If a float, the percentage of elements to keep.
615
859
 
616
860
  Returns:
617
- - list: A new list containing the sampled elements.
861
+ list: A new list containing the sampled elements.
862
+
863
+ Raises:
864
+ - ValueError: If the sample size is invalid (negative integer or float outside [0, 1]).
865
+ - TypeError: If the sample size is neither an integer nor a float.
618
866
  """
619
867
  if isinstance(sample_size, int):
620
868
  if sample_size < 0:
@@ -628,7 +876,7 @@ def sample_list(lst, sample_size):
628
876
  else:
629
877
  raise TypeError("Sample size must be an integer or a float.")
630
878
 
631
- def count_elements(lst):
879
+ def count_elements(lst: list) -> dict:
632
880
  """
633
881
  Count the occurrences of each element in the list.
634
882
 
@@ -636,46 +884,70 @@ def count_elements(lst):
636
884
  - lst (list): The input list.
637
885
 
638
886
  Returns:
639
- - dict: A dictionary where keys are unique elements from the list, and values are their counts.
887
+ dict: A dictionary where keys are unique elements from the list, and values are their counts.
640
888
  """
641
889
  return dict(Counter(lst))
642
890
 
643
- def scale_list(lst, min_val=1, max_val=5):
891
+ def scale_list(lst: list, min_val: float = 1, max_val: float = 5) -> list:
892
+ """
893
+ Scale the values of a list to a specified range.
894
+
895
+ Parameters:
896
+ - lst (list): The input list of values to be scaled.
897
+ - min_val (float): The minimum value of the output range (default is 1).
898
+ - max_val (float): The maximum value of the output range (default is 5).
899
+
900
+ Returns:
901
+ - list: A new list with values scaled to the specified range.
902
+ """
644
903
  min_w = min(lst)
645
904
  max_w = max(lst)
646
- scaled_w = [ ]
905
+ scaled_w = []
647
906
  for x in lst:
648
907
  try:
649
908
  scaled_value = (x - min_w) / (max_w - min_w) * (max_val - min_val) + min_val
650
- except :
651
- pass
909
+ except ZeroDivisionError:
652
910
  scaled_value = min_val
653
-
654
911
  scaled_w.append(scaled_value)
655
912
  return scaled_w
656
913
 
657
- def df_scale_column(df, col_to_scale, col_out, min_val, max_val):
914
+
915
+ def df_scale_column(df: pd.DataFrame, col_to_scale: str, col_out: str, min_val: float, max_val: float) -> pd.DataFrame:
916
+ """
917
+ Scale values in a DataFrame column to a specified range.
918
+
919
+ Parameters:
920
+ - df (pd.DataFrame): The input DataFrame.
921
+ - col_to_scale (str): The name of the column to be scaled.
922
+ - col_out (str): The name of the new column to store scaled values.
923
+ - min_val (float): The minimum value of the output range.
924
+ - max_val (float): The maximum value of the output range.
925
+
926
+ Returns:
927
+ - pd.DataFrame: The DataFrame with a new column containing scaled values.
928
+ """
658
929
  min_freq = df[col_to_scale].min()
659
930
  max_freq = df[col_to_scale].max()
660
- df[col_out] = df[col_to_scale].apply(lambda x : ((x - min_freq) / (max_freq - min_freq)) * (max_val - min_val) + min_val)
931
+ df[col_out] = df[col_to_scale].apply(lambda x: ((x - min_freq) / (max_freq - min_freq)) * (max_val - min_val) + min_val)
661
932
  return df
662
933
 
663
934
  ############################################################################
664
935
  # ZIP HELPERS
665
936
  ############################################################################
666
937
 
667
- def zip_file(source_file_path, zip_file_path, name):
938
+ def zip_file(source_file_path: str, zip_file_path: str, name: str) -> str:
668
939
  """
669
940
  Zip a single file.
670
941
 
671
- Args:
672
- source_file_path (str): Path to the file to be zipped.
673
- zip_file_path (str): Path for the resulting zip file.
942
+ Parameters:
943
+ - source_file_path (str): Path to the file to be zipped.
944
+ - zip_file_path (str): Path for the resulting zip file.
945
+ - name (str): Name for the resulting zip file (without extension).
674
946
 
675
947
  Returns:
676
- None
948
+ str: Path to the resulting zip file.
677
949
  """
678
- file_path=os.path.join(zip_file_path, name+".zip")
950
+ file_path = os.path.join(zip_file_path, f"{name}.zip")
679
951
 
680
952
  with zipfile.ZipFile(file_path, 'w') as zip_file:
681
953
  # The second argument to `arcname` is used to set the name of the file inside the zip
@@ -683,18 +955,19 @@ def zip_file(source_file_path, zip_file_path, name):
683
955
 
684
956
  return file_path
685
957
 
686
- def zip_folder(source_folder_path, zip_file_path, name):
958
+ def zip_folder(source_folder_path: str, zip_file_path: str, name: str) -> str:
687
959
  """
688
960
  Zip an entire folder.
689
961
 
690
- Args:
691
- source_folder_path (str): Path to the folder to be zipped.
692
- zip_file_path (str): Path for the resulting zip file.
962
+ Parameters:
963
+ - source_folder_path (str): Path to the folder to be zipped.
964
+ - zip_file_path (str): Path for the resulting zip file.
965
+ - name (str): Name for the resulting zip file (without extension).
693
966
 
694
967
  Returns:
695
- None
968
+ str: Path to the resulting zip file.
696
969
  """
697
- file_path=os.path.join(zip_file_path, name+".zip")
970
+ file_path = os.path.join(zip_file_path, f"{name}.zip")
698
971
 
699
972
  with zipfile.ZipFile(file_path, 'w', zipfile.ZIP_DEFLATED) as zip_file:
700
973
  for foldername, subfolders, filenames in os.walk(source_folder_path):
@@ -705,13 +978,19 @@ def zip_folder(source_folder_path, zip_file_path, name):
705
978
 
706
979
  return file_path
707
980
 
708
- def unzip_file(zip_file_path, destination_path):
981
+ def unzip_file(zip_file_path: str, destination_path: str) -> None:
709
982
  """
710
- unzip a zip file
983
+ Unzip a zip file.
984
+
985
+ Parameters:
986
+ - zip_file_path (str): Path to the zip file to be unzipped.
987
+ - destination_path (str): Path where the contents of the zip file will be extracted.
988
+
989
+ Returns:
990
+ None
711
991
  """
712
992
  with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
713
993
  zip_ref.extractall(destination_path)
714
-
715
994
 
716
995
 
717
996
  ############################################################################
@@ -719,19 +998,32 @@ def unzip_file(zip_file_path, destination_path):
719
998
  ############################################################################
720
999
 
721
1000
 
722
- def create_google_spreadsheet_client(credentials:str):
1001
+ def create_google_spreadsheet_client(credentials: str):
723
1002
  """
724
- Create a Gspread client to interact with Google Sheets
1003
+ Create a Gspread client to interact with Google Sheets.
1004
+
1005
+ Parameters:
1006
+ - credentials (str): Path to the JSON file containing Google Service Account credentials.
1007
+
1008
+ Returns:
1009
+ gspread.Client: A client object for interacting with Google Sheets.
725
1010
  """
726
1011
  return gspread.service_account(filename=credentials)
727
1012
 
728
- def read_google_spreadsheet(client, sheet_id: str, worksheet_name: str):
1013
+ def read_google_spreadsheet(client: gspread.Client, sheet_id: str, worksheet_name: str) -> pd.DataFrame:
729
1014
  """
730
- Function to read a Google spreadsheet in a DataFrame
1015
+ Read data from a Google spreadsheet and return it as a DataFrame.
1016
+
1017
+ Parameters:
1018
+ - client (gspread.Client): A Gspread client object authenticated with Google Sheets API.
1019
+ - sheet_id (str): The ID of the Google spreadsheet.
1020
+ - worksheet_name (str): The name of the worksheet within the spreadsheet.
1021
+
1022
+ Returns:
1023
+ pd.DataFrame: A DataFrame containing the data from the specified worksheet.
731
1024
  """
732
1025
  try:
733
-
734
- # Open the Google Spreadsheet by name
1026
+ # Open the Google Spreadsheet by ID
735
1027
  sheet = client.open_by_key(sheet_id)
736
1028
 
737
1029
  # Select a specific worksheet by name
@@ -751,29 +1043,52 @@ def read_google_spreadsheet(client, sheet_id: str, worksheet_name: str):
751
1043
  print(f"An error occurred: {e}")
752
1044
 
753
1045
 
754
- def list_google_worksheets(client, sheet_id:str):
1046
+ def list_google_worksheets(client: gspread.Client, sheet_id: str) -> list:
755
1047
  """
756
- Return a list of worksheet names for a spreadsheet ID
1048
+ Return a list of worksheet names for a spreadsheet ID.
1049
+
1050
+ Parameters:
1051
+ - client (gspread.Client): A Gspread client object authenticated with Google Sheets API.
1052
+ - sheet_id (str): The ID of the Google spreadsheet.
1053
+
1054
+ Returns:
1055
+ list: A list of worksheet names.
757
1056
  """
758
1057
  sheet = client.open_by_key(sheet_id)
759
1058
  worksheet_obj = sheet.worksheets()
760
1059
  worksheet_list = [sheet.title for sheet in worksheet_obj]
761
1060
  return worksheet_list
762
1061
 
763
- def get_spreadsheet_permissions(client, sheet_id:str):
1062
+ def get_spreadsheet_permissions(client: gspread.Client, sheet_id: str) -> pd.DataFrame:
764
1063
  """
765
- Return a DataFrame with the list of user email and type that can access the document
1064
+ Return a DataFrame with the list of user email and type that can access the document.
1065
+
1066
+ Parameters:
1067
+ - client (gspread.Client): A Gspread client object authenticated with Google Sheets API.
1068
+ - sheet_id (str): The ID of the Google spreadsheet.
1069
+
1070
+ Returns:
1071
+ pd.DataFrame: A DataFrame containing the list of user email addresses and their access types.
766
1072
  """
767
1073
  sheet = client.open_by_key(sheet_id)
768
- permissions=sheet.list_permissions()
769
- user_list=[(user.get("emailAddress"),user.get("type")) for user in permissions if user.get("emailAddress") is not None]
1074
+ permissions = sheet.list_permissions()
1075
+ user_list = [(user.get("emailAddress"), user.get("type")) for user in permissions if user.get("emailAddress") is not None]
770
1076
  df = pd.DataFrame(user_list, columns=['email', 'type'])
771
1077
  return df
772
1078
 
773
1079
 
774
- def create_google_spreadsheet(client, df, filename:str, worksheet_name:str = "Sheet1"):
1080
+ def create_google_spreadsheet(client: gspread.Client, df: pd.DataFrame, filename: str, worksheet_name: str = "Sheet1") -> gspread.Spreadsheet:
775
1081
  """
776
- Load a dataframe in a new spreadsheet
1082
+ Create a new Google spreadsheet and load a DataFrame into it.
1083
+
1084
+ Parameters:
1085
+ - client (gspread.Client): A Gspread client object authenticated with Google Sheets API.
1086
+ - df (pd.DataFrame): The DataFrame to be loaded into the spreadsheet.
1087
+ - filename (str): The desired filename for the new spreadsheet.
1088
+ - worksheet_name (str, optional): The name of the worksheet within the spreadsheet. Defaults to "Sheet1".
1089
+
1090
+ Returns:
1091
+ gspread.Spreadsheet: The created spreadsheet object.
777
1092
  """
778
1093
  spreadsheet = client.create(filename)
779
1094
  worksheet = spreadsheet.sheet1
@@ -783,17 +1098,34 @@ def create_google_spreadsheet(client, df, filename:str, worksheet_name:str = "Sh
783
1098
 
784
1099
  return spreadsheet
785
1100
 
786
- def share_google_spreadsheet(spreadsheet, email, user_type="user", user_role="writer", notify=False, email_message=None, with_link=False):
1101
+ def share_google_spreadsheet(spreadsheet: gspread.Spreadsheet, email: str, user_type: str = "user", user_role: str = "writer", notify: bool = False, email_message: str = None, with_link: bool = False) -> gspread.Spreadsheet:
787
1102
  """
788
- Share a spreadsheet with a user
1103
+ Share a spreadsheet with a user.
1104
+
1105
+ Parameters:
1106
+ - spreadsheet (gspread.Spreadsheet): The Google spreadsheet object to be shared.
1107
+ - email (str): The email address of the user with whom the spreadsheet will be shared.
1108
+ - user_type (str, optional): The permission type for the user. Defaults to "user".
1109
+ - user_role (str, optional): The role assigned to the user. Defaults to "writer".
1110
+ - notify (bool, optional): Whether to notify the user about the sharing. Defaults to False.
1111
+ - email_message (str, optional): The message to include in the notification email.
1112
+ - with_link (bool, optional): Whether to include a link to the shared document in the notification email. Defaults to False.
1113
+
1114
+ Returns:
1115
+ gspread.Spreadsheet: The updated spreadsheet object.
789
1116
  """
790
- spreadsheet.share(email, perm_type=user_type, role=user_role, notify = notify, email_message=email_message, with_link=with_link)
1117
+ spreadsheet.share(email, perm_type=user_type, role=user_role, notify=notify, email_message=email_message, with_link=with_link)
791
1118
  return spreadsheet
792
1119
 
793
-
794
- def generate_short_id(variables : dict):
1120
+ def generate_short_id(variables: dict) -> tuple[str, str]:
795
1121
  """
796
- Generate a 8 characters ID using a dict as input
1122
+ Generate an 8-character ID using a dictionary as input.
1123
+
1124
+ Parameters:
1125
+ - variables (dict): A dictionary containing the variables to be serialized.
1126
+
1127
+ Returns:
1128
+ tuple: A tuple containing the generated short ID and the serialized variables.
797
1129
  """
798
1130
  # Serialize variables into JSON string
799
1131
  serialized_variables = json.dumps(variables, sort_keys=True)
@@ -803,7 +1135,7 @@ def generate_short_id(variables : dict):
803
1135
  short_id = hash_value[:8]
804
1136
  return short_id, serialized_variables
805
1137
 
806
- def df_transform_column_as_list(column):
1138
+ def df_transform_column_as_list(column: pd.Series) -> pd.Series:
807
1139
  def transform(cell):
808
1140
  if isinstance(cell, str):
809
1141
  # Check if it's a list formatted as string, and convert to list
@@ -812,9 +1144,7 @@ def df_transform_column_as_list(column):
812
1144
  else:
813
1145
  try:
814
1146
  values = ast.literal_eval(cell)
815
-
816
1147
  except Exception as e:
817
- pass
818
1148
  # If it's a single URL as string, make it a list
819
1149
  values = [cell]
820
1150
  elif isinstance(cell, (int, float, bool)):
@@ -832,7 +1162,11 @@ def df_transform_column_as_list(column):
832
1162
  return column.apply(transform)
833
1163
 
834
1164
 
835
- def top_rows_per_category(df, col_to_sort, col_to_gb, cols_to_keep, top_rows) :
1165
+ def top_rows_per_category(df: pd.DataFrame,
1166
+ col_to_sort: str,
1167
+ col_to_gb: str,
1168
+ cols_to_keep: list[str],
1169
+ top_rows: int) -> pd.DataFrame:
836
1170
  """
837
1171
  Select top rows for each category in a dataframe
838
1172
  """
@@ -842,3 +1176,44 @@ def top_rows_per_category(df, col_to_sort, col_to_gb, cols_to_keep, top_rows) :
842
1176
  .reset_index(drop=True)
843
1177
  )[cols_to_keep]
844
1178
  return df_gb
1179
+
1180
+ def format_number(number: int) -> str:
1181
+ """
1182
+ Function to format a number in K, M or B
1183
+ """
1184
+ if number < 1000:
1185
+ return str(number)
1186
+ elif number < 1000000:
1187
+ return f"{number / 1000:.1f}K"
1188
+ elif number < 1000000000:
1189
+ return f"{number / 1000000:.1f}M"
1190
+ else:
1191
+ return f"{number / 1000000000:.1f}B"
1192
+
1193
+
1194
+
1195
+ def unrar_file(rar_file_path : str, output_dir : str) -> None:
1196
+ """
1197
+ Extracts a .rar file to the specified output directory using the unrar command.
1198
+
1199
+ Parameters:
1200
+ rar_file_path (str): The path to the .rar file.
1201
+ output_dir (str): The directory where the contents should be extracted.
1202
+
1203
+ Returns:
1204
+ None
1205
+ """
1206
+ try:
1207
+ # Ensure the output directory exists
1208
+ subprocess.run(['mkdir', '-p', output_dir], check=True)
1209
+
1210
+ # Run the unrar command
1211
+ result = subprocess.run(['unrar', 'x', '-y', rar_file_path, output_dir],
1212
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
1213
+
1214
+ # Check if the extraction was successful
1215
+ if result.returncode != 0:
1216
+ print(f"Extraction failed. Error: {result.stderr}")
1217
+
1218
+ except Exception as e:
1219
+ print(f"An error occurred: {e}")