moducomp 0.7.7__py3-none-any.whl → 0.7.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
moducomp/moducomp.py CHANGED
@@ -44,6 +44,57 @@ from typing import Any, Dict, List, Optional, Set, Tuple, Union
44
44
  import pandas as pd
45
45
  import typer
46
46
 
47
+ RESOURCE_SUMMARIES: List[Dict[str, Any]] = []
48
+
49
+
50
+ def _get_logger() -> logging.Logger:
51
+ return logging.getLogger("ModuComp")
52
+
53
+
54
+ def _log_lines(logger: logging.Logger, message: Union[str, List[str]], level: int = logging.INFO) -> None:
55
+ if isinstance(message, (list, tuple)):
56
+ lines = message
57
+ else:
58
+ lines = str(message).splitlines()
59
+ for line in lines:
60
+ if line.strip():
61
+ logger.log(level, line)
62
+
63
+
64
+ def _log_or_print(
65
+ message: str,
66
+ logger: Optional[logging.Logger] = None,
67
+ level: int = logging.INFO,
68
+ color: str = "white",
69
+ err: bool = False,
70
+ verbose: bool = True,
71
+ force: bool = False,
72
+ ) -> None:
73
+ logger = logger or _get_logger()
74
+ has_handlers = bool(getattr(logger, "handlers", []))
75
+
76
+ if level < logging.WARNING and not verbose and not force:
77
+ if has_handlers:
78
+ _log_lines(logger, message, logging.DEBUG)
79
+ return
80
+
81
+ if has_handlers:
82
+ _log_lines(logger, message, level)
83
+ else:
84
+ typer.secho(message, fg=color, err=err)
85
+
86
+
87
+ def log_info(message: str, logger: Optional[logging.Logger] = None) -> None:
88
+ _log_or_print(message, logger=logger, level=logging.INFO, color="white", err=False, force=True)
89
+
90
+
91
+ def log_warning(message: str, logger: Optional[logging.Logger] = None) -> None:
92
+ _log_or_print(message, logger=logger, level=logging.WARNING, color="yellow", err=True, force=True)
93
+
94
+
95
+ def log_error(message: str, logger: Optional[logging.Logger] = None) -> None:
96
+ _log_or_print(message, logger=logger, level=logging.ERROR, color="red", err=True, force=True)
97
+
47
98
  def _data_roots() -> List[Path]:
48
99
  roots: List[Path] = []
49
100
  env_root = os.environ.get("MODUCOMP_DATA_DIR")
@@ -123,31 +174,22 @@ def require_eggnog_data_dir(eggnog_data_dir: Optional[str], logger: Optional[log
123
174
  return data_dir
124
175
  def conditional_output(message: str, color: str = "white", verbose: bool = True) -> None:
125
176
  """
126
- Print message to terminal only if verbose mode is enabled.
177
+ Emit a progress message with optional verbosity gating.
127
178
 
128
179
  Parameters
129
180
  ----------
130
181
  message : str
131
182
  Message to display
132
183
  color : str, optional
133
- Color for the message, by default "white"
184
+ Color for the message when falling back to console output
134
185
  verbose : bool, optional
135
- Whether to display the message, by default True
186
+ Whether to display the message at INFO level (otherwise DEBUG)
136
187
  """
137
- if not verbose:
138
- return
139
- logger = logging.getLogger("ModuComp")
140
- if logger.handlers:
141
- logger.info(message)
142
- else:
143
- typer.secho(message, fg=color)
188
+ _log_or_print(message, level=logging.INFO, color=color, err=False, verbose=verbose, force=False)
144
189
 
145
190
  def emit_error(message: str, logger: Optional[logging.Logger] = None) -> None:
146
- """Log and emit an error to both stdout and stderr."""
147
- if logger:
148
- logger.error(message)
149
- typer.secho(f"ERROR: {message}", fg="red", err=True)
150
- typer.secho(f"ERROR: {message}", fg="red")
191
+ """Log and emit an error to stderr."""
192
+ log_error(message, logger=logger)
151
193
 
152
194
 
153
195
  def format_bytes(num_bytes: float) -> str:
@@ -222,15 +264,20 @@ def run_subprocess_with_logging(
222
264
  logger.debug("Working directory: %s", os.getcwd())
223
265
 
224
266
  try:
267
+ output_level = logging.INFO if verbose else logging.DEBUG
268
+ error_level = logging.WARNING if verbose else logging.DEBUG
269
+
225
270
  def stream_reader(stream, q, stream_type):
226
- """Read from stream and put lines in queue"""
271
+ """Read from stream and put lines in queue."""
227
272
  try:
228
273
  while True:
229
274
  line = stream.readline()
230
275
  if not line:
231
276
  break
232
- line = line.rstrip('\n\r')
233
- q.put((stream_type, line)) # Put all lines, even empty ones
277
+ line = line.rstrip("\n\r")
278
+ if not line:
279
+ continue
280
+ q.put((stream_type, line))
234
281
  stream.close()
235
282
  except Exception:
236
283
  pass
@@ -282,13 +329,10 @@ def run_subprocess_with_logging(
282
329
  stream_type, line = stdout_queue.get_nowait()
283
330
  if stream_type == 'stdout':
284
331
  stdout_lines.append(line)
285
- # Stream to console immediately
286
- if verbose:
287
- print(line, flush=True)
288
332
  if logger:
289
- logger.debug("STDOUT: %s", line)
290
- if logger:
291
- logger.info(line)
333
+ _log_lines(logger, line, output_level)
334
+ elif verbose:
335
+ print(line, flush=True)
292
336
  last_output_time = current_time
293
337
  output_received = True
294
338
  except queue.Empty:
@@ -299,13 +343,10 @@ def run_subprocess_with_logging(
299
343
  stream_type, line = stderr_queue.get_nowait()
300
344
  if stream_type == 'stderr':
301
345
  stderr_lines.append(line)
302
- # Stream to console immediately
303
- if verbose:
304
- print(line, file=sys.stderr, flush=True)
305
346
  if logger:
306
- logger.debug("STDERR: %s", line)
307
- if logger:
308
- logger.warning(line)
347
+ _log_lines(logger, line, error_level)
348
+ elif verbose:
349
+ print(line, file=sys.stderr, flush=True)
309
350
  last_output_time = current_time
310
351
  output_received = True
311
352
  except queue.Empty:
@@ -314,10 +355,11 @@ def run_subprocess_with_logging(
314
355
  # Show progress message if no output for a while
315
356
  if not output_received and current_time - last_output_time > progress_interval:
316
357
  elapsed = int(current_time - last_output_time)
317
- if verbose:
318
- print(f" ... still running (no output for {elapsed}s)", flush=True)
358
+ message = f"Process still running, no output for {elapsed} seconds"
319
359
  if logger:
320
- logger.info(f"Process still running, no output for {int(current_time - last_output_time)} seconds")
360
+ _log_lines(logger, message, output_level)
361
+ elif verbose:
362
+ print(message, flush=True)
321
363
  last_output_time = current_time
322
364
 
323
365
  # Small delay to prevent busy waiting
@@ -333,10 +375,10 @@ def run_subprocess_with_logging(
333
375
  stream_type, line = stdout_queue.get_nowait()
334
376
  if stream_type == 'stdout':
335
377
  stdout_lines.append(line)
336
- if verbose:
337
- print(line, flush=True)
338
378
  if logger:
339
- logger.info(line)
379
+ _log_lines(logger, line, output_level)
380
+ elif verbose:
381
+ print(line, flush=True)
340
382
  except queue.Empty:
341
383
  break
342
384
 
@@ -345,10 +387,10 @@ def run_subprocess_with_logging(
345
387
  stream_type, line = stderr_queue.get_nowait()
346
388
  if stream_type == 'stderr':
347
389
  stderr_lines.append(line)
348
- if verbose:
349
- print(line, file=sys.stderr, flush=True)
350
390
  if logger:
351
- logger.warning(line)
391
+ _log_lines(logger, line, error_level)
392
+ elif verbose:
393
+ print(line, file=sys.stderr, flush=True)
352
394
  except queue.Empty:
353
395
  break
354
396
 
@@ -363,9 +405,7 @@ def run_subprocess_with_logging(
363
405
 
364
406
  except Exception as e:
365
407
  error_msg = f"Exception running command {' '.join(cmd)}: {str(e)}"
366
- if logger:
367
- logger.error(error_msg)
368
- print(f"ERROR: {error_msg}", file=sys.stderr)
408
+ log_error(error_msg, logger=logger)
369
409
  return -1, "", str(e)
370
410
 
371
411
 
@@ -494,23 +534,33 @@ def run_subprocess_with_resource_monitoring(
494
534
  f.write(f"# {key}: {value}\n")
495
535
  f.write("\n")
496
536
 
497
- # Display resource summary
498
- if verbose:
499
- conditional_output("\nResource Usage Summary:", "cyan", verbose)
500
- conditional_output(f" Wall Clock Time: {elapsed_seconds}s", "white", verbose)
501
- conditional_output(f" CPU Time (User): {user_time}s", "white", verbose)
502
- conditional_output(f" CPU Time (System): {system_time}s", "white", verbose)
503
- conditional_output(f" CPU Usage: {cpu_percent}", "white", verbose)
504
- conditional_output(f" Peak RAM Usage: {max_ram_gb_str} GB", "white", verbose)
505
- conditional_output(f" Exit Code: {exit_status}\n", "white", verbose)
537
+ RESOURCE_SUMMARIES.append(
538
+ {
539
+ "description": description,
540
+ "command": cmd_str,
541
+ "elapsed_seconds": elapsed_seconds,
542
+ "user_time": user_time,
543
+ "system_time": system_time,
544
+ "cpu_percent": cpu_percent,
545
+ "max_ram_gb": max_ram_gb_str,
546
+ "exit_status": exit_status,
547
+ }
548
+ )
506
549
 
507
550
  if logger:
508
- logger.info(f"Resource usage - Wall time: {elapsed_seconds}s, CPU: {cpu_percent}, Peak RAM: {max_ram_gb_str} GB")
551
+ logger.debug(
552
+ "Resource usage recorded for %s (wall=%ss, cpu=%s, peak_ram=%s GB).",
553
+ description,
554
+ elapsed_seconds,
555
+ cpu_percent,
556
+ max_ram_gb_str,
557
+ )
509
558
 
510
559
  except Exception as e:
511
560
  if logger:
512
- logger.warning(f"Failed to parse resource usage: {str(e)}")
513
- conditional_output(f"Warning: Could not parse resource usage: {str(e)}", "yellow", verbose)
561
+ logger.warning("Failed to parse resource usage: %s", str(e))
562
+ else:
563
+ log_warning(f"Failed to parse resource usage: {str(e)}")
514
564
 
515
565
  # Clean up temporary file
516
566
  try:
@@ -521,7 +571,8 @@ def run_subprocess_with_resource_monitoring(
521
571
  else:
522
572
  if logger:
523
573
  logger.warning("Resource monitoring file not found")
524
- conditional_output("Warning: Resource monitoring output not found", "yellow", verbose)
574
+ else:
575
+ log_warning("Resource monitoring output not found")
525
576
 
526
577
  return returncode, stdout, stderr
527
578
 
@@ -549,14 +600,53 @@ def log_final_resource_summary(resource_log_file: str, total_start_time: float,
549
600
  f.write(f"Pipeline completed at: {end_time.strftime('%Y-%m-%d %H:%M:%S')}\n")
550
601
  f.write(f"Total pipeline elapsed time: {total_elapsed:.2f} seconds ({total_elapsed/60:.2f} minutes)\n")
551
602
 
552
- if verbose:
553
- conditional_output("Resource usage summary saved.", "green", verbose)
554
- conditional_output(f"Resource log: {resource_log_file}", "white", verbose)
555
- conditional_output(f"Total pipeline time: {total_elapsed:.2f}s ({total_elapsed/60:.2f}min)", "white", verbose)
556
-
557
603
  if logger:
558
- logger.info(f"Resource usage summary completed. Total time: {total_elapsed:.2f}s")
559
- logger.info(f"Resource log saved to: {resource_log_file}")
604
+ _log_lines(
605
+ logger,
606
+ [
607
+ "Resource usage summary completed.",
608
+ f"Resource log: {resource_log_file}",
609
+ f"Total pipeline time: {total_elapsed:.2f}s ({total_elapsed/60:.2f}min)",
610
+ ],
611
+ logging.INFO,
612
+ )
613
+ log_resource_usage_summary(logger)
614
+ else:
615
+ _log_or_print(
616
+ f"Resource log: {resource_log_file}",
617
+ level=logging.INFO,
618
+ verbose=verbose,
619
+ force=True,
620
+ )
621
+
622
+
623
+ def log_resource_usage_summary(logger: Optional[logging.Logger] = None) -> None:
624
+ """Log a per-command resource usage summary at the end of the pipeline."""
625
+ if not RESOURCE_SUMMARIES:
626
+ return
627
+ logger = logger or _get_logger()
628
+ if not getattr(logger, "handlers", []):
629
+ return
630
+
631
+ _log_lines(logger, "Resource usage summary (per command):", logging.INFO)
632
+ for entry in RESOURCE_SUMMARIES:
633
+ description = entry.get("description", "Command")
634
+ wall = entry.get("elapsed_seconds", "N/A")
635
+ user_time = entry.get("user_time", "N/A")
636
+ system_time = entry.get("system_time", "N/A")
637
+ cpu = entry.get("cpu_percent", "N/A")
638
+ ram = entry.get("max_ram_gb", "N/A")
639
+ exit_status = entry.get("exit_status", "N/A")
640
+ wall_display = f"{wall}s" if wall not in ("N/A", None, "") else "N/A"
641
+ user_display = f"{user_time}s" if user_time not in ("N/A", None, "") else "N/A"
642
+ system_display = f"{system_time}s" if system_time not in ("N/A", None, "") else "N/A"
643
+ ram_display = f"{ram} GB" if ram not in ("N/A", None, "") else "N/A"
644
+ line = (
645
+ f" - {description}: wall={wall_display}, user={user_display}, "
646
+ f"system={system_display}, cpu={cpu}, peak_ram={ram_display}, "
647
+ f"exit={exit_status}"
648
+ )
649
+ _log_lines(logger, line, logging.INFO)
560
650
 
561
651
 
562
652
  def display_pipeline_completion_summary(start_time: float, savedir: str, logger: Optional[logging.Logger] = None, verbose: bool = True) -> None:
@@ -605,19 +695,19 @@ def display_pipeline_completion_summary(start_time: float, savedir: str, logger:
605
695
  if complementarity_files > 0:
606
696
  output_files.append(f"{complementarity_files} complementarity report(s)")
607
697
 
608
- if verbose:
609
- conditional_output("Pipeline completed.", "green", verbose)
610
- conditional_output(f"Total execution time: {time_str} ({total_elapsed:.2f} seconds)", "white", verbose)
611
- conditional_output(f"Output directory: {savedir}", "white", verbose)
612
- conditional_output(f"Generated files: {', '.join(output_files) if output_files else 'None'}", "white", verbose)
613
- conditional_output(f"Completed at: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", "white", verbose)
698
+ summary_lines = [
699
+ "Pipeline completed.",
700
+ f"Total execution time: {time_str} ({total_elapsed:.2f} seconds)",
701
+ f"Output directory: {savedir}",
702
+ f"Generated files: {', '.join(output_files) if output_files else 'None'}",
703
+ f"Completed at: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
704
+ ]
614
705
 
615
706
  if logger:
616
- logger.info("Pipeline completed.")
617
- logger.info(f"Total execution time: {time_str} ({total_elapsed:.2f} seconds)")
618
- logger.info(f"Output directory: {savedir}")
619
- logger.info(f"Generated files: {', '.join(output_files) if output_files else 'None'}")
620
- logger.info(f"Completed at: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
707
+ _log_lines(logger, summary_lines, logging.INFO)
708
+ else:
709
+ for line in summary_lines:
710
+ _log_or_print(line, level=logging.INFO, verbose=verbose, force=True)
621
711
 
622
712
 
623
713
  def parse_emapper_annotations(emapper_file_path: str, logger: Optional[logging.Logger] = None) -> Dict[str, Dict[str, List[str]]]:
@@ -836,8 +926,9 @@ def configure_logging(log_level: str, log_dir: Union[str, Path]) -> logging.Logg
836
926
 
837
927
  logger = logging.getLogger("ModuComp")
838
928
  numeric_level = getattr(logging, log_level.upper(), logging.INFO)
839
- logger.setLevel(numeric_level)
929
+ logger.setLevel(logging.DEBUG)
840
930
  logger.handlers.clear()
931
+ logger.propagate = False
841
932
 
842
933
  formatter = logging.Formatter(
843
934
  fmt="%(asctime)s %(levelname)s %(name)s: %(message)s",
@@ -848,12 +939,22 @@ def configure_logging(log_level: str, log_dir: Union[str, Path]) -> logging.Logg
848
939
  file_handler.setLevel(logging.DEBUG)
849
940
  file_handler.setFormatter(formatter)
850
941
 
851
- console_handler = logging.StreamHandler()
852
- console_handler.setLevel(numeric_level)
853
- console_handler.setFormatter(formatter)
942
+ class _BelowWarningFilter(logging.Filter):
943
+ def filter(self, record: logging.LogRecord) -> bool:
944
+ return record.levelno < logging.WARNING
945
+
946
+ stdout_handler = logging.StreamHandler(stream=sys.stdout)
947
+ stdout_handler.setLevel(numeric_level)
948
+ stdout_handler.setFormatter(formatter)
949
+ stdout_handler.addFilter(_BelowWarningFilter())
950
+
951
+ stderr_handler = logging.StreamHandler(stream=sys.stderr)
952
+ stderr_handler.setLevel(logging.WARNING)
953
+ stderr_handler.setFormatter(formatter)
854
954
 
855
955
  logger.addHandler(file_handler)
856
- logger.addHandler(console_handler)
956
+ logger.addHandler(stdout_handler)
957
+ logger.addHandler(stderr_handler)
857
958
  logger.debug("Logging initialised at level %s", logging.getLevelName(numeric_level))
858
959
  logger.info(f"Log file created at: {log_file}")
859
960
  return logger
@@ -903,10 +1004,9 @@ def how_many_genomes(genomedir: str, verbose: bool = True):
903
1004
  """
904
1005
  n_files = len(get_path_to_each_genome(genomedir))
905
1006
  if n_files > 0:
906
- conditional_output(f"OK: {n_files} faa files were found in '{genomedir}'\n", "white", verbose)
1007
+ conditional_output(f"OK: {n_files} faa files were found in '{genomedir}'", "white", verbose)
907
1008
  else:
908
- # Always show errors regardless of verbose setting
909
- typer.secho(f"ERROR: No FAA files were found in '{genomedir}'\n", fg="red")
1009
+ log_error(f"No FAA files were found in '{genomedir}'")
910
1010
  exit()
911
1011
 
912
1012
 
@@ -921,12 +1021,12 @@ def create_output_dir(savedir: str, verbose: bool = True):
921
1021
  verbose : bool
922
1022
  Whether to display detailed output
923
1023
  """
924
- conditional_output("\nCreating output directory", "green", verbose)
1024
+ conditional_output("Creating output directory", "green", verbose)
925
1025
  if os.path.exists(savedir):
926
- conditional_output(f"OK: Output directory already exists at: {savedir}\n", "white", verbose)
1026
+ conditional_output(f"OK: Output directory already exists at: {savedir}", "white", verbose)
927
1027
  else:
928
1028
  os.makedirs(savedir, exist_ok=True)
929
- conditional_output(f"OK: Output directory created at: {savedir}\n", "white", verbose)
1029
+ conditional_output(f"OK: Output directory created at: {savedir}", "white", verbose)
930
1030
 
931
1031
 
932
1032
  def get_tmp_dir(savedir:str) -> str:
@@ -958,13 +1058,13 @@ def create_tmp_dir(savedir: str, verbose: bool = True):
958
1058
  verbose : bool
959
1059
  Whether to display detailed output
960
1060
  """
961
- conditional_output("\nCreating tmp dir", "green", verbose)
1061
+ conditional_output("Creating tmp dir", "green", verbose)
962
1062
  tmp_dir_path = get_tmp_dir(savedir)
963
1063
  if (os.path.exists(tmp_dir_path)):
964
- conditional_output(f"OK: Tmp directory already exists at: {tmp_dir_path}\n", "white", verbose)
1064
+ conditional_output(f"OK: Tmp directory already exists at: {tmp_dir_path}", "white", verbose)
965
1065
  else:
966
1066
  os.mkdir(tmp_dir_path)
967
- conditional_output(f"OK: Tmp directory created at: {tmp_dir_path}\n", "white", verbose)
1067
+ conditional_output(f"OK: Tmp directory created at: {tmp_dir_path}", "white", verbose)
968
1068
 
969
1069
 
970
1070
  def adapt_fasta_headers(genomedir: str, savedir: str, verbose: bool = True) -> None:
@@ -985,11 +1085,11 @@ def adapt_fasta_headers(genomedir: str, savedir: str, verbose: bool = True) -> N
985
1085
  verbose : bool
986
1086
  Whether to display detailed output
987
1087
  """
988
- conditional_output("\nModifying fasta headers", "green", verbose)
1088
+ conditional_output("Modifying fasta headers", "green", verbose)
989
1089
  path_to_each_genome = get_path_to_each_genome(genomedir)
990
1090
  output_dir = f"{get_tmp_dir(savedir)}/faa"
991
1091
  if os.path.exists(output_dir):
992
- conditional_output(f"OK: Fasta headers already modified at: {output_dir}\n", "white", verbose)
1092
+ conditional_output(f"OK: Fasta headers already modified at: {output_dir}", "white", verbose)
993
1093
  return
994
1094
 
995
1095
  os.mkdir(output_dir)
@@ -1005,7 +1105,7 @@ def adapt_fasta_headers(genomedir: str, savedir: str, verbose: bool = True) -> N
1005
1105
  i+=1
1006
1106
  else:
1007
1107
  outfile.write(line)
1008
- conditional_output(f"OK: Fasta headers modified at: {output_dir}\n", "white", verbose)
1108
+ conditional_output(f"OK: Fasta headers modified at: {output_dir}", "white", verbose)
1009
1109
 
1010
1110
 
1011
1111
  def copy_faa_to_tmp(genomedir: str, savedir: str, verbose: bool = True) -> None:
@@ -1025,18 +1125,18 @@ def copy_faa_to_tmp(genomedir: str, savedir: str, verbose: bool = True) -> None:
1025
1125
  verbose : bool
1026
1126
  Whether to display detailed output
1027
1127
  """
1028
- conditional_output("\nCopying faa files to tmp dir", "green", verbose)
1128
+ conditional_output("Copying faa files to tmp dir", "green", verbose)
1029
1129
  path_to_each_genome = get_path_to_each_genome(genomedir)
1030
1130
  output_dir = f"{get_tmp_dir(savedir)}/faa"
1031
1131
  if os.path.exists(output_dir):
1032
- conditional_output(f"OK: Fasta files already exist at: {output_dir}\n", "white", verbose)
1132
+ conditional_output(f"OK: Fasta files already exist at: {output_dir}", "white", verbose)
1033
1133
  return
1034
1134
 
1035
1135
  os.mkdir(output_dir)
1036
1136
  conditional_output("Copying genome files to temporary directory...", "yellow", verbose)
1037
1137
  for each_file in path_to_each_genome:
1038
1138
  shutil.copy(each_file, output_dir)
1039
- conditional_output(f"OK: Fasta files copied to: {output_dir}\n", "white", verbose)
1139
+ conditional_output(f"OK: Fasta files copied to: {output_dir}", "white", verbose)
1040
1140
 
1041
1141
 
1042
1142
  def merge_genomes(savedir: str, logger: Optional[logging.Logger] = None, verbose: bool = True) -> bool:
@@ -1057,13 +1157,13 @@ def merge_genomes(savedir: str, logger: Optional[logging.Logger] = None, verbose
1057
1157
  bool
1058
1158
  True if the merged file was created or already exists, False otherwise
1059
1159
  """
1060
- conditional_output("\nMerging genomes", "green", verbose)
1160
+ conditional_output("Merging genomes", "green", verbose)
1061
1161
  genome_file_paths = glob.glob(f"{get_tmp_dir(savedir)}/faa/*.faa")
1062
1162
  output_file = f"{get_tmp_dir(savedir)}/merged_genomes.faa"
1063
1163
 
1064
1164
 
1065
1165
  if os.path.exists(output_file):
1066
- conditional_output(f"OK: Merged genomes file already exists at: {output_file}\n", "white", verbose)
1166
+ conditional_output(f"OK: Merged genomes file already exists at: {output_file}", "white", verbose)
1067
1167
  if logger:
1068
1168
  logger.info(f"Using existing merged genomes file: {output_file}")
1069
1169
  return True
@@ -1071,10 +1171,7 @@ def merge_genomes(savedir: str, logger: Optional[logging.Logger] = None, verbose
1071
1171
 
1072
1172
  if not genome_file_paths:
1073
1173
  error_msg = f"No FAA files found in {get_tmp_dir(savedir)}/faa/"
1074
- # Always show errors regardless of verbose setting
1075
- typer.secho(f"ERROR: {error_msg}", fg="red")
1076
- if logger:
1077
- logger.error(error_msg)
1174
+ log_error(error_msg, logger=logger)
1078
1175
  return False
1079
1176
 
1080
1177
  conditional_output("Merging individual genome files...", "yellow", verbose)
@@ -1084,16 +1181,13 @@ def merge_genomes(savedir: str, logger: Optional[logging.Logger] = None, verbose
1084
1181
  with open(each_file) as infile:
1085
1182
  for line in infile:
1086
1183
  outfile.write(line)
1087
- conditional_output(f"OK: Fasta files merged at: {output_file}\n", "white", verbose)
1184
+ conditional_output(f"OK: Fasta files merged at: {output_file}", "white", verbose)
1088
1185
  if logger:
1089
1186
  logger.info(f"Successfully created merged genome file: {output_file}")
1090
1187
  return True
1091
1188
  except Exception as e:
1092
1189
  error_msg = f"Error merging genome files: {str(e)}"
1093
- # Always show errors regardless of verbose setting
1094
- typer.secho(f"ERROR: {error_msg}", fg="red")
1095
- if logger:
1096
- logger.error(error_msg)
1190
+ log_error(error_msg, logger=logger)
1097
1191
  return False
1098
1192
 
1099
1193
 
@@ -1117,12 +1211,16 @@ def run_emapper(savedir: str, ncpus: int, resource_log_file: str, lowmem: bool =
1117
1211
  bool
1118
1212
  True if emapper ran successfully or outputs already exist, False otherwise
1119
1213
  """
1120
- conditional_output("\nStarting eggNOG-mapper", "green", verbose)
1214
+ conditional_output("Starting eggNOG-mapper", "green", verbose)
1121
1215
 
1122
1216
 
1123
1217
  final_emapper_annotation_file = f"{savedir}/emapper_out.emapper.annotations"
1124
1218
  if os.path.exists(final_emapper_annotation_file):
1125
- typer.secho(f"OK: Emapper annotations already exist at: {final_emapper_annotation_file}\n", fg="white")
1219
+ conditional_output(
1220
+ f"OK: Emapper annotations already exist at: {final_emapper_annotation_file}",
1221
+ "white",
1222
+ verbose,
1223
+ )
1126
1224
  if logger:
1127
1225
  logger.info(f"Using existing emapper annotations: {final_emapper_annotation_file}")
1128
1226
  return True
@@ -1135,14 +1233,16 @@ def run_emapper(savedir: str, ncpus: int, resource_log_file: str, lowmem: bool =
1135
1233
 
1136
1234
  if not os.path.exists(merged_genomes_file):
1137
1235
  error_msg = f"Merged genomes file not found at: {merged_genomes_file}"
1138
- typer.secho(f"ERROR: {error_msg}", fg="red")
1139
- if logger:
1140
- logger.error(error_msg)
1236
+ log_error(error_msg, logger=logger)
1141
1237
  return False
1142
1238
 
1143
1239
 
1144
1240
  if os.path.exists(emapper_tmp_file):
1145
- typer.secho(f"OK: Emapper output already exists at: {emapper_tmp_file}\n", fg="white")
1241
+ conditional_output(
1242
+ f"OK: Emapper output already exists at: {emapper_tmp_file}",
1243
+ "white",
1244
+ verbose,
1245
+ )
1146
1246
  if logger:
1147
1247
  logger.info(f"Using existing emapper output from temporary directory: {emapper_tmp_file}")
1148
1248
 
@@ -1185,41 +1285,40 @@ def run_emapper(savedir: str, ncpus: int, resource_log_file: str, lowmem: bool =
1185
1285
  error_msg = f"emapper failed with return code {returncode}"
1186
1286
  if stderr:
1187
1287
  error_msg += f": {stderr}"
1188
- typer.secho(f"ERROR: {error_msg}", fg="red")
1189
- if logger:
1190
- logger.error(error_msg)
1288
+ log_error(error_msg, logger=logger)
1191
1289
  return False
1192
1290
 
1193
1291
 
1194
1292
  if logger and stdout:
1195
- logger.info(f"emapper stdout summary: {stdout[:500]}{'...' if len(stdout) > 500 else ''}")
1293
+ summary = stdout[:500] + ("..." if len(stdout) > 500 else "")
1294
+ _log_lines(logger, f"emapper stdout summary:\n{summary}", logging.INFO)
1196
1295
 
1197
1296
 
1198
1297
  if not os.path.exists(emapper_tmp_file):
1199
1298
  error_msg = f"emapper did not generate expected output: {emapper_tmp_file}"
1200
- typer.secho(f"ERROR: {error_msg}", fg="red")
1201
- if logger:
1202
- logger.error(error_msg)
1299
+ log_error(error_msg, logger=logger)
1203
1300
  return False
1204
1301
 
1205
1302
 
1206
1303
  shutil.copy(emapper_tmp_file, final_emapper_annotation_file)
1207
1304
 
1208
- typer.secho(f"OK: emapper output saved at: {output_folder_emapper}\n", fg="white")
1209
- typer.secho(f"OK: emapper annotations copied to: {final_emapper_annotation_file}\n", fg="white")
1305
+ conditional_output(f"OK: emapper output saved at: {output_folder_emapper}", "white", verbose)
1306
+ conditional_output(
1307
+ f"OK: emapper annotations copied to: {final_emapper_annotation_file}",
1308
+ "white",
1309
+ verbose,
1310
+ )
1210
1311
  if logger:
1211
1312
  logger.info(f"Successfully ran emapper and saved annotations to: {final_emapper_annotation_file}")
1212
1313
  return True
1213
1314
 
1214
1315
  except Exception as e:
1215
1316
  error_msg = f"Error running emapper: {str(e)}"
1216
- typer.secho(f"ERROR: {error_msg}", fg="red")
1217
- if logger:
1218
- logger.error(error_msg)
1317
+ log_error(error_msg, logger=logger)
1219
1318
  return False
1220
1319
 
1221
1320
 
1222
- def remove_temp_files(savedir: str, logger: Optional[logging.Logger] = None) -> None:
1321
+ def remove_temp_files(savedir: str, logger: Optional[logging.Logger] = None, verbose: bool = True) -> None:
1223
1322
  """
1224
1323
  Remove temporary files and directories.
1225
1324
 
@@ -1234,13 +1333,11 @@ def remove_temp_files(savedir: str, logger: Optional[logging.Logger] = None) ->
1234
1333
  if os.path.exists(tmp_dir):
1235
1334
  try:
1236
1335
  shutil.rmtree(tmp_dir)
1237
- typer.secho(f"OK: Temporary files removed from: {tmp_dir}", fg="white")
1336
+ conditional_output(f"OK: Temporary files removed from: {tmp_dir}", "white", verbose)
1238
1337
  if logger:
1239
1338
  logger.info(f"Removed temporary directory: {tmp_dir}")
1240
1339
  except Exception as e:
1241
- typer.secho(f"WARNING: Failed to remove temporary files: {str(e)}", fg="yellow")
1242
- if logger:
1243
- logger.warning(f"Failed to remove temporary directory {tmp_dir}: {str(e)}")
1340
+ log_warning(f"Failed to remove temporary directory {tmp_dir}: {str(e)}", logger=logger)
1244
1341
 
1245
1342
 
1246
1343
  def check_final_reports_exist(savedir: str, calculate_complementarity: int, logger: Optional[logging.Logger] = None) -> bool:
@@ -1343,18 +1440,16 @@ def generate_complementarity_report(
1343
1440
 
1344
1441
 
1345
1442
  if os.path.exists(output_file):
1443
+ conditional_output(f"OK: Complementarity report already exists at: {output_file}", "white", verbose)
1346
1444
  if logger:
1347
1445
  logger.info(f"Complementarity report already exists at {output_file}")
1348
- conditional_output(f"OK: Complementarity report already exists at: {output_file}", "white", verbose)
1349
1446
  return
1350
1447
 
1351
1448
 
1352
1449
  module_matrix_file = f"{savedir}/module_completeness.tsv"
1353
1450
  if not os.path.exists(module_matrix_file):
1354
1451
  error_msg = f"Module completeness matrix not found at: {module_matrix_file}"
1355
- if logger:
1356
- logger.error(error_msg)
1357
- typer.secho(f"ERROR: {error_msg}", fg="red")
1452
+ log_error(error_msg, logger=logger)
1358
1453
  return
1359
1454
 
1360
1455
 
@@ -1371,13 +1466,15 @@ def generate_complementarity_report(
1371
1466
  emapper_file = possible_file
1372
1467
  if logger:
1373
1468
  logger.info(f"Found emapper annotation file at: {emapper_file}")
1374
- typer.secho(f"OK: Using emapper annotations from: {emapper_file}", fg="white")
1469
+ conditional_output(f"OK: Using emapper annotations from: {emapper_file}", "white", verbose)
1375
1470
  break
1376
1471
 
1377
1472
  if not emapper_file:
1378
- if logger:
1379
- logger.warning(f"Emapper annotation file not found in any of the expected locations. Will use placeholder protein IDs.")
1380
- typer.secho(f"WARNING: Emapper annotation file not found. Will use placeholder protein IDs.", fg="yellow")
1473
+ log_warning(
1474
+ "Emapper annotation file not found in any expected location. "
1475
+ "Will use placeholder protein IDs.",
1476
+ logger=logger,
1477
+ )
1381
1478
 
1382
1479
 
1383
1480
  kpct_output_file = None
@@ -1395,9 +1492,7 @@ def generate_complementarity_report(
1395
1492
 
1396
1493
  if not kpct_output_file:
1397
1494
  error_msg = "KPCT output file not found. Cannot extract module metadata."
1398
- if logger:
1399
- logger.error(error_msg)
1400
- typer.secho(f"ERROR: {error_msg}", fg="red")
1495
+ log_error(error_msg, logger=logger)
1401
1496
  return
1402
1497
 
1403
1498
  try:
@@ -1476,10 +1571,13 @@ def generate_complementarity_report(
1476
1571
  error_msg = f"Cannot identify required module columns in KPCT output: {kpct_output_file}"
1477
1572
  if logger:
1478
1573
  logger.error(error_msg)
1479
- if module_id_col: logger.error(f"Found module_id_col: {module_id_col}")
1480
- if module_name_col: logger.error(f"Found module_name_col: {module_name_col}")
1574
+ if module_id_col:
1575
+ logger.error(f"Found module_id_col: {module_id_col}")
1576
+ if module_name_col:
1577
+ logger.error(f"Found module_name_col: {module_name_col}")
1481
1578
  logger.error(f"Available columns: {kpct_df.columns.tolist()}")
1482
- typer.secho(f"ERROR: {error_msg}", fg="red")
1579
+ else:
1580
+ log_error(error_msg, logger=logger)
1483
1581
  return
1484
1582
 
1485
1583
  if not contig_col or not matching_ko_col:
@@ -1487,7 +1585,10 @@ def generate_complementarity_report(
1487
1585
  logger.warning(f"Cannot identify contig or matching_ko columns in KPCT output.")
1488
1586
  logger.warning(f"Found contig_col: {contig_col}, matching_ko_col: {matching_ko_col}")
1489
1587
  logger.warning(f"Available columns: {kpct_df.columns.tolist()}")
1490
- typer.secho(f"WARNING: Missing columns in KPCT output may affect mapping of KOs to combinations.", fg="yellow")
1588
+ log_warning(
1589
+ "Missing columns in KPCT output may affect mapping of KOs to combinations.",
1590
+ logger=logger,
1591
+ )
1491
1592
 
1492
1593
 
1493
1594
  module_metadata = {}
@@ -1749,10 +1850,6 @@ def generate_complementarity_report(
1749
1850
 
1750
1851
  report_df.to_csv(output_file, sep='\t', index=False)
1751
1852
 
1752
- if logger:
1753
- logger.info(f"Found {len(report_df)} complementary modules in {n_members}-member combinations")
1754
- logger.info(f"Complementarity report saved to: {output_file}")
1755
-
1756
1853
  conditional_output(f"OK: Found {len(report_df)} complementary modules in {n_members}-member combinations", "green", verbose)
1757
1854
  conditional_output(f"Complementarity report saved to: {output_file}", "white", verbose)
1758
1855
  else:
@@ -1773,17 +1870,27 @@ def generate_complementarity_report(
1773
1870
  report_df = pd.DataFrame(columns=columns)
1774
1871
  report_df.to_csv(output_file, sep='\t', index=False)
1775
1872
 
1776
- typer.secho(f"WARNING: No complementary modules found in {n_members}-member combinations", fg="yellow")
1777
- typer.secho(f"Empty report saved to: {output_file}", fg="white")
1873
+ log_warning(
1874
+ f"No complementary modules found in {n_members}-member combinations",
1875
+ logger=logger,
1876
+ )
1877
+ conditional_output(f"Empty report saved to: {output_file}", "white", verbose)
1778
1878
 
1779
1879
  except Exception as e:
1780
1880
  error_msg = f"Error generating complementarity report: {str(e)}"
1781
1881
  if logger:
1782
1882
  logger.error(error_msg, exc_info=True)
1783
- typer.secho(f"ERROR: {error_msg}", fg="red")
1883
+ else:
1884
+ log_error(error_msg, logger=logger)
1784
1885
 
1785
1886
 
1786
- def ko_matrix_to_kpct_format(kos_matrix: str, savedir: str, calculate_complementarity: int = 0, logger: Optional[logging.Logger] = None) -> str:
1887
+ def ko_matrix_to_kpct_format(
1888
+ kos_matrix: str,
1889
+ savedir: str,
1890
+ calculate_complementarity: int = 0,
1891
+ logger: Optional[logging.Logger] = None,
1892
+ verbose: bool = True,
1893
+ ) -> str:
1787
1894
  """
1788
1895
  Convert KO matrix to KPCT format.
1789
1896
 
@@ -1812,7 +1919,7 @@ def ko_matrix_to_kpct_format(kos_matrix: str, savedir: str, calculate_complement
1812
1919
 
1813
1920
  initial_delimiter = ',' if kos_matrix.lower().endswith('.csv') else '\t'
1814
1921
 
1815
- typer.secho(f"Reading KO matrix file: {kos_matrix}", fg="yellow")
1922
+ conditional_output(f"Reading KO matrix file: {kos_matrix}", "yellow", verbose)
1816
1923
  if logger:
1817
1924
  logger.info(f"Reading KO matrix file with delimiter '{initial_delimiter}': {kos_matrix}")
1818
1925
 
@@ -1865,9 +1972,7 @@ def ko_matrix_to_kpct_format(kos_matrix: str, savedir: str, calculate_complement
1865
1972
 
1866
1973
  if 'taxon_oid' not in ko_df.columns:
1867
1974
  msg = "Invalid KO matrix format: missing 'taxon_oid' column"
1868
- if logger:
1869
- logger.error(msg)
1870
- typer.secho(f"ERROR: {msg}", fg="red")
1975
+ log_error(msg, logger=logger)
1871
1976
  exit(1)
1872
1977
 
1873
1978
 
@@ -1912,14 +2017,12 @@ def ko_matrix_to_kpct_format(kos_matrix: str, savedir: str, calculate_complement
1912
2017
 
1913
2018
  if logger:
1914
2019
  logger.info(f"KO matrix converted to KPCT format: {output_path}")
1915
- typer.secho(f"OK: KO matrix converted to KPCT format: {output_path}", fg="white")
2020
+ conditional_output(f"OK: KO matrix converted to KPCT format: {output_path}", "white", verbose)
1916
2021
  return output_path
1917
2022
 
1918
2023
  except Exception as e:
1919
2024
  error_msg = f"Error converting KO matrix to KPCT format: {str(e)}"
1920
- if logger:
1921
- logger.error(error_msg)
1922
- typer.secho(f"ERROR: {error_msg}", fg="red")
2025
+ log_error(error_msg, logger=logger)
1923
2026
  raise
1924
2027
 
1925
2028
 
@@ -1975,7 +2078,12 @@ def get_ko_protein_mappings_from_kpct_input(kpct_input_file: str, logger: Option
1975
2078
  return {}
1976
2079
 
1977
2080
 
1978
- def create_module_completeness_matrix(savedir: str, kpct_outprefix: str, logger: Optional[logging.Logger] = None) -> None:
2081
+ def create_module_completeness_matrix(
2082
+ savedir: str,
2083
+ kpct_outprefix: str,
2084
+ logger: Optional[logging.Logger] = None,
2085
+ verbose: bool = True,
2086
+ ) -> None:
1979
2087
  """
1980
2088
  Create a module completeness matrix from the KPCT output.
1981
2089
 
@@ -2007,9 +2115,7 @@ def create_module_completeness_matrix(savedir: str, kpct_outprefix: str, logger:
2007
2115
 
2008
2116
  if not os.path.exists(kpct_output_file):
2009
2117
  error_msg = f"KPCT output file not found: tried {kpct_outprefix}_contigs.with_weights.tsv and alternatives"
2010
- if logger:
2011
- logger.error(error_msg)
2012
- typer.secho(f"ERROR: {error_msg}", fg="red")
2118
+ log_error(error_msg, logger=logger)
2013
2119
  return
2014
2120
 
2015
2121
  try:
@@ -2109,7 +2215,8 @@ def create_module_completeness_matrix(savedir: str, kpct_outprefix: str, logger:
2109
2215
  error_msg = "Could not identify module columns in the KPCT output"
2110
2216
  if logger:
2111
2217
  logger.error(error_msg)
2112
- typer.secho(f"ERROR: {error_msg}", fg="red")
2218
+ else:
2219
+ log_error(error_msg, logger=logger)
2113
2220
  return
2114
2221
 
2115
2222
  # Build the result data
@@ -2152,19 +2259,22 @@ def create_module_completeness_matrix(savedir: str, kpct_outprefix: str, logger:
2152
2259
  logger.info(f"Matrix contains {single_genomes} single genomes out of {total_genomes} total entries")
2153
2260
  if all_genomes:
2154
2261
  logger.info(f"Expected {len(all_genomes)} single genomes from KPCT input")
2155
- typer.secho(f"OK: Module completeness matrix saved to: {output_file}", fg="white")
2262
+ conditional_output(f"OK: Module completeness matrix saved to: {output_file}", "white", verbose)
2156
2263
 
2157
2264
  except Exception as e:
2158
2265
  error_msg = f"Error creating module completeness matrix: {str(e)}"
2159
2266
  if logger:
2160
- logger.error(error_msg)
2161
- typer.secho(f"ERROR: {error_msg}", fg="red")
2162
-
2163
- if logger:
2164
- logger.error(f"Error details: {e}", exc_info=True)
2267
+ logger.error(error_msg, exc_info=True)
2268
+ else:
2269
+ log_error(error_msg, logger=logger)
2165
2270
 
2166
2271
 
2167
- def create_ko_matrix_from_emapper_annotation(emapper_file_path: str, output_file_path: str, logger: Optional[logging.Logger] = None) -> None:
2272
+ def create_ko_matrix_from_emapper_annotation(
2273
+ emapper_file_path: str,
2274
+ output_file_path: str,
2275
+ logger: Optional[logging.Logger] = None,
2276
+ verbose: bool = True,
2277
+ ) -> None:
2168
2278
  """
2169
2279
  Create a KO matrix from an eggNOG-mapper annotation file.
2170
2280
 
@@ -2203,24 +2313,22 @@ def create_ko_matrix_from_emapper_annotation(emapper_file_path: str, output_file
2203
2313
  - Removes 'ko:' prefixes and weight annotations like '(0.5)'
2204
2314
  - Skips rows with missing or '-' KO annotations
2205
2315
  """
2206
- typer.secho("\nCreating KO matrix from eggNOG-mapper annotations", fg="green")
2316
+ conditional_output("Creating KO matrix from eggNOG-mapper annotations", "green", verbose)
2207
2317
 
2208
2318
 
2209
2319
  if os.path.exists(output_file_path):
2210
- typer.secho(f"OK: KO matrix already exists at: {output_file_path}", fg="white")
2320
+ conditional_output(f"OK: KO matrix already exists at: {output_file_path}", "white", verbose)
2211
2321
  if logger:
2212
2322
  logger.info(f"KO matrix already exists at: {output_file_path}")
2213
2323
  return
2214
2324
 
2215
2325
  if not os.path.exists(emapper_file_path):
2216
2326
  error_msg = f"eMapper annotation file not found at {emapper_file_path}. Cannot proceed."
2217
- if logger:
2218
- logger.error(error_msg)
2219
- typer.secho(f"ERROR: {error_msg}", fg="red")
2327
+ log_error(error_msg, logger=logger)
2220
2328
  exit(1)
2221
2329
 
2222
2330
  try:
2223
- typer.secho("Processing eggNOG-mapper annotations and extracting KO terms...", fg="yellow")
2331
+ conditional_output("Processing eggNOG-mapper annotations and extracting KO terms...", "yellow", verbose)
2224
2332
 
2225
2333
  if logger:
2226
2334
  logger.info(f"Reading eggNOG-mapper annotations from: {emapper_file_path}")
@@ -2280,13 +2388,11 @@ def create_ko_matrix_from_emapper_annotation(emapper_file_path: str, output_file
2280
2388
 
2281
2389
  if not kos_data_for_matrix_df:
2282
2390
  error_msg = "No KO data found in the eMapper annotations file"
2283
- if logger:
2284
- logger.error(error_msg)
2285
- typer.secho(f"ERROR: {error_msg}", fg="red")
2391
+ log_error(error_msg, logger=logger)
2286
2392
  return
2287
2393
 
2288
2394
 
2289
- typer.secho("Creating KO count matrix (kos_matrix.csv)...", fg="yellow")
2395
+ conditional_output("Creating KO count matrix (kos_matrix.csv)...", "yellow", verbose)
2290
2396
  kos_count_df = pd.concat(kos_data_for_matrix_df)
2291
2397
 
2292
2398
 
@@ -2313,13 +2419,14 @@ def create_ko_matrix_from_emapper_annotation(emapper_file_path: str, output_file
2313
2419
  logger.info(f"Created KO matrix with {len(kos_count_df)} genomes and {len(kos_count_df.columns)-1} KOs")
2314
2420
  logger.info(f"KO matrix saved to: {output_file_path}")
2315
2421
 
2316
- typer.secho(f"OK: KO matrix created and saved to: {output_file_path}", fg="white")
2422
+ conditional_output(f"OK: KO matrix created and saved to: {output_file_path}", "white", verbose)
2317
2423
 
2318
2424
  except Exception as e:
2319
2425
  error_msg = f"Error creating KO matrix: {str(e)}"
2320
2426
  if logger:
2321
2427
  logger.error(error_msg, exc_info=True)
2322
- typer.secho(f"ERROR: {error_msg}", fg="red")
2428
+ else:
2429
+ log_error(error_msg, logger=logger)
2323
2430
  exit(1)
2324
2431
 
2325
2432
 
@@ -2347,16 +2454,12 @@ def check_kpct_installed(logger: Optional[logging.Logger] = None) -> bool:
2347
2454
 
2348
2455
  if give_completeness_check.returncode != 0:
2349
2456
  error_msg = "KPCT 'give_completeness' tool not found in PATH. Please install it via pip: pip install kegg-pathways-completeness"
2350
- if logger:
2351
- logger.error(error_msg)
2352
- typer.secho(f"ERROR: {error_msg}", fg="red")
2457
+ log_error(error_msg, logger=logger)
2353
2458
  return False
2354
2459
  return True
2355
2460
  except Exception as e:
2356
2461
  error_msg = f"Error checking for KPCT installation: {str(e)}"
2357
- if logger:
2358
- logger.error(error_msg)
2359
- typer.secho(f"ERROR: {error_msg}", fg="red")
2462
+ log_error(error_msg, logger=logger)
2360
2463
  return False
2361
2464
 
2362
2465
 
@@ -2441,9 +2544,7 @@ def chunk_kpct_input_file(kpct_input_file: str, savedir: str, n_chunks: int, log
2441
2544
 
2442
2545
  if not lines:
2443
2546
  error_msg = f"KPCT input file is empty: {kpct_input_file}"
2444
- if logger:
2445
- logger.error(error_msg)
2446
- typer.secho(f"ERROR: {error_msg}", fg="red")
2547
+ log_error(error_msg, logger=logger)
2447
2548
  return []
2448
2549
 
2449
2550
  # Calculate lines per chunk using ceiling division to ensure we create exactly n_chunks
@@ -2495,9 +2596,7 @@ def chunk_kpct_input_file(kpct_input_file: str, savedir: str, n_chunks: int, log
2495
2596
 
2496
2597
  except Exception as e:
2497
2598
  error_msg = f"Error chunking KPCT input file: {str(e)}"
2498
- if logger:
2499
- logger.error(error_msg)
2500
- typer.secho(f"ERROR: {error_msg}", fg="red")
2599
+ log_error(error_msg, logger=logger)
2501
2600
  return []
2502
2601
 
2503
2602
 
@@ -2636,13 +2735,18 @@ def concatenate_kpct_outputs(chunk_dirs: List[str], savedir: str, kpct_outprefix
2636
2735
 
2637
2736
  except Exception as e:
2638
2737
  error_msg = f"Error concatenating KPCT outputs: {str(e)}"
2639
- if logger:
2640
- logger.error(error_msg)
2641
- typer.secho(f"ERROR: {error_msg}", fg="red")
2738
+ log_error(error_msg, logger=logger)
2642
2739
  return False
2643
2740
 
2644
2741
 
2645
- def run_kpct_parallel(kpct_input_file: str, savedir: str, kpct_outprefix: str, ncpus: int, logger: Optional[logging.Logger] = None) -> bool:
2742
+ def run_kpct_parallel(
2743
+ kpct_input_file: str,
2744
+ savedir: str,
2745
+ kpct_outprefix: str,
2746
+ ncpus: int,
2747
+ logger: Optional[logging.Logger] = None,
2748
+ verbose: bool = True,
2749
+ ) -> bool:
2646
2750
  """
2647
2751
  Run KPCT in parallel by chunking the input file and processing chunks concurrently.
2648
2752
 
@@ -2693,7 +2797,7 @@ def run_kpct_parallel(kpct_input_file: str, savedir: str, kpct_outprefix: str, n
2693
2797
  if all(os.path.exists(f) for f in final_outputs):
2694
2798
  if logger:
2695
2799
  logger.info("KPCT output files already exist, skipping parallel processing")
2696
- typer.secho("OK: KPCT output files already exist", fg="white")
2800
+ conditional_output("OK: KPCT output files already exist", "white", verbose)
2697
2801
  return True
2698
2802
 
2699
2803
 
@@ -2702,7 +2806,7 @@ def run_kpct_parallel(kpct_input_file: str, savedir: str, kpct_outprefix: str, n
2702
2806
  if logger:
2703
2807
  logger.info(f"Running KPCT in parallel with up to {n_chunks} chunks using {ncpus} CPU cores")
2704
2808
 
2705
- typer.secho(f"Running KPCT in parallel with up to {n_chunks} chunks", fg="yellow")
2809
+ conditional_output(f"Running KPCT in parallel with up to {n_chunks} chunks", "yellow", verbose)
2706
2810
 
2707
2811
 
2708
2812
  chunks_base_dir = os.path.join(get_tmp_dir(savedir), "kpct_chunk_outputs")
@@ -2721,7 +2825,7 @@ def run_kpct_parallel(kpct_input_file: str, savedir: str, kpct_outprefix: str, n
2721
2825
  if all_chunks_exist:
2722
2826
  if logger:
2723
2827
  logger.info("All chunk outputs already exist, proceeding to concatenation")
2724
- typer.secho("OK: All chunks already processed, concatenating results", fg="white")
2828
+ conditional_output("OK: All chunks already processed, concatenating results", "white", verbose)
2725
2829
 
2726
2830
 
2727
2831
  concatenation_success = concatenate_kpct_outputs(existing_chunk_dirs, savedir, kpct_outprefix, logger)
@@ -2778,11 +2882,11 @@ def run_kpct_parallel(kpct_input_file: str, savedir: str, kpct_outprefix: str, n
2778
2882
  if not chunks_to_process:
2779
2883
  if logger:
2780
2884
  logger.info("All chunks already processed, proceeding to concatenation")
2781
- typer.secho("OK: All chunks already processed, concatenating results", fg="white")
2885
+ conditional_output("OK: All chunks already processed, concatenating results", "white", verbose)
2782
2886
  else:
2783
2887
  if logger:
2784
2888
  logger.info(f"Processing {len(chunks_to_process)} remaining chunks")
2785
- typer.secho(f"Processing {len(chunks_to_process)} remaining chunks", fg="yellow")
2889
+ conditional_output(f"Processing {len(chunks_to_process)} remaining chunks", "yellow", verbose)
2786
2890
 
2787
2891
 
2788
2892
  failed_chunks = []
@@ -2824,9 +2928,7 @@ def run_kpct_parallel(kpct_input_file: str, savedir: str, kpct_outprefix: str, n
2824
2928
 
2825
2929
  if failed_chunks:
2826
2930
  error_msg = f"Failed to process {len(failed_chunks)} chunks: {failed_chunks}"
2827
- if logger:
2828
- logger.error(error_msg)
2829
- typer.secho(f"ERROR: {error_msg}", fg="red")
2931
+ log_error(error_msg, logger=logger)
2830
2932
  return False
2831
2933
 
2832
2934
 
@@ -2837,9 +2939,7 @@ def run_kpct_parallel(kpct_input_file: str, savedir: str, kpct_outprefix: str, n
2837
2939
 
2838
2940
  if not all_chunks_exist:
2839
2941
  error_msg = f"Not all chunks were processed successfully. Expected {len(all_chunk_dirs)}, got {len(final_chunk_dirs)}"
2840
- if logger:
2841
- logger.error(error_msg)
2842
- typer.secho(f"ERROR: {error_msg}", fg="red")
2942
+ log_error(error_msg, logger=logger)
2843
2943
  return False
2844
2944
 
2845
2945
 
@@ -2855,26 +2955,29 @@ def run_kpct_parallel(kpct_input_file: str, savedir: str, kpct_outprefix: str, n
2855
2955
  missing_outputs = [f for f in final_outputs if not os.path.exists(f)]
2856
2956
  if missing_outputs:
2857
2957
  error_msg = f"Failed to create final output files: {missing_outputs}"
2858
- if logger:
2859
- logger.error(error_msg)
2860
- typer.secho(f"ERROR: {error_msg}", fg="red")
2958
+ log_error(error_msg, logger=logger)
2861
2959
  return False
2862
2960
 
2863
2961
  if logger:
2864
2962
  logger.info("Successfully completed parallel KPCT processing")
2865
- typer.secho("OK: KPCT parallel processing completed successfully", fg="green")
2963
+ conditional_output("OK: KPCT parallel processing completed successfully", "green", verbose)
2866
2964
 
2867
2965
  return True
2868
2966
 
2869
2967
  except Exception as e:
2870
2968
  error_msg = f"Error in parallel KPCT processing: {str(e)}"
2871
- if logger:
2872
- logger.error(error_msg)
2873
- typer.secho(f"ERROR: {error_msg}", fg="red")
2969
+ log_error(error_msg, logger=logger)
2874
2970
  return False
2875
2971
 
2876
2972
 
2877
- def run_kpct(kpct_input_file: str, savedir: str, kpct_outprefix: str, resource_log_file: str, logger: Optional[logging.Logger] = None) -> bool:
2973
+ def run_kpct(
2974
+ kpct_input_file: str,
2975
+ savedir: str,
2976
+ kpct_outprefix: str,
2977
+ resource_log_file: str,
2978
+ logger: Optional[logging.Logger] = None,
2979
+ verbose: bool = True,
2980
+ ) -> bool:
2878
2981
  """
2879
2982
  Run the KPCT give_completeness tool (sequential version).
2880
2983
  This function is kept as a fallback in case parallel processing fails.
@@ -2912,21 +3015,19 @@ def run_kpct(kpct_input_file: str, savedir: str, kpct_outprefix: str, resource_l
2912
3015
  resource_log_file,
2913
3016
  logger,
2914
3017
  "Running KPCT give_completeness tool (sequential)",
2915
- True
3018
+ verbose,
2916
3019
  )
2917
3020
 
2918
3021
  if returncode != 0:
2919
3022
  error_msg = f"KPCT tool failed with return code {returncode}"
2920
3023
  if stderr:
2921
3024
  error_msg += f": {stderr}"
2922
- if logger:
2923
- logger.error(error_msg)
2924
- typer.secho(f"ERROR: {error_msg}", fg="red")
3025
+ log_error(error_msg, logger=logger)
2925
3026
  return False
2926
3027
 
2927
3028
 
2928
3029
  if logger and stdout:
2929
- logger.info(f"KPCT stdout: {stdout}")
3030
+ _log_lines(logger, f"KPCT stdout:\n{stdout}", logging.INFO)
2930
3031
 
2931
3032
 
2932
3033
  possible_kpct_files = [
@@ -2939,28 +3040,36 @@ def run_kpct(kpct_input_file: str, savedir: str, kpct_outprefix: str, resource_l
2939
3040
 
2940
3041
  if not kpct_file_exists:
2941
3042
  error_msg = f"KPCT did not generate any output files with prefix '{kpct_outprefix}'"
2942
- if logger:
2943
- logger.error(error_msg)
2944
- typer.secho(f"ERROR: {error_msg}", fg="red")
3043
+ log_error(error_msg, logger=logger)
2945
3044
  return False
2946
3045
 
2947
3046
 
2948
3047
  created_files = [f for f in possible_kpct_files if os.path.exists(f)]
2949
3048
  if logger:
2950
3049
  logger.info(f"KPCT successfully created output files: {created_files}")
2951
- typer.secho(f"OK: KPCT completed successfully. Created files: {[os.path.basename(f) for f in created_files]}", fg="green")
3050
+ conditional_output(
3051
+ f"OK: KPCT completed successfully. Created files: {[os.path.basename(f) for f in created_files]}",
3052
+ "green",
3053
+ verbose,
3054
+ )
2952
3055
 
2953
3056
  return True
2954
3057
 
2955
3058
  except Exception as e:
2956
3059
  error_msg = f"Error running KPCT: {str(e)}"
2957
- if logger:
2958
- logger.error(error_msg)
2959
- typer.secho(f"ERROR: {error_msg}", fg="red")
3060
+ log_error(error_msg, logger=logger)
2960
3061
  return False
2961
3062
 
2962
3063
 
2963
- def run_kpct_with_fallback(kpct_input_file: str, savedir: str, kpct_outprefix: str, ncpus: int, resource_log_file: str, logger: Optional[logging.Logger] = None) -> bool:
3064
+ def run_kpct_with_fallback(
3065
+ kpct_input_file: str,
3066
+ savedir: str,
3067
+ kpct_outprefix: str,
3068
+ ncpus: int,
3069
+ resource_log_file: str,
3070
+ logger: Optional[logging.Logger] = None,
3071
+ verbose: bool = True,
3072
+ ) -> bool:
2964
3073
  """
2965
3074
  Run KPCT with parallel processing and fallback to sequential if parallel fails.
2966
3075
 
@@ -2987,20 +3096,27 @@ def run_kpct_with_fallback(kpct_input_file: str, savedir: str, kpct_outprefix: s
2987
3096
  if logger:
2988
3097
  logger.info("Attempting parallel KPCT processing")
2989
3098
 
2990
- parallel_success = run_kpct_parallel(kpct_input_file, savedir, kpct_outprefix, ncpus, logger)
3099
+ parallel_success = run_kpct_parallel(
3100
+ kpct_input_file,
3101
+ savedir,
3102
+ kpct_outprefix,
3103
+ ncpus,
3104
+ logger,
3105
+ verbose=verbose,
3106
+ )
2991
3107
 
2992
3108
  if parallel_success:
2993
3109
  return True
2994
3110
  else:
2995
3111
  if logger:
2996
3112
  logger.warning("Parallel KPCT processing failed, falling back to sequential processing")
2997
- typer.secho("WARNING: Parallel processing failed, trying sequential approach", fg="yellow")
3113
+ log_warning("Parallel processing failed, trying sequential approach", logger=logger)
2998
3114
 
2999
3115
 
3000
3116
  if logger:
3001
3117
  logger.info("Running KPCT in sequential mode")
3002
3118
 
3003
- return run_kpct(kpct_input_file, savedir, kpct_outprefix, resource_log_file, logger)
3119
+ return run_kpct(kpct_input_file, savedir, kpct_outprefix, resource_log_file, logger, verbose=verbose)
3004
3120
 
3005
3121
 
3006
3122
  app = typer.Typer()
@@ -3012,7 +3128,12 @@ def pipeline(genomedir: str,
3012
3128
  adapt_headers: bool=False,
3013
3129
  del_tmp: bool=True,
3014
3130
  calculate_complementarity: int=0,
3015
- lowmem: bool = typer.Option(False, "--lowmem", help="Run emapper with reduced memory footprint, omitting --dbmem flag."),
3131
+ lowmem: bool = typer.Option(
3132
+ False,
3133
+ "--lowmem/--fullmem",
3134
+ "--low-mem/--full-mem",
3135
+ help="Run emapper with reduced memory footprint, omitting --dbmem flag.",
3136
+ ),
3016
3137
  verbose: bool = typer.Option(False, "--verbose", help="Enable verbose output with detailed progress information."),
3017
3138
  log_level: str = typer.Option("INFO", "--log-level", "-l", help="Logging level (DEBUG, INFO, WARNING, ERROR)."),
3018
3139
  eggnog_data_dir: Optional[str] = typer.Option(None, "--eggnog-data-dir", help="Path to eggNOG-mapper data directory (sets EGGNOG_DATA_DIR)."),
@@ -3066,6 +3187,7 @@ def pipeline(genomedir: str,
3066
3187
  # Setup logging first to capture everything
3067
3188
  log_dir = Path(savedir) / "logs"
3068
3189
  logger = configure_logging(log_level, log_dir)
3190
+ RESOURCE_SUMMARIES.clear()
3069
3191
  logger.info("Starting moducomp pipeline.")
3070
3192
  logger.info("Genome directory: %s", genomedir)
3071
3193
  logger.info("Output directory: %s", savedir)
@@ -3092,7 +3214,7 @@ def _run_pipeline_core(genomedir: str, savedir: str, ncpus: int, adapt_headers:
3092
3214
  start_time = time.time()
3093
3215
 
3094
3216
  greetings(verbose)
3095
- conditional_output("\nInitializing pipeline...", "green", verbose)
3217
+ conditional_output("Initializing pipeline...", "green", verbose)
3096
3218
 
3097
3219
  # Convert to absolute paths
3098
3220
  genomedir = os.path.abspath(genomedir)
@@ -3175,21 +3297,19 @@ def _run_pipeline_core(genomedir: str, savedir: str, ncpus: int, adapt_headers:
3175
3297
  logger.info("Starting genome merging")
3176
3298
  merge_success = merge_genomes(savedir, logger, verbose)
3177
3299
  if not merge_success:
3178
- logger.error("Failed to merge genomes. Exiting pipeline.")
3179
- typer.secho("ERROR: Failed to merge genomes. Exiting pipeline.", fg="red")
3300
+ log_error("Failed to merge genomes. Exiting pipeline.", logger=logger)
3180
3301
  return
3181
3302
 
3182
3303
  # Run eggNOG-mapper
3183
3304
  logger.info(f"Starting eMapper with {ncpus} CPUs")
3184
3305
  emapper_success = run_emapper(savedir, ncpus, resource_log_file, lowmem, logger, verbose)
3185
3306
  if not emapper_success:
3186
- logger.error("Failed to run emapper. Exiting pipeline.")
3187
- typer.secho("ERROR: Failed to run emapper. Exiting pipeline.", fg="red")
3307
+ log_error("Failed to run emapper. Exiting pipeline.", logger=logger)
3188
3308
  return
3189
3309
 
3190
3310
  # Create KO matrix from annotations
3191
3311
  logger.info(f"Creating KO matrix from eMapper annotations: {emapper_annotation_file}")
3192
- create_ko_matrix_from_emapper_annotation(emapper_annotation_file, ko_matrix_path, logger)
3312
+ create_ko_matrix_from_emapper_annotation(emapper_annotation_file, ko_matrix_path, logger, verbose)
3193
3313
  logger.info(f"KO matrix created: {ko_matrix_path}")
3194
3314
 
3195
3315
  # Process module completeness
@@ -3197,7 +3317,11 @@ def _run_pipeline_core(genomedir: str, savedir: str, ncpus: int, adapt_headers:
3197
3317
 
3198
3318
  if os.path.exists(module_completeness_file):
3199
3319
  logger.info(f"Module completeness matrix already exists: {module_completeness_file}")
3200
- typer.secho(f"OK: Using existing module completeness matrix: {module_completeness_file}", fg="white")
3320
+ conditional_output(
3321
+ f"OK: Using existing module completeness matrix: {module_completeness_file}",
3322
+ "white",
3323
+ verbose,
3324
+ )
3201
3325
  else:
3202
3326
  # Set up KPCT processing
3203
3327
  kpct_outprefix = "output_give_completeness"
@@ -3215,10 +3339,10 @@ def _run_pipeline_core(genomedir: str, savedir: str, ncpus: int, adapt_headers:
3215
3339
  # Convert KO matrix to KPCT format if needed
3216
3340
  if not os.path.exists(kpct_input_file):
3217
3341
  logger.info(f"Converting KO matrix to KPCT format: {ko_matrix_path}")
3218
- ko_matrix_to_kpct_format(ko_matrix_path, savedir, calculate_complementarity, logger)
3342
+ ko_matrix_to_kpct_format(ko_matrix_path, savedir, calculate_complementarity, logger, verbose)
3219
3343
  else:
3220
3344
  logger.info(f"KPCT input file already exists: {kpct_input_file}")
3221
- typer.secho(f"OK: Using existing KPCT input file: {kpct_input_file}", fg="white")
3345
+ conditional_output(f"OK: Using existing KPCT input file: {kpct_input_file}", "white", verbose)
3222
3346
 
3223
3347
  # Run KPCT if needed
3224
3348
  if not kpct_file_exists:
@@ -3228,16 +3352,28 @@ def _run_pipeline_core(genomedir: str, savedir: str, ncpus: int, adapt_headers:
3228
3352
 
3229
3353
  # Run KPCT with parallel processing
3230
3354
  logger.info(f"Running KPCT with parallel processing on file: {kpct_input_file}")
3231
- kpct_success = run_kpct_with_fallback(kpct_input_file, savedir, kpct_outprefix, ncpus, resource_log_file, logger)
3355
+ kpct_success = run_kpct_with_fallback(
3356
+ kpct_input_file,
3357
+ savedir,
3358
+ kpct_outprefix,
3359
+ ncpus,
3360
+ resource_log_file,
3361
+ logger,
3362
+ verbose=verbose,
3363
+ )
3232
3364
  if not kpct_success:
3233
3365
  return
3234
3366
  else:
3235
3367
  logger.info(f"KPCT output file(s) already exist with prefix '{kpct_outprefix}'")
3236
- typer.secho(f"OK: Using existing KPCT output files with prefix '{kpct_outprefix}'", fg="white")
3368
+ conditional_output(
3369
+ f"OK: Using existing KPCT output files with prefix '{kpct_outprefix}'",
3370
+ "white",
3371
+ verbose,
3372
+ )
3237
3373
 
3238
3374
  # Create module completeness matrix
3239
3375
  logger.info(f"Creating module completeness matrix")
3240
- create_module_completeness_matrix(savedir, kpct_outprefix, logger)
3376
+ create_module_completeness_matrix(savedir, kpct_outprefix, logger, verbose)
3241
3377
 
3242
3378
  # Generate complementarity reports if requested
3243
3379
  if calculate_complementarity >= 2:
@@ -3248,7 +3384,11 @@ def _run_pipeline_core(genomedir: str, savedir: str, ncpus: int, adapt_headers:
3248
3384
  complementarity_report_file = f"{savedir}/module_completeness_complementarity_{n_members}member.tsv"
3249
3385
  if os.path.exists(complementarity_report_file):
3250
3386
  logger.info(f"Complementarity report for {n_members}-member combinations already exists: {complementarity_report_file}")
3251
- typer.secho(f"OK: Using existing {n_members}-member complementarity report: {complementarity_report_file}", fg="white")
3387
+ conditional_output(
3388
+ f"OK: Using existing {n_members}-member complementarity report: {complementarity_report_file}",
3389
+ "white",
3390
+ verbose,
3391
+ )
3252
3392
  else:
3253
3393
  logger.info(f"Generating complementarity report for {n_members}-member combinations")
3254
3394
  generate_complementarity_report(savedir, n_members, logger, verbose)
@@ -3256,7 +3396,7 @@ def _run_pipeline_core(genomedir: str, savedir: str, ncpus: int, adapt_headers:
3256
3396
  # Clean up temporary files if requested
3257
3397
  if del_tmp:
3258
3398
  logger.info("Cleaning up temporary files")
3259
- remove_temp_files(savedir, logger)
3399
+ remove_temp_files(savedir, logger, verbose)
3260
3400
 
3261
3401
  # Generate final resource usage summary
3262
3402
  log_final_resource_summary(resource_log_file, start_time, logger, verbose)
@@ -3299,7 +3439,8 @@ def test(
3299
3439
  lowmem: bool = typer.Option(
3300
3440
  True,
3301
3441
  "--lowmem/--fullmem",
3302
- help="Run emapper with reduced memory footprint during the test.",
3442
+ "--low-mem/--full-mem",
3443
+ help="Run emapper with reduced memory footprint during the test (default: low-mem).",
3303
3444
  ),
3304
3445
  verbose: bool = typer.Option(
3305
3446
  True,
@@ -3331,6 +3472,7 @@ def test(
3331
3472
 
3332
3473
  log_dir = Path(savedir) / "logs"
3333
3474
  logger = configure_logging(log_level, log_dir)
3475
+ RESOURCE_SUMMARIES.clear()
3334
3476
  logger.info("Starting moducomp test run.")
3335
3477
  logger.info("Test genomes: %s", test_root)
3336
3478
  logger.info("CLI command: %s", " ".join(shlex.quote(arg) for arg in sys.argv))
@@ -3446,6 +3588,9 @@ def download_eggnog_data(
3446
3588
  stdout_thread.start()
3447
3589
  stderr_thread.start()
3448
3590
 
3591
+ output_level = logging.INFO if verbose else logging.DEBUG
3592
+ error_level = logging.WARNING if verbose else logging.DEBUG
3593
+
3449
3594
  while process.poll() is None or not stdout_queue.empty() or not stderr_queue.empty():
3450
3595
  now = time.time()
3451
3596
 
@@ -3454,9 +3599,8 @@ def download_eggnog_data(
3454
3599
  while True:
3455
3600
  stream_type, line = stdout_queue.get_nowait()
3456
3601
  if line:
3457
- if verbose:
3458
- print(line, flush=True)
3459
- logger.info(line)
3602
+ if logger:
3603
+ _log_lines(logger, line, output_level)
3460
3604
  except queue.Empty:
3461
3605
  pass
3462
3606
 
@@ -3465,9 +3609,8 @@ def download_eggnog_data(
3465
3609
  while True:
3466
3610
  stream_type, line = stderr_queue.get_nowait()
3467
3611
  if line:
3468
- if verbose:
3469
- print(line, file=sys.stderr, flush=True)
3470
- logger.warning(line)
3612
+ if logger:
3613
+ _log_lines(logger, line, error_level)
3471
3614
  except queue.Empty:
3472
3615
  pass
3473
3616
 
@@ -3506,8 +3649,6 @@ def download_eggnog_data(
3506
3649
  f"across {total_files} files"
3507
3650
  )
3508
3651
  logger.info(summary)
3509
- if verbose:
3510
- typer.secho(summary, fg="green")
3511
3652
 
3512
3653
  if returncode != 0:
3513
3654
  raise typer.Exit(returncode)
@@ -3578,6 +3719,7 @@ def analyze_ko_matrix(
3578
3719
 
3579
3720
  log_dir = Path(savedir) / "logs"
3580
3721
  logger = configure_logging(log_level, log_dir)
3722
+ RESOURCE_SUMMARIES.clear()
3581
3723
 
3582
3724
  # Setup resource monitoring
3583
3725
  resource_log_file = setup_resource_logging(log_dir)
@@ -3589,11 +3731,11 @@ def analyze_ko_matrix(
3589
3731
  logger.info("CLI command: %s", " ".join(shlex.quote(arg) for arg in sys.argv))
3590
3732
 
3591
3733
  greetings(verbose)
3592
- conditional_output("\nInitializing KO matrix analysis...", "green", verbose)
3734
+ conditional_output("Initializing KO matrix analysis...", "green", verbose)
3593
3735
 
3594
3736
 
3595
3737
  if not os.path.exists(kos_matrix):
3596
- typer.secho(f"ERROR: KO matrix file not found at: {kos_matrix}", fg="red")
3738
+ log_error(f"KO matrix file not found at: {kos_matrix}", logger=logger)
3597
3739
  exit(1)
3598
3740
 
3599
3741
 
@@ -3607,7 +3749,7 @@ def analyze_ko_matrix(
3607
3749
 
3608
3750
 
3609
3751
  if check_final_reports_exist(savedir, calculate_complementarity, logger):
3610
- typer.secho("OK: All output files already exist. Skipping processing.", fg="green")
3752
+ conditional_output("OK: All output files already exist. Skipping processing.", "green", verbose)
3611
3753
  logger.info("Analysis skipped as all output files already exist")
3612
3754
  return
3613
3755
 
@@ -3642,10 +3784,10 @@ def analyze_ko_matrix(
3642
3784
 
3643
3785
  if not os.path.exists(kpct_input_file):
3644
3786
  logger.info(f"Converting KO matrix to KPCT format: {kos_matrix}")
3645
- ko_matrix_to_kpct_format(kos_matrix, savedir, calculate_complementarity, logger)
3787
+ ko_matrix_to_kpct_format(kos_matrix, savedir, calculate_complementarity, logger, verbose)
3646
3788
  else:
3647
3789
  logger.info(f"KPCT input file already exists: {kpct_input_file}")
3648
- typer.secho(f"OK: Using existing KPCT input file: {kpct_input_file}", fg="white")
3790
+ conditional_output(f"OK: Using existing KPCT input file: {kpct_input_file}", "white", verbose)
3649
3791
 
3650
3792
 
3651
3793
  if not kpct_file_exists:
@@ -3655,22 +3797,38 @@ def analyze_ko_matrix(
3655
3797
 
3656
3798
 
3657
3799
  logger.info(f"Running KPCT with parallel processing on file: {kpct_input_file}")
3658
- kpct_success = run_kpct_with_fallback(kpct_input_file, savedir, kpct_outprefix, ncpus, resource_log_file, logger)
3800
+ kpct_success = run_kpct_with_fallback(
3801
+ kpct_input_file,
3802
+ savedir,
3803
+ kpct_outprefix,
3804
+ ncpus,
3805
+ resource_log_file,
3806
+ logger,
3807
+ verbose=verbose,
3808
+ )
3659
3809
  if not kpct_success:
3660
3810
  exit(1)
3661
3811
  else:
3662
3812
  logger.info(f"KPCT output file(s) already exist with prefix '{kpct_outprefix}'")
3663
- typer.secho(f"OK: Using existing KPCT output files with prefix '{kpct_outprefix}'", fg="white")
3813
+ conditional_output(
3814
+ f"OK: Using existing KPCT output files with prefix '{kpct_outprefix}'",
3815
+ "white",
3816
+ verbose,
3817
+ )
3664
3818
 
3665
3819
 
3666
3820
  if not os.path.exists(module_completeness_file):
3667
3821
  if logger:
3668
3822
  logger.info(f"Creating module completeness matrix")
3669
- create_module_completeness_matrix(savedir, kpct_outprefix, logger)
3823
+ create_module_completeness_matrix(savedir, kpct_outprefix, logger, verbose)
3670
3824
  else:
3671
3825
  if logger:
3672
3826
  logger.info(f"Module completeness matrix already exists: {module_completeness_file}")
3673
- typer.secho(f"OK: Using existing module completeness matrix: {module_completeness_file}", fg="white")
3827
+ conditional_output(
3828
+ f"OK: Using existing module completeness matrix: {module_completeness_file}",
3829
+ "white",
3830
+ verbose,
3831
+ )
3674
3832
 
3675
3833
 
3676
3834
  if calculate_complementarity >= 2:
@@ -3682,7 +3840,11 @@ def analyze_ko_matrix(
3682
3840
  complementarity_report_file = f"{savedir}/module_completeness_complementarity_{n_members}member.tsv"
3683
3841
  if os.path.exists(complementarity_report_file):
3684
3842
  logger.info(f"Complementarity report for {n_members}-member combinations already exists: {complementarity_report_file}")
3685
- typer.secho(f"OK: Using existing {n_members}-member complementarity report: {complementarity_report_file}", fg="white")
3843
+ conditional_output(
3844
+ f"OK: Using existing {n_members}-member complementarity report: {complementarity_report_file}",
3845
+ "white",
3846
+ verbose,
3847
+ )
3686
3848
  else:
3687
3849
  logger.info(f"Generating complementarity report for {n_members}-member combinations")
3688
3850
  generate_complementarity_report(savedir, n_members, logger, verbose)
@@ -3691,7 +3853,7 @@ def analyze_ko_matrix(
3691
3853
  if del_tmp:
3692
3854
  if logger:
3693
3855
  logger.info("Cleaning up temporary files")
3694
- remove_temp_files(savedir, logger)
3856
+ remove_temp_files(savedir, logger, verbose)
3695
3857
 
3696
3858
  # Generate final resource usage summary
3697
3859
  log_final_resource_summary(resource_log_file, start_time, logger, verbose)
@@ -3702,7 +3864,8 @@ def analyze_ko_matrix(
3702
3864
  except Exception as e:
3703
3865
  if logger:
3704
3866
  logger.error(f"Error in KPCT analysis: {str(e)}", exc_info=True)
3705
- typer.secho(f"ERROR: Error in KPCT analysis: {str(e)}", fg="red")
3867
+ else:
3868
+ log_error(f"Error in KPCT analysis: {str(e)}", logger=logger)
3706
3869
  exit(1)
3707
3870
 
3708
3871