moducomp 0.7.3__tar.gz → 0.7.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: moducomp
3
- Version: 0.7.3
3
+ Version: 0.7.4
4
4
  Summary: moducomp: metabolic module completeness and complementarity for microbiomes.
5
5
  Keywords: bioinformatics,microbiome,metabolic,kegg,genomics
6
6
  Author-email: "Juan C. Villada" <jvillada@lbl.gov>
@@ -74,7 +74,7 @@ If `EGGNOG_DATA_DIR` is not set, `moducomp download-eggnog-data` defaults to `${
74
74
  Small test data sets ship with `moducomp`. After installation you can confirm the pipeline by running:
75
75
 
76
76
  ```bash
77
- moducomp test --ncpus 2 --eggnog-data-dir "$EGGNOG_DATA_DIR"
77
+ moducomp test --ncpus 16 --eggnog-data-dir "$EGGNOG_DATA_DIR"
78
78
  ```
79
79
 
80
80
  ### Developer install (Pixi)
@@ -49,7 +49,7 @@ If `EGGNOG_DATA_DIR` is not set, `moducomp download-eggnog-data` defaults to `${
49
49
  Small test data sets ship with `moducomp`. After installation you can confirm the pipeline by running:
50
50
 
51
51
  ```bash
52
- moducomp test --ncpus 2 --eggnog-data-dir "$EGGNOG_DATA_DIR"
52
+ moducomp test --ncpus 16 --eggnog-data-dir "$EGGNOG_DATA_DIR"
53
53
  ```
54
54
 
55
55
  ### Developer install (Pixi)
@@ -2,7 +2,7 @@
2
2
  moducomp: metabolic module completeness and complementarity for microbiomes.
3
3
  """
4
4
 
5
- __version__ = "0.7.3"
5
+ __version__ = "0.7.4"
6
6
  __author__ = "Juan C. Villada"
7
7
  __email__ = "jvillada@lbl.gov"
8
8
  __title__ = "moducomp"
@@ -141,8 +141,39 @@ def emit_error(message: str, logger: Optional[logging.Logger] = None) -> None:
141
141
  """Log and emit an error to both stdout and stderr."""
142
142
  if logger:
143
143
  logger.error(message)
144
- typer.secho(f"❌ [ERROR] {message}", fg="red", err=True)
145
- typer.secho(f"❌ [ERROR] {message}", fg="red")
144
+ typer.secho(f"ERROR: {message}", fg="red", err=True)
145
+ typer.secho(f"ERROR: {message}", fg="red")
146
+
147
+
148
+ def format_bytes(num_bytes: float) -> str:
149
+ """Format bytes as a human-readable string."""
150
+ units = ["B", "KB", "MB", "GB", "TB", "PB"]
151
+ value = float(num_bytes)
152
+ for unit in units:
153
+ if value < 1024 or unit == units[-1]:
154
+ return f"{value:.1f} {unit}"
155
+ value /= 1024
156
+ return f"{value:.1f} PB"
157
+
158
+
159
+ def get_dir_size(path: Path) -> int:
160
+ """Return total size of files under path."""
161
+ total = 0
162
+ for root, _, files in os.walk(path):
163
+ for name in files:
164
+ try:
165
+ total += (Path(root) / name).stat().st_size
166
+ except OSError:
167
+ continue
168
+ return total
169
+
170
+
171
+ def count_files(path: Path) -> int:
172
+ """Return number of files under path."""
173
+ total = 0
174
+ for _, _, files in os.walk(path):
175
+ total += len(files)
176
+ return total
146
177
 
147
178
 
148
179
  def default_eggnog_data_dir() -> Path:
@@ -329,7 +360,7 @@ def run_subprocess_with_logging(
329
360
  error_msg = f"Exception running command {' '.join(cmd)}: {str(e)}"
330
361
  if logger:
331
362
  logger.error(error_msg)
332
- print(f"❌ [ERROR] {error_msg}", file=sys.stderr)
363
+ print(f"ERROR: {error_msg}", file=sys.stderr)
333
364
  return -1, "", str(e)
334
365
 
335
366
 
@@ -876,10 +907,10 @@ def how_many_genomes(genomedir: str, verbose: bool = True):
876
907
  """
877
908
  n_files = len(get_path_to_each_genome(genomedir))
878
909
  if n_files > 0:
879
- conditional_output(f"✅ [OK] {n_files} faa files were found in '{genomedir}'\n", "white", verbose)
910
+ conditional_output(f"OK: {n_files} faa files were found in '{genomedir}'\n", "white", verbose)
880
911
  else:
881
912
  # Always show errors regardless of verbose setting
882
- typer.secho(f"❌ [ERROR] No FAA files were found in '{genomedir}'\n", fg="red")
913
+ typer.secho(f"ERROR: No FAA files were found in '{genomedir}'\n", fg="red")
883
914
  exit()
884
915
 
885
916
 
@@ -896,10 +927,10 @@ def create_output_dir(savedir: str, verbose: bool = True):
896
927
  """
897
928
  conditional_output("\nCreating output directory", "green", verbose)
898
929
  if os.path.exists(savedir):
899
- conditional_output(f"✅ [OK] Output directory already exists at: {savedir}\n", "white", verbose)
930
+ conditional_output(f"OK: Output directory already exists at: {savedir}\n", "white", verbose)
900
931
  else:
901
932
  os.makedirs(savedir, exist_ok=True)
902
- conditional_output(f"✅ [OK] Output directory created at: {savedir}\n", "white", verbose)
933
+ conditional_output(f"OK: Output directory created at: {savedir}\n", "white", verbose)
903
934
 
904
935
 
905
936
  def get_tmp_dir(savedir:str) -> str:
@@ -934,10 +965,10 @@ def create_tmp_dir(savedir: str, verbose: bool = True):
934
965
  conditional_output("\nCreating tmp dir", "green", verbose)
935
966
  tmp_dir_path = get_tmp_dir(savedir)
936
967
  if (os.path.exists(tmp_dir_path)):
937
- conditional_output(f"✅ [OK] Tmp directory already exists at: {tmp_dir_path}\n", "white", verbose)
968
+ conditional_output(f"OK: Tmp directory already exists at: {tmp_dir_path}\n", "white", verbose)
938
969
  else:
939
970
  os.mkdir(tmp_dir_path)
940
- conditional_output(f"✅ [OK] Tmp directory created at: {tmp_dir_path}\n", "white", verbose)
971
+ conditional_output(f"OK: Tmp directory created at: {tmp_dir_path}\n", "white", verbose)
941
972
 
942
973
 
943
974
  def adapt_fasta_headers(genomedir: str, savedir: str, verbose: bool = True) -> None:
@@ -962,7 +993,7 @@ def adapt_fasta_headers(genomedir: str, savedir: str, verbose: bool = True) -> N
962
993
  path_to_each_genome = get_path_to_each_genome(genomedir)
963
994
  output_dir = f"{get_tmp_dir(savedir)}/faa"
964
995
  if os.path.exists(output_dir):
965
- conditional_output(f"✅ [OK] Fasta headers already modified at: {output_dir}\n", "white", verbose)
996
+ conditional_output(f"OK: Fasta headers already modified at: {output_dir}\n", "white", verbose)
966
997
  return
967
998
 
968
999
  os.mkdir(output_dir)
@@ -978,7 +1009,7 @@ def adapt_fasta_headers(genomedir: str, savedir: str, verbose: bool = True) -> N
978
1009
  i+=1
979
1010
  else:
980
1011
  outfile.write(line)
981
- conditional_output(f"✅ [OK] Fasta headers modified at: {output_dir}\n", "white", verbose)
1012
+ conditional_output(f"OK: Fasta headers modified at: {output_dir}\n", "white", verbose)
982
1013
 
983
1014
 
984
1015
  def copy_faa_to_tmp(genomedir: str, savedir: str, verbose: bool = True) -> None:
@@ -1002,14 +1033,14 @@ def copy_faa_to_tmp(genomedir: str, savedir: str, verbose: bool = True) -> None:
1002
1033
  path_to_each_genome = get_path_to_each_genome(genomedir)
1003
1034
  output_dir = f"{get_tmp_dir(savedir)}/faa"
1004
1035
  if os.path.exists(output_dir):
1005
- conditional_output(f"✅ [OK] Fasta files already exist at: {output_dir}\n", "white", verbose)
1036
+ conditional_output(f"OK: Fasta files already exist at: {output_dir}\n", "white", verbose)
1006
1037
  return
1007
1038
 
1008
1039
  os.mkdir(output_dir)
1009
1040
  conditional_output("Copying genome files to temporary directory...", "yellow", verbose)
1010
1041
  for each_file in path_to_each_genome:
1011
1042
  shutil.copy(each_file, output_dir)
1012
- conditional_output(f"✅ [OK] Fasta files copied to: {output_dir}\n", "white", verbose)
1043
+ conditional_output(f"OK: Fasta files copied to: {output_dir}\n", "white", verbose)
1013
1044
 
1014
1045
 
1015
1046
  def merge_genomes(savedir: str, logger: Optional[logging.Logger] = None, verbose: bool = True) -> bool:
@@ -1036,7 +1067,7 @@ def merge_genomes(savedir: str, logger: Optional[logging.Logger] = None, verbose
1036
1067
 
1037
1068
 
1038
1069
  if os.path.exists(output_file):
1039
- conditional_output(f"✅ [OK] Merged genomes file already exists at: {output_file}\n", "white", verbose)
1070
+ conditional_output(f"OK: Merged genomes file already exists at: {output_file}\n", "white", verbose)
1040
1071
  if logger:
1041
1072
  logger.info(f"Using existing merged genomes file: {output_file}")
1042
1073
  return True
@@ -1045,7 +1076,7 @@ def merge_genomes(savedir: str, logger: Optional[logging.Logger] = None, verbose
1045
1076
  if not genome_file_paths:
1046
1077
  error_msg = f"No FAA files found in {get_tmp_dir(savedir)}/faa/"
1047
1078
  # Always show errors regardless of verbose setting
1048
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
1079
+ typer.secho(f"ERROR: {error_msg}", fg="red")
1049
1080
  if logger:
1050
1081
  logger.error(error_msg)
1051
1082
  return False
@@ -1057,14 +1088,14 @@ def merge_genomes(savedir: str, logger: Optional[logging.Logger] = None, verbose
1057
1088
  with open(each_file) as infile:
1058
1089
  for line in infile:
1059
1090
  outfile.write(line)
1060
- conditional_output(f"✅ [OK] Fasta files merged at: {output_file}\n", "white", verbose)
1091
+ conditional_output(f"OK: Fasta files merged at: {output_file}\n", "white", verbose)
1061
1092
  if logger:
1062
1093
  logger.info(f"Successfully created merged genome file: {output_file}")
1063
1094
  return True
1064
1095
  except Exception as e:
1065
1096
  error_msg = f"Error merging genome files: {str(e)}"
1066
1097
  # Always show errors regardless of verbose setting
1067
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
1098
+ typer.secho(f"ERROR: {error_msg}", fg="red")
1068
1099
  if logger:
1069
1100
  logger.error(error_msg)
1070
1101
  return False
@@ -1090,12 +1121,12 @@ def run_emapper(savedir: str, ncpus: int, resource_log_file: str, lowmem: bool =
1090
1121
  bool
1091
1122
  True if emapper ran successfully or outputs already exist, False otherwise
1092
1123
  """
1093
- typer.secho("\nRunning emapper", fg="green")
1124
+ conditional_output("\nStarting eggNOG-mapper", "green", verbose)
1094
1125
 
1095
1126
 
1096
1127
  final_emapper_annotation_file = f"{savedir}/emapper_out.emapper.annotations"
1097
1128
  if os.path.exists(final_emapper_annotation_file):
1098
- typer.secho(f"✅ [OK] Emapper annotations already exist at: {final_emapper_annotation_file}\n", fg="white")
1129
+ typer.secho(f"OK: Emapper annotations already exist at: {final_emapper_annotation_file}\n", fg="white")
1099
1130
  if logger:
1100
1131
  logger.info(f"Using existing emapper annotations: {final_emapper_annotation_file}")
1101
1132
  return True
@@ -1108,14 +1139,14 @@ def run_emapper(savedir: str, ncpus: int, resource_log_file: str, lowmem: bool =
1108
1139
 
1109
1140
  if not os.path.exists(merged_genomes_file):
1110
1141
  error_msg = f"Merged genomes file not found at: {merged_genomes_file}"
1111
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
1142
+ typer.secho(f"ERROR: {error_msg}", fg="red")
1112
1143
  if logger:
1113
1144
  logger.error(error_msg)
1114
1145
  return False
1115
1146
 
1116
1147
 
1117
1148
  if os.path.exists(emapper_tmp_file):
1118
- typer.secho(f"✅ [OK] Emapper output already exists at: {emapper_tmp_file}\n", fg="white")
1149
+ typer.secho(f"OK: Emapper output already exists at: {emapper_tmp_file}\n", fg="white")
1119
1150
  if logger:
1120
1151
  logger.info(f"Using existing emapper output from temporary directory: {emapper_tmp_file}")
1121
1152
 
@@ -1150,7 +1181,7 @@ def run_emapper(savedir: str, ncpus: int, resource_log_file: str, lowmem: bool =
1150
1181
  cmd_emapper,
1151
1182
  resource_log_file,
1152
1183
  logger,
1153
- "Running eggNOG-mapper",
1184
+ "eggNOG-mapper",
1154
1185
  verbose
1155
1186
  )
1156
1187
 
@@ -1158,7 +1189,7 @@ def run_emapper(savedir: str, ncpus: int, resource_log_file: str, lowmem: bool =
1158
1189
  error_msg = f"emapper failed with return code {returncode}"
1159
1190
  if stderr:
1160
1191
  error_msg += f": {stderr}"
1161
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
1192
+ typer.secho(f"ERROR: {error_msg}", fg="red")
1162
1193
  if logger:
1163
1194
  logger.error(error_msg)
1164
1195
  return False
@@ -1170,7 +1201,7 @@ def run_emapper(savedir: str, ncpus: int, resource_log_file: str, lowmem: bool =
1170
1201
 
1171
1202
  if not os.path.exists(emapper_tmp_file):
1172
1203
  error_msg = f"emapper did not generate expected output: {emapper_tmp_file}"
1173
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
1204
+ typer.secho(f"ERROR: {error_msg}", fg="red")
1174
1205
  if logger:
1175
1206
  logger.error(error_msg)
1176
1207
  return False
@@ -1178,15 +1209,15 @@ def run_emapper(savedir: str, ncpus: int, resource_log_file: str, lowmem: bool =
1178
1209
 
1179
1210
  shutil.copy(emapper_tmp_file, final_emapper_annotation_file)
1180
1211
 
1181
- typer.secho(f"✅ [OK] emapper output saved at: {output_folder_emapper}\n", fg="white")
1182
- typer.secho(f"✅ [OK] emapper annotations copied to: {final_emapper_annotation_file}\n", fg="white")
1212
+ typer.secho(f"OK: emapper output saved at: {output_folder_emapper}\n", fg="white")
1213
+ typer.secho(f"OK: emapper annotations copied to: {final_emapper_annotation_file}\n", fg="white")
1183
1214
  if logger:
1184
1215
  logger.info(f"Successfully ran emapper and saved annotations to: {final_emapper_annotation_file}")
1185
1216
  return True
1186
1217
 
1187
1218
  except Exception as e:
1188
1219
  error_msg = f"Error running emapper: {str(e)}"
1189
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
1220
+ typer.secho(f"ERROR: {error_msg}", fg="red")
1190
1221
  if logger:
1191
1222
  logger.error(error_msg)
1192
1223
  return False
@@ -1207,11 +1238,11 @@ def remove_temp_files(savedir: str, logger: Optional[logging.Logger] = None) ->
1207
1238
  if os.path.exists(tmp_dir):
1208
1239
  try:
1209
1240
  shutil.rmtree(tmp_dir)
1210
- typer.secho(f"✅ [OK] Temporary files removed from: {tmp_dir}", fg="white")
1241
+ typer.secho(f"OK: Temporary files removed from: {tmp_dir}", fg="white")
1211
1242
  if logger:
1212
1243
  logger.info(f"Removed temporary directory: {tmp_dir}")
1213
1244
  except Exception as e:
1214
- typer.secho(f"⚠️ [WARNING] Failed to remove temporary files: {str(e)}", fg="yellow")
1245
+ typer.secho(f"WARNING: Failed to remove temporary files: {str(e)}", fg="yellow")
1215
1246
  if logger:
1216
1247
  logger.warning(f"Failed to remove temporary directory {tmp_dir}: {str(e)}")
1217
1248
 
@@ -1318,7 +1349,7 @@ def generate_complementarity_report(
1318
1349
  if os.path.exists(output_file):
1319
1350
  if logger:
1320
1351
  logger.info(f"Complementarity report already exists at {output_file}")
1321
- conditional_output(f"✅ [OK] Complementarity report already exists at: {output_file}", "white", verbose)
1352
+ conditional_output(f"OK: Complementarity report already exists at: {output_file}", "white", verbose)
1322
1353
  return
1323
1354
 
1324
1355
 
@@ -1327,7 +1358,7 @@ def generate_complementarity_report(
1327
1358
  error_msg = f"Module completeness matrix not found at: {module_matrix_file}"
1328
1359
  if logger:
1329
1360
  logger.error(error_msg)
1330
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
1361
+ typer.secho(f"ERROR: {error_msg}", fg="red")
1331
1362
  return
1332
1363
 
1333
1364
 
@@ -1344,13 +1375,13 @@ def generate_complementarity_report(
1344
1375
  emapper_file = possible_file
1345
1376
  if logger:
1346
1377
  logger.info(f"Found emapper annotation file at: {emapper_file}")
1347
- typer.secho(f"✅ [OK] Using emapper annotations from: {emapper_file}", fg="white")
1378
+ typer.secho(f"OK: Using emapper annotations from: {emapper_file}", fg="white")
1348
1379
  break
1349
1380
 
1350
1381
  if not emapper_file:
1351
1382
  if logger:
1352
1383
  logger.warning(f"Emapper annotation file not found in any of the expected locations. Will use placeholder protein IDs.")
1353
- typer.secho(f"⚠️ [WARNING] Emapper annotation file not found. Will use placeholder protein IDs.", fg="yellow")
1384
+ typer.secho(f"WARNING: Emapper annotation file not found. Will use placeholder protein IDs.", fg="yellow")
1354
1385
 
1355
1386
 
1356
1387
  kpct_output_file = None
@@ -1370,7 +1401,7 @@ def generate_complementarity_report(
1370
1401
  error_msg = "KPCT output file not found. Cannot extract module metadata."
1371
1402
  if logger:
1372
1403
  logger.error(error_msg)
1373
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
1404
+ typer.secho(f"ERROR: {error_msg}", fg="red")
1374
1405
  return
1375
1406
 
1376
1407
  try:
@@ -1452,7 +1483,7 @@ def generate_complementarity_report(
1452
1483
  if module_id_col: logger.error(f"Found module_id_col: {module_id_col}")
1453
1484
  if module_name_col: logger.error(f"Found module_name_col: {module_name_col}")
1454
1485
  logger.error(f"Available columns: {kpct_df.columns.tolist()}")
1455
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
1486
+ typer.secho(f"ERROR: {error_msg}", fg="red")
1456
1487
  return
1457
1488
 
1458
1489
  if not contig_col or not matching_ko_col:
@@ -1460,7 +1491,7 @@ def generate_complementarity_report(
1460
1491
  logger.warning(f"Cannot identify contig or matching_ko columns in KPCT output.")
1461
1492
  logger.warning(f"Found contig_col: {contig_col}, matching_ko_col: {matching_ko_col}")
1462
1493
  logger.warning(f"Available columns: {kpct_df.columns.tolist()}")
1463
- typer.secho(f"⚠️ [WARNING] Missing columns in KPCT output may affect mapping of KOs to combinations.", fg="yellow")
1494
+ typer.secho(f"WARNING: Missing columns in KPCT output may affect mapping of KOs to combinations.", fg="yellow")
1464
1495
 
1465
1496
 
1466
1497
  module_metadata = {}
@@ -1726,7 +1757,7 @@ def generate_complementarity_report(
1726
1757
  logger.info(f"Found {len(report_df)} complementary modules in {n_members}-member combinations")
1727
1758
  logger.info(f"Complementarity report saved to: {output_file}")
1728
1759
 
1729
- conditional_output(f"✅ [OK] Found {len(report_df)} complementary modules in {n_members}-member combinations", "green", verbose)
1760
+ conditional_output(f"OK: Found {len(report_df)} complementary modules in {n_members}-member combinations", "green", verbose)
1730
1761
  conditional_output(f"Complementarity report saved to: {output_file}", "white", verbose)
1731
1762
  else:
1732
1763
  if logger:
@@ -1746,14 +1777,14 @@ def generate_complementarity_report(
1746
1777
  report_df = pd.DataFrame(columns=columns)
1747
1778
  report_df.to_csv(output_file, sep='\t', index=False)
1748
1779
 
1749
- typer.secho(f"⚠️ [WARNING] No complementary modules found in {n_members}-member combinations", fg="yellow")
1780
+ typer.secho(f"WARNING: No complementary modules found in {n_members}-member combinations", fg="yellow")
1750
1781
  typer.secho(f"Empty report saved to: {output_file}", fg="white")
1751
1782
 
1752
1783
  except Exception as e:
1753
1784
  error_msg = f"Error generating complementarity report: {str(e)}"
1754
1785
  if logger:
1755
1786
  logger.error(error_msg, exc_info=True)
1756
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
1787
+ typer.secho(f"ERROR: {error_msg}", fg="red")
1757
1788
 
1758
1789
 
1759
1790
  def ko_matrix_to_kpct_format(kos_matrix: str, savedir: str, calculate_complementarity: int = 0, logger: Optional[logging.Logger] = None) -> str:
@@ -1840,7 +1871,7 @@ def ko_matrix_to_kpct_format(kos_matrix: str, savedir: str, calculate_complement
1840
1871
  msg = "Invalid KO matrix format: missing 'taxon_oid' column"
1841
1872
  if logger:
1842
1873
  logger.error(msg)
1843
- typer.secho(f"❌ [ERROR] {msg}", fg="red")
1874
+ typer.secho(f"ERROR: {msg}", fg="red")
1844
1875
  exit(1)
1845
1876
 
1846
1877
 
@@ -1885,14 +1916,14 @@ def ko_matrix_to_kpct_format(kos_matrix: str, savedir: str, calculate_complement
1885
1916
 
1886
1917
  if logger:
1887
1918
  logger.info(f"KO matrix converted to KPCT format: {output_path}")
1888
- typer.secho(f"✅ [OK] KO matrix converted to KPCT format: {output_path}", fg="white")
1919
+ typer.secho(f"OK: KO matrix converted to KPCT format: {output_path}", fg="white")
1889
1920
  return output_path
1890
1921
 
1891
1922
  except Exception as e:
1892
1923
  error_msg = f"Error converting KO matrix to KPCT format: {str(e)}"
1893
1924
  if logger:
1894
1925
  logger.error(error_msg)
1895
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
1926
+ typer.secho(f"ERROR: {error_msg}", fg="red")
1896
1927
  raise
1897
1928
 
1898
1929
 
@@ -1982,7 +2013,7 @@ def create_module_completeness_matrix(savedir: str, kpct_outprefix: str, logger:
1982
2013
  error_msg = f"KPCT output file not found: tried {kpct_outprefix}_contigs.with_weights.tsv and alternatives"
1983
2014
  if logger:
1984
2015
  logger.error(error_msg)
1985
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2016
+ typer.secho(f"ERROR: {error_msg}", fg="red")
1986
2017
  return
1987
2018
 
1988
2019
  try:
@@ -2082,7 +2113,7 @@ def create_module_completeness_matrix(savedir: str, kpct_outprefix: str, logger:
2082
2113
  error_msg = "Could not identify module columns in the KPCT output"
2083
2114
  if logger:
2084
2115
  logger.error(error_msg)
2085
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2116
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2086
2117
  return
2087
2118
 
2088
2119
  # Build the result data
@@ -2125,13 +2156,13 @@ def create_module_completeness_matrix(savedir: str, kpct_outprefix: str, logger:
2125
2156
  logger.info(f"Matrix contains {single_genomes} single genomes out of {total_genomes} total entries")
2126
2157
  if all_genomes:
2127
2158
  logger.info(f"Expected {len(all_genomes)} single genomes from KPCT input")
2128
- typer.secho(f"✅ [OK] Module completeness matrix saved to: {output_file}", fg="white")
2159
+ typer.secho(f"OK: Module completeness matrix saved to: {output_file}", fg="white")
2129
2160
 
2130
2161
  except Exception as e:
2131
2162
  error_msg = f"Error creating module completeness matrix: {str(e)}"
2132
2163
  if logger:
2133
2164
  logger.error(error_msg)
2134
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2165
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2135
2166
 
2136
2167
  if logger:
2137
2168
  logger.error(f"Error details: {e}", exc_info=True)
@@ -2180,7 +2211,7 @@ def create_ko_matrix_from_emapper_annotation(emapper_file_path: str, output_file
2180
2211
 
2181
2212
 
2182
2213
  if os.path.exists(output_file_path):
2183
- typer.secho(f"✅ [OK] KO matrix already exists at: {output_file_path}", fg="white")
2214
+ typer.secho(f"OK: KO matrix already exists at: {output_file_path}", fg="white")
2184
2215
  if logger:
2185
2216
  logger.info(f"KO matrix already exists at: {output_file_path}")
2186
2217
  return
@@ -2189,7 +2220,7 @@ def create_ko_matrix_from_emapper_annotation(emapper_file_path: str, output_file
2189
2220
  error_msg = f"eMapper annotation file not found at {emapper_file_path}. Cannot proceed."
2190
2221
  if logger:
2191
2222
  logger.error(error_msg)
2192
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2223
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2193
2224
  exit(1)
2194
2225
 
2195
2226
  try:
@@ -2255,7 +2286,7 @@ def create_ko_matrix_from_emapper_annotation(emapper_file_path: str, output_file
2255
2286
  error_msg = "No KO data found in the eMapper annotations file"
2256
2287
  if logger:
2257
2288
  logger.error(error_msg)
2258
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2289
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2259
2290
  return
2260
2291
 
2261
2292
 
@@ -2286,13 +2317,13 @@ def create_ko_matrix_from_emapper_annotation(emapper_file_path: str, output_file
2286
2317
  logger.info(f"Created KO matrix with {len(kos_count_df)} genomes and {len(kos_count_df.columns)-1} KOs")
2287
2318
  logger.info(f"KO matrix saved to: {output_file_path}")
2288
2319
 
2289
- typer.secho(f"✅ [OK] KO matrix created and saved to: {output_file_path}", fg="white")
2320
+ typer.secho(f"OK: KO matrix created and saved to: {output_file_path}", fg="white")
2290
2321
 
2291
2322
  except Exception as e:
2292
2323
  error_msg = f"Error creating KO matrix: {str(e)}"
2293
2324
  if logger:
2294
2325
  logger.error(error_msg, exc_info=True)
2295
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2326
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2296
2327
  exit(1)
2297
2328
 
2298
2329
 
@@ -2322,14 +2353,14 @@ def check_kpct_installed(logger: Optional[logging.Logger] = None) -> bool:
2322
2353
  error_msg = "KPCT 'give_completeness' tool not found in PATH. Please install it via pip: pip install kegg-pathways-completeness"
2323
2354
  if logger:
2324
2355
  logger.error(error_msg)
2325
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2356
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2326
2357
  return False
2327
2358
  return True
2328
2359
  except Exception as e:
2329
2360
  error_msg = f"Error checking for KPCT installation: {str(e)}"
2330
2361
  if logger:
2331
2362
  logger.error(error_msg)
2332
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2363
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2333
2364
  return False
2334
2365
 
2335
2366
 
@@ -2416,7 +2447,7 @@ def chunk_kpct_input_file(kpct_input_file: str, savedir: str, n_chunks: int, log
2416
2447
  error_msg = f"KPCT input file is empty: {kpct_input_file}"
2417
2448
  if logger:
2418
2449
  logger.error(error_msg)
2419
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2450
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2420
2451
  return []
2421
2452
 
2422
2453
  # Calculate lines per chunk using ceiling division to ensure we create exactly n_chunks
@@ -2470,7 +2501,7 @@ def chunk_kpct_input_file(kpct_input_file: str, savedir: str, n_chunks: int, log
2470
2501
  error_msg = f"Error chunking KPCT input file: {str(e)}"
2471
2502
  if logger:
2472
2503
  logger.error(error_msg)
2473
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2504
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2474
2505
  return []
2475
2506
 
2476
2507
 
@@ -2611,7 +2642,7 @@ def concatenate_kpct_outputs(chunk_dirs: List[str], savedir: str, kpct_outprefix
2611
2642
  error_msg = f"Error concatenating KPCT outputs: {str(e)}"
2612
2643
  if logger:
2613
2644
  logger.error(error_msg)
2614
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2645
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2615
2646
  return False
2616
2647
 
2617
2648
 
@@ -2666,7 +2697,7 @@ def run_kpct_parallel(kpct_input_file: str, savedir: str, kpct_outprefix: str, n
2666
2697
  if all(os.path.exists(f) for f in final_outputs):
2667
2698
  if logger:
2668
2699
  logger.info("KPCT output files already exist, skipping parallel processing")
2669
- typer.secho("✅ [OK] KPCT output files already exist", fg="white")
2700
+ typer.secho("OK: KPCT output files already exist", fg="white")
2670
2701
  return True
2671
2702
 
2672
2703
 
@@ -2694,7 +2725,7 @@ def run_kpct_parallel(kpct_input_file: str, savedir: str, kpct_outprefix: str, n
2694
2725
  if all_chunks_exist:
2695
2726
  if logger:
2696
2727
  logger.info("All chunk outputs already exist, proceeding to concatenation")
2697
- typer.secho("✅ [OK] All chunks already processed, concatenating results", fg="white")
2728
+ typer.secho("OK: All chunks already processed, concatenating results", fg="white")
2698
2729
 
2699
2730
 
2700
2731
  concatenation_success = concatenate_kpct_outputs(existing_chunk_dirs, savedir, kpct_outprefix, logger)
@@ -2751,7 +2782,7 @@ def run_kpct_parallel(kpct_input_file: str, savedir: str, kpct_outprefix: str, n
2751
2782
  if not chunks_to_process:
2752
2783
  if logger:
2753
2784
  logger.info("All chunks already processed, proceeding to concatenation")
2754
- typer.secho("✅ [OK] All chunks already processed, concatenating results", fg="white")
2785
+ typer.secho("OK: All chunks already processed, concatenating results", fg="white")
2755
2786
  else:
2756
2787
  if logger:
2757
2788
  logger.info(f"Processing {len(chunks_to_process)} remaining chunks")
@@ -2799,7 +2830,7 @@ def run_kpct_parallel(kpct_input_file: str, savedir: str, kpct_outprefix: str, n
2799
2830
  error_msg = f"Failed to process {len(failed_chunks)} chunks: {failed_chunks}"
2800
2831
  if logger:
2801
2832
  logger.error(error_msg)
2802
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2833
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2803
2834
  return False
2804
2835
 
2805
2836
 
@@ -2812,7 +2843,7 @@ def run_kpct_parallel(kpct_input_file: str, savedir: str, kpct_outprefix: str, n
2812
2843
  error_msg = f"Not all chunks were processed successfully. Expected {len(all_chunk_dirs)}, got {len(final_chunk_dirs)}"
2813
2844
  if logger:
2814
2845
  logger.error(error_msg)
2815
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2846
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2816
2847
  return False
2817
2848
 
2818
2849
 
@@ -2830,12 +2861,12 @@ def run_kpct_parallel(kpct_input_file: str, savedir: str, kpct_outprefix: str, n
2830
2861
  error_msg = f"Failed to create final output files: {missing_outputs}"
2831
2862
  if logger:
2832
2863
  logger.error(error_msg)
2833
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2864
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2834
2865
  return False
2835
2866
 
2836
2867
  if logger:
2837
2868
  logger.info("Successfully completed parallel KPCT processing")
2838
- typer.secho("✅ [OK] KPCT parallel processing completed successfully", fg="green")
2869
+ typer.secho("OK: KPCT parallel processing completed successfully", fg="green")
2839
2870
 
2840
2871
  return True
2841
2872
 
@@ -2843,7 +2874,7 @@ def run_kpct_parallel(kpct_input_file: str, savedir: str, kpct_outprefix: str, n
2843
2874
  error_msg = f"Error in parallel KPCT processing: {str(e)}"
2844
2875
  if logger:
2845
2876
  logger.error(error_msg)
2846
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2877
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2847
2878
  return False
2848
2879
 
2849
2880
 
@@ -2894,7 +2925,7 @@ def run_kpct(kpct_input_file: str, savedir: str, kpct_outprefix: str, resource_l
2894
2925
  error_msg += f": {stderr}"
2895
2926
  if logger:
2896
2927
  logger.error(error_msg)
2897
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2928
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2898
2929
  return False
2899
2930
 
2900
2931
 
@@ -2914,14 +2945,14 @@ def run_kpct(kpct_input_file: str, savedir: str, kpct_outprefix: str, resource_l
2914
2945
  error_msg = f"KPCT did not generate any output files with prefix '{kpct_outprefix}'"
2915
2946
  if logger:
2916
2947
  logger.error(error_msg)
2917
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2948
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2918
2949
  return False
2919
2950
 
2920
2951
 
2921
2952
  created_files = [f for f in possible_kpct_files if os.path.exists(f)]
2922
2953
  if logger:
2923
2954
  logger.info(f"KPCT successfully created output files: {created_files}")
2924
- typer.secho(f"✅ [OK] KPCT completed successfully. Created files: {[os.path.basename(f) for f in created_files]}", fg="green")
2955
+ typer.secho(f"OK: KPCT completed successfully. Created files: {[os.path.basename(f) for f in created_files]}", fg="green")
2925
2956
 
2926
2957
  return True
2927
2958
 
@@ -2929,7 +2960,7 @@ def run_kpct(kpct_input_file: str, savedir: str, kpct_outprefix: str, resource_l
2929
2960
  error_msg = f"Error running KPCT: {str(e)}"
2930
2961
  if logger:
2931
2962
  logger.error(error_msg)
2932
- typer.secho(f"❌ [ERROR] {error_msg}", fg="red")
2963
+ typer.secho(f"ERROR: {error_msg}", fg="red")
2933
2964
  return False
2934
2965
 
2935
2966
 
@@ -2967,7 +2998,7 @@ def run_kpct_with_fallback(kpct_input_file: str, savedir: str, kpct_outprefix: s
2967
2998
  else:
2968
2999
  if logger:
2969
3000
  logger.warning("Parallel KPCT processing failed, falling back to sequential processing")
2970
- typer.secho("⚠️ [WARNING] Parallel processing failed, trying sequential approach", fg="yellow")
3001
+ typer.secho("WARNING: Parallel processing failed, trying sequential approach", fg="yellow")
2971
3002
 
2972
3003
 
2973
3004
  if logger:
@@ -3086,9 +3117,9 @@ def _run_pipeline_core(genomedir: str, savedir: str, ncpus: int, adapt_headers:
3086
3117
 
3087
3118
  # Check if all outputs already exist
3088
3119
  if check_final_reports_exist(savedir, calculate_complementarity, logger):
3089
- conditional_output("✅ [OK] All output files already exist. Skipping processing.", "green", verbose)
3120
+ conditional_output("OK: All output files already exist. Skipping processing.", "green", verbose)
3090
3121
  if not del_tmp:
3091
- conditional_output("ℹ️ Keeping temporary files as requested.", "blue", verbose)
3122
+ conditional_output("INFO: Keeping temporary files as requested.", "blue", verbose)
3092
3123
  logger.info("Pipeline skipped as all output files already exist")
3093
3124
  return
3094
3125
 
@@ -3111,15 +3142,15 @@ def _run_pipeline_core(genomedir: str, savedir: str, ncpus: int, adapt_headers:
3111
3142
  # Process annotations and create KO matrix
3112
3143
  if os.path.exists(ko_matrix_path):
3113
3144
  logger.info(f"KO matrix already exists: {ko_matrix_path}")
3114
- conditional_output(f"✅ [OK] Using existing KO matrix: {ko_matrix_path}", "white", verbose)
3145
+ conditional_output(f"OK: Using existing KO matrix: {ko_matrix_path}", "white", verbose)
3115
3146
  else:
3116
3147
  # Check for existing emapper annotations
3117
3148
  if os.path.exists(emapper_annotation_file):
3118
3149
  logger.info(f"Emapper annotations already exist: {emapper_annotation_file}")
3119
- conditional_output(f"✅ [OK] Using existing emapper annotations: {emapper_annotation_file}", "white", verbose)
3150
+ conditional_output(f"OK: Using existing emapper annotations: {emapper_annotation_file}", "white", verbose)
3120
3151
  elif os.path.exists(tmp_emapper_file):
3121
3152
  logger.info(f"Emapper annotations found in temp directory: {tmp_emapper_file}")
3122
- conditional_output(f"✅ [OK] Using existing emapper annotations from temp directory", "white", verbose)
3153
+ conditional_output(f"OK: Using existing emapper annotations from temp directory", "white", verbose)
3123
3154
  # Copy to final location
3124
3155
  try:
3125
3156
  shutil.copy(tmp_emapper_file, emapper_annotation_file)
@@ -3149,7 +3180,7 @@ def _run_pipeline_core(genomedir: str, savedir: str, ncpus: int, adapt_headers:
3149
3180
  merge_success = merge_genomes(savedir, logger, verbose)
3150
3181
  if not merge_success:
3151
3182
  logger.error("Failed to merge genomes. Exiting pipeline.")
3152
- typer.secho("❌ [ERROR] Failed to merge genomes. Exiting pipeline.", fg="red")
3183
+ typer.secho("ERROR: Failed to merge genomes. Exiting pipeline.", fg="red")
3153
3184
  return
3154
3185
 
3155
3186
  # Run eggNOG-mapper
@@ -3157,7 +3188,7 @@ def _run_pipeline_core(genomedir: str, savedir: str, ncpus: int, adapt_headers:
3157
3188
  emapper_success = run_emapper(savedir, ncpus, resource_log_file, lowmem, logger, verbose)
3158
3189
  if not emapper_success:
3159
3190
  logger.error("Failed to run emapper. Exiting pipeline.")
3160
- typer.secho("❌ [ERROR] Failed to run emapper. Exiting pipeline.", fg="red")
3191
+ typer.secho("ERROR: Failed to run emapper. Exiting pipeline.", fg="red")
3161
3192
  return
3162
3193
 
3163
3194
  # Create KO matrix from annotations
@@ -3170,7 +3201,7 @@ def _run_pipeline_core(genomedir: str, savedir: str, ncpus: int, adapt_headers:
3170
3201
 
3171
3202
  if os.path.exists(module_completeness_file):
3172
3203
  logger.info(f"Module completeness matrix already exists: {module_completeness_file}")
3173
- typer.secho(f"✅ [OK] Using existing module completeness matrix: {module_completeness_file}", fg="white")
3204
+ typer.secho(f"OK: Using existing module completeness matrix: {module_completeness_file}", fg="white")
3174
3205
  else:
3175
3206
  # Set up KPCT processing
3176
3207
  kpct_outprefix = "output_give_completeness"
@@ -3191,7 +3222,7 @@ def _run_pipeline_core(genomedir: str, savedir: str, ncpus: int, adapt_headers:
3191
3222
  ko_matrix_to_kpct_format(ko_matrix_path, savedir, calculate_complementarity, logger)
3192
3223
  else:
3193
3224
  logger.info(f"KPCT input file already exists: {kpct_input_file}")
3194
- typer.secho(f"✅ [OK] Using existing KPCT input file: {kpct_input_file}", fg="white")
3225
+ typer.secho(f"OK: Using existing KPCT input file: {kpct_input_file}", fg="white")
3195
3226
 
3196
3227
  # Run KPCT if needed
3197
3228
  if not kpct_file_exists:
@@ -3206,7 +3237,7 @@ def _run_pipeline_core(genomedir: str, savedir: str, ncpus: int, adapt_headers:
3206
3237
  return
3207
3238
  else:
3208
3239
  logger.info(f"KPCT output file(s) already exist with prefix '{kpct_outprefix}'")
3209
- typer.secho(f"✅ [OK] Using existing KPCT output files with prefix '{kpct_outprefix}'", fg="white")
3240
+ typer.secho(f"OK: Using existing KPCT output files with prefix '{kpct_outprefix}'", fg="white")
3210
3241
 
3211
3242
  # Create module completeness matrix
3212
3243
  logger.info(f"Creating module completeness matrix")
@@ -3221,7 +3252,7 @@ def _run_pipeline_core(genomedir: str, savedir: str, ncpus: int, adapt_headers:
3221
3252
  complementarity_report_file = f"{savedir}/module_completeness_complementarity_{n_members}member.tsv"
3222
3253
  if os.path.exists(complementarity_report_file):
3223
3254
  logger.info(f"Complementarity report for {n_members}-member combinations already exists: {complementarity_report_file}")
3224
- typer.secho(f"✅ [OK] Using existing {n_members}-member complementarity report: {complementarity_report_file}", fg="white")
3255
+ typer.secho(f"OK: Using existing {n_members}-member complementarity report: {complementarity_report_file}", fg="white")
3225
3256
  else:
3226
3257
  logger.info(f"Generating complementarity report for {n_members}-member combinations")
3227
3258
  generate_complementarity_report(savedir, n_members, logger, verbose)
@@ -3354,16 +3385,13 @@ def download_eggnog_data(
3354
3385
  logger.info("Starting eggNOG data download.")
3355
3386
  logger.info("CLI command: %s", " ".join(shlex.quote(arg) for arg in sys.argv))
3356
3387
 
3357
- if eggnog_data_dir:
3358
- os.environ["EGGNOG_DATA_DIR"] = eggnog_data_dir
3359
-
3360
3388
  env_value = os.environ.get("EGGNOG_DATA_DIR", "").strip()
3361
3389
  if not env_value:
3362
3390
  default_dir = default_eggnog_data_dir()
3363
3391
  os.environ["EGGNOG_DATA_DIR"] = str(default_dir)
3364
3392
  env_value = str(default_dir)
3365
3393
  typer.secho(
3366
- f"ℹ️ [INFO] EGGNOG_DATA_DIR not set; using default {env_value}",
3394
+ f"INFO: EGGNOG_DATA_DIR not set; using default {env_value}",
3367
3395
  fg="yellow",
3368
3396
  )
3369
3397
  logger.info("EGGNOG_DATA_DIR not set; using default %s", env_value)
@@ -3381,12 +3409,118 @@ def download_eggnog_data(
3381
3409
  emit_error(message, logger)
3382
3410
  raise typer.Exit(1)
3383
3411
 
3384
- returncode, _, _ = run_subprocess_with_logging(
3385
- [downloader],
3386
- logger=logger,
3387
- description="Downloading eggNOG data",
3388
- verbose=verbose,
3412
+ # Run the downloader with progress updates based on data directory growth.
3413
+ cmd = [downloader]
3414
+ logger.info("Downloading eggNOG data: %s", downloader)
3415
+ if verbose:
3416
+ typer.secho("Running download_eggnog_data.py", fg="yellow")
3417
+ typer.secho(f" Command: {' '.join(cmd)}", fg="blue")
3418
+
3419
+ start_time = time.time()
3420
+ last_progress_time = start_time
3421
+ last_size = get_dir_size(data_dir)
3422
+ last_files = count_files(data_dir)
3423
+ progress_interval = 60
3424
+
3425
+ process = subprocess.Popen(
3426
+ cmd,
3427
+ stdout=subprocess.PIPE,
3428
+ stderr=subprocess.PIPE,
3429
+ text=True,
3430
+ bufsize=1,
3431
+ universal_newlines=True,
3432
+ )
3433
+
3434
+ stdout_queue: "queue.Queue[Tuple[str, str]]" = queue.Queue()
3435
+ stderr_queue: "queue.Queue[Tuple[str, str]]" = queue.Queue()
3436
+
3437
+ def stream_reader(stream, q, stream_type):
3438
+ try:
3439
+ for line in iter(stream.readline, ""):
3440
+ q.put((stream_type, line.rstrip("\n\r")))
3441
+ finally:
3442
+ try:
3443
+ stream.close()
3444
+ except Exception:
3445
+ pass
3446
+
3447
+ stdout_thread = threading.Thread(
3448
+ target=stream_reader,
3449
+ args=(process.stdout, stdout_queue, "stdout"),
3450
+ daemon=True,
3389
3451
  )
3452
+ stderr_thread = threading.Thread(
3453
+ target=stream_reader,
3454
+ args=(process.stderr, stderr_queue, "stderr"),
3455
+ daemon=True,
3456
+ )
3457
+ stdout_thread.start()
3458
+ stderr_thread.start()
3459
+
3460
+ while process.poll() is None or not stdout_queue.empty() or not stderr_queue.empty():
3461
+ now = time.time()
3462
+
3463
+ # Drain stdout
3464
+ try:
3465
+ while True:
3466
+ stream_type, line = stdout_queue.get_nowait()
3467
+ if line:
3468
+ if verbose:
3469
+ print(line, flush=True)
3470
+ logger.info(line)
3471
+ except queue.Empty:
3472
+ pass
3473
+
3474
+ # Drain stderr
3475
+ try:
3476
+ while True:
3477
+ stream_type, line = stderr_queue.get_nowait()
3478
+ if line:
3479
+ if verbose:
3480
+ print(line, file=sys.stderr, flush=True)
3481
+ logger.warning(line)
3482
+ except queue.Empty:
3483
+ pass
3484
+
3485
+ if now - last_progress_time >= progress_interval:
3486
+ try:
3487
+ current_size = get_dir_size(data_dir)
3488
+ current_files = count_files(data_dir)
3489
+ delta = current_size - last_size
3490
+ elapsed = now - last_progress_time
3491
+ speed = delta / elapsed if elapsed > 0 else 0.0
3492
+ file_delta = current_files - last_files
3493
+ msg = (
3494
+ f"Download progress: {format_bytes(current_size)} total "
3495
+ f"(+{format_bytes(delta)} in {int(elapsed)}s, "
3496
+ f"{format_bytes(speed)}/s, +{file_delta} files)"
3497
+ )
3498
+ logger.info(msg)
3499
+ if verbose:
3500
+ typer.secho(msg, fg="cyan")
3501
+ last_size = current_size
3502
+ last_files = current_files
3503
+ last_progress_time = now
3504
+ except Exception as exc:
3505
+ logger.warning("Progress check failed: %s", exc)
3506
+ last_progress_time = now
3507
+
3508
+ time.sleep(0.2)
3509
+
3510
+ stdout_thread.join(timeout=1.0)
3511
+ stderr_thread.join(timeout=1.0)
3512
+
3513
+ returncode = process.returncode
3514
+ total_size = get_dir_size(data_dir)
3515
+ total_files = count_files(data_dir)
3516
+ total_elapsed = time.time() - start_time
3517
+ summary = (
3518
+ f"Download finished: {format_bytes(total_size)} in {int(total_elapsed)}s "
3519
+ f"across {total_files} files"
3520
+ )
3521
+ logger.info(summary)
3522
+ if verbose:
3523
+ typer.secho(summary, fg="green")
3390
3524
 
3391
3525
  if returncode != 0:
3392
3526
  raise typer.Exit(returncode)
@@ -3472,7 +3606,7 @@ def analyze_ko_matrix(
3472
3606
 
3473
3607
 
3474
3608
  if not os.path.exists(kos_matrix):
3475
- typer.secho(f"❌ [ERROR] KO matrix file not found at: {kos_matrix}", fg="red")
3609
+ typer.secho(f"ERROR: KO matrix file not found at: {kos_matrix}", fg="red")
3476
3610
  exit(1)
3477
3611
 
3478
3612
 
@@ -3486,7 +3620,7 @@ def analyze_ko_matrix(
3486
3620
 
3487
3621
 
3488
3622
  if check_final_reports_exist(savedir, calculate_complementarity, logger):
3489
- typer.secho("✅ [OK] All output files already exist. Skipping processing.", fg="green")
3623
+ typer.secho("OK: All output files already exist. Skipping processing.", fg="green")
3490
3624
  logger.info("Analysis skipped as all output files already exist")
3491
3625
  return
3492
3626
 
@@ -3524,7 +3658,7 @@ def analyze_ko_matrix(
3524
3658
  ko_matrix_to_kpct_format(kos_matrix, savedir, calculate_complementarity, logger)
3525
3659
  else:
3526
3660
  logger.info(f"KPCT input file already exists: {kpct_input_file}")
3527
- typer.secho(f"✅ [OK] Using existing KPCT input file: {kpct_input_file}", fg="white")
3661
+ typer.secho(f"OK: Using existing KPCT input file: {kpct_input_file}", fg="white")
3528
3662
 
3529
3663
 
3530
3664
  if not kpct_file_exists:
@@ -3539,7 +3673,7 @@ def analyze_ko_matrix(
3539
3673
  exit(1)
3540
3674
  else:
3541
3675
  logger.info(f"KPCT output file(s) already exist with prefix '{kpct_outprefix}'")
3542
- typer.secho(f"✅ [OK] Using existing KPCT output files with prefix '{kpct_outprefix}'", fg="white")
3676
+ typer.secho(f"OK: Using existing KPCT output files with prefix '{kpct_outprefix}'", fg="white")
3543
3677
 
3544
3678
 
3545
3679
  if not os.path.exists(module_completeness_file):
@@ -3549,7 +3683,7 @@ def analyze_ko_matrix(
3549
3683
  else:
3550
3684
  if logger:
3551
3685
  logger.info(f"Module completeness matrix already exists: {module_completeness_file}")
3552
- typer.secho(f"✅ [OK] Using existing module completeness matrix: {module_completeness_file}", fg="white")
3686
+ typer.secho(f"OK: Using existing module completeness matrix: {module_completeness_file}", fg="white")
3553
3687
 
3554
3688
 
3555
3689
  if calculate_complementarity >= 2:
@@ -3561,7 +3695,7 @@ def analyze_ko_matrix(
3561
3695
  complementarity_report_file = f"{savedir}/module_completeness_complementarity_{n_members}member.tsv"
3562
3696
  if os.path.exists(complementarity_report_file):
3563
3697
  logger.info(f"Complementarity report for {n_members}-member combinations already exists: {complementarity_report_file}")
3564
- typer.secho(f"✅ [OK] Using existing {n_members}-member complementarity report: {complementarity_report_file}", fg="white")
3698
+ typer.secho(f"OK: Using existing {n_members}-member complementarity report: {complementarity_report_file}", fg="white")
3565
3699
  else:
3566
3700
  logger.info(f"Generating complementarity report for {n_members}-member combinations")
3567
3701
  generate_complementarity_report(savedir, n_members, logger, verbose)
@@ -3581,7 +3715,7 @@ def analyze_ko_matrix(
3581
3715
  except Exception as e:
3582
3716
  if logger:
3583
3717
  logger.error(f"Error in KPCT analysis: {str(e)}", exc_info=True)
3584
- typer.secho(f"❌ [ERROR] Error in KPCT analysis: {str(e)}", fg="red")
3718
+ typer.secho(f"ERROR: Error in KPCT analysis: {str(e)}", fg="red")
3585
3719
  exit(1)
3586
3720
 
3587
3721
 
@@ -1,5 +1,5 @@
1
1
  context:
2
- version: 0.7.3
2
+ version: 0.7.4
3
3
 
4
4
  package:
5
5
  name: moducomp
@@ -7,7 +7,7 @@ package:
7
7
 
8
8
  source:
9
9
  - url: https://pypi.org/packages/source/m/moducomp/moducomp-${{ version }}.tar.gz
10
- sha256: f3f2f6b7dcc23062c47c894eb694e225fdfd4fb7804b9584dda6970748caa866
10
+ sha256: b6d5648b660aadc9ecdb9375d35a984a02ef23d99d9d8085067a18d72037aabf
11
11
 
12
12
  build:
13
13
  script:
File without changes
File without changes
File without changes
File without changes
File without changes