gsMap 1.73.0__py3-none-any.whl → 1.73.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gsMap/GNN/train.py CHANGED
@@ -17,7 +17,7 @@ def reconstruction_loss(decoded, x):
 
 def label_loss(pred_label, true_label):
     """Compute the cross-entropy loss."""
-    return F.cross_entropy(pred_label, true_label)
+    return F.cross_entropy(pred_label, true_label.long())
 
 
 class ModelTrainer:
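The added .long() cast matters because torch.nn.functional.cross_entropy only treats integer (Long) targets as class indices; 1-D floating-point labels, as commonly produced when annotations pass through NumPy, trigger a dtype/shape error. A minimal sketch of the failure mode this release guards against (the tensors are made up for illustration):

import torch
import torch.nn.functional as F

logits = torch.randn(4, 3)                     # (batch, n_classes)
labels = torch.tensor([0.0, 2.0, 1.0, 2.0])    # float labels, e.g. arriving from a numpy array
# F.cross_entropy(logits, labels) fails: 1-D float targets are not valid class indices
loss = F.cross_entropy(logits, labels.long())  # the cast restores class-index semantics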
gsMap/__init__.py CHANGED
@@ -2,4 +2,4 @@
 Genetics-informed pathogenic spatial mapping
 """
 
-__version__ = "1.73.0"
+__version__ = "1.73.1"
gsMap/config.py CHANGED
@@ -232,6 +232,9 @@ def add_find_latent_representations_args(parser):
         action="store_true",
         help="Enable hierarchical latent representation finding.",
     )
+    parser.add_argument(
+        "--pearson_residuals", action="store_true", help="Using the pearson residuals."
+    )
 
 
 def chrom_choice(value):
@@ -308,7 +311,7 @@ def add_generate_ldscore_args(parser):
         help="Root path for genotype plink bfiles (.bim, .bed, .fam).",
     )
     parser.add_argument(
-        "--keep_snp_root", type=str, required=True, help="Root path for SNP files."
+        "--keep_snp_root", type=str, required=False, help="Root path for SNP files"
     )
     parser.add_argument(
         "--gtf_annotation_file", type=str, required=True, help="Path to GTF annotation file."
@@ -357,7 +360,11 @@ def add_spatial_ldsc_args(parser):
         "--sumstats_file", type=str, required=True, help="Path to GWAS summary statistics file."
     )
     parser.add_argument(
-        "--w_file", type=str, required=True, help="Path to regression weight file."
+        "--w_file",
+        type=str,
+        required=False,
+        default=None,
+        help="Path to regression weight file. If not provided, will use weights generated in the generate_ldscore step.",
     )
     parser.add_argument(
         "--trait_name", type=str, required=True, help="Name of the trait being analyzed."
@@ -678,6 +685,9 @@ def add_run_all_mode_args(parser):
     parser.add_argument(
         "--gM_slices", type=str, default=None, help="Path to the slice mean file (optional)."
    )
+    parser.add_argument(
+        "--pearson_residuals", action="store_true", help="Using the pearson residuals."
+    )
 
 
 def ensure_path_exists(func):
@@ -854,6 +864,7 @@ class FindLatentRepresentationsConfig(ConfigWithAutoPaths):
     var: bool = False
     convergence_threshold: float = 1e-4
     hierarchically: bool = False
+    pearson_residuals: bool = False
 
     def __post_init__(self):
         # self.output_hdf5_path = self.hdf5_with_latent_path
@@ -942,11 +953,11 @@ class GenerateLDScoreConfig(ConfigWithAutoPaths):
     chrom: int | str
 
     bfile_root: str
-    keep_snp_root: str | None
 
     # annotation by gene distance
     gtf_annotation_file: str
     gene_window_size: int = 50000
+    keep_snp_root: str | None = None
 
     # annotation by enhancer
     enhancer_annotation_file: str = None
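Moving keep_snp_root below the defaulted fields is required by dataclass semantics: once it gains a default value it may no longer precede required fields such as gtf_annotation_file. A small sketch of the rule (the class here is only illustrative, not gsMap code):

from dataclasses import dataclass

@dataclass
class Example:
    gtf_annotation_file: str            # required fields (no default) must come first
    gene_window_size: int = 50000
    keep_snp_root: str | None = None    # defaulted fields follow

# Placing a defaulted field before a required one raises
# "TypeError: non-default argument ... follows default argument" at class definition time.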
@@ -1055,7 +1066,7 @@ class GenerateLDScoreConfig(ConfigWithAutoPaths):
 
 @dataclass
 class SpatialLDSCConfig(ConfigWithAutoPaths):
-    w_file: str
+    w_file: str | None = None
     # ldscore_save_dir: str
     use_additional_baseline_annotation: bool = True
     trait_name: str | None = None
@@ -1105,8 +1116,19 @@ class SpatialLDSCConfig(ConfigWithAutoPaths):
         for sumstats_file in self.sumstats_config_dict.values():
             assert Path(sumstats_file).exists(), f"{sumstats_file} does not exist."
 
-        # check if additional baseline annotation is exist
-        # self.use_additional_baseline_annotation = False
+        # Handle w_file
+        if self.w_file is None:
+            w_ld_dir = Path(self.ldscore_save_dir) / "w_ld"
+            if w_ld_dir.exists():
+                self.w_file = str(w_ld_dir / "weights.")
+                logger.info(f"Using weights generated in the generate_ldscore step: {self.w_file}")
+            else:
+                raise ValueError(
+                    "No w_file provided and no weights found in generate_ldscore output. "
+                    "Either provide --w_file or run generate_ldscore first."
+                )
+        else:
+            logger.info(f"Using provided weights file: {self.w_file}")
 
         if self.use_additional_baseline_annotation:
             self.process_additional_baseline_annotation()
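In practice this makes --w_file optional for spatial LDSC: when omitted, the config looks for a w_ld directory under ldscore_save_dir and uses the "weights." prefix found there. A rough sketch of how the value is resolved; the directory layout and the per-chromosome suffixes (e.g. weights.1.l2.ldscore.gz, following the usual LDSC weight-file convention) are assumptions for illustration:

from pathlib import Path

# Hypothetical layout after generate_ldscore has run for a sample:
#   out/sample1/generate_ldscore/w_ld/weights.1.l2.ldscore.gz
#   out/sample1/generate_ldscore/w_ld/weights.2.l2.ldscore.gz
#   ...
ldscore_save_dir = Path("out/sample1/generate_ldscore")
w_file = str(ldscore_save_dir / "w_ld" / "weights.")  # value the config now fills in
print(w_file)  # out/sample1/generate_ldscore/w_ld/weights.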
@@ -1117,16 +1139,6 @@ class SpatialLDSCConfig(ConfigWithAutoPaths):
 
         if not dir_exists:
             self.use_additional_baseline_annotation = False
-            # if self.use_additional_baseline_annotation:
-            #     logger.warning(f"additional_baseline directory is not found in {self.ldscore_save_dir}.")
-            #     print('''\
-            #     if you want to use additional baseline annotation,
-            #     please provide additional baseline annotation when calculating ld score.
-            #     ''')
-            #     raise FileNotFoundError(
-            #         f'additional_baseline directory is not found.')
-            #     return
-            # self.use_additional_baseline_annotation = self.use_additional_baseline_annotation or True
         else:
             logger.info(
                 "------Additional baseline annotation is provided. It will be used with the default baseline annotation."
@@ -1227,6 +1239,7 @@ class RunAllModeConfig(ConfigWithAutoPaths):
 
     # == Find Latent Representation PARAMETERS ==
     n_comps: int = 300
+    pearson_residuals: bool = False
 
     # == latent 2 Gene PARAMETERS ==
     gM_slices: str | None = None
@@ -23,6 +23,7 @@ def get_common_genes(h5ad_files, config: CreateSliceMeanConfig):
     common_genes = None
     for file in tqdm(h5ad_files, desc="Finding common genes"):
         adata = sc.read_h5ad(file)
+        sc.pp.filter_genes(adata, min_cells=1)
         adata.var_names_make_unique()
         if common_genes is None:
             common_genes = adata.var_names
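The new sc.pp.filter_genes(adata, min_cells=1) call drops genes detected in no cell of a slice, so all-zero genes never enter the common-gene intersection. A toy illustration with made-up counts:

import numpy as np
import scanpy as sc
import anndata as ad

X = np.array([[1, 0, 3],
              [2, 0, 0]], dtype=np.float32)   # the second gene is all zeros
adata = ad.AnnData(X)
sc.pp.filter_genes(adata, min_cells=1)        # keep genes detected in at least one cell
print(adata.shape)                            # (2, 2): the all-zero gene is removed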
gsMap/diagnosis.py CHANGED
@@ -49,7 +49,10 @@ def compute_gene_diagnostic_info(config: DiagnosisConfig):
 
     # Align marker scores with trait LDSC results
     mk_score = mk_score.loc[trait_ldsc_result.index]
-    mk_score = mk_score.loc[:, mk_score.sum(axis=0) != 0]
+
+    # Filter out genes with no variation
+    non_zero_std_cols = mk_score.columns[mk_score.std() > 0]
+    mk_score = mk_score.loc[:, non_zero_std_cols]
 
     logger.info("Calculating correlation between gene marker scores and trait logp-values...")
     corr = mk_score.corrwith(trait_ldsc_result["logp"])
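Switching the filter from sum(axis=0) != 0 to std() > 0 also removes constant, non-zero marker-score columns, which would otherwise yield NaN from DataFrame.corrwith (zero variance makes the Pearson correlation undefined). A toy example with made-up scores:

import pandas as pd

mk_score = pd.DataFrame({"geneA": [1.0, 1.0, 1.0],    # constant but non-zero
                         "geneB": [0.2, 0.5, 0.8]})
logp = pd.Series([1.0, 2.0, 3.0], index=mk_score.index)
print(mk_score.corrwith(logp))                        # geneA -> NaN, geneB -> 1.0
mk_score = mk_score.loc[:, mk_score.columns[mk_score.std() > 0]]
print(mk_score.corrwith(logp))                        # only geneB remains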
@@ -88,19 +91,6 @@ def compute_gene_diagnostic_info(config: DiagnosisConfig):
     gene_diagnostic_info.to_csv(gene_diagnostic_info_save_path, index=False)
     logger.info(f"Gene diagnostic information saved to {gene_diagnostic_info_save_path}.")
 
-    # TODO: A new script is needed to save the gene diagnostic info to adata.var and trait_ldsc_result to adata.obs when running multiple traits
-    # # Save to adata.var with the trait_name prefix
-    # logger.info('Saving gene diagnostic info to adata.var...')
-    # gene_diagnostic_info.set_index('Gene', inplace=True)  # Use 'Gene' as the index to align with adata.var
-    # adata.var[f'{config.trait_name}_Annotation'] = gene_diagnostic_info['Annotation']
-    # adata.var[f'{config.trait_name}_Median_GSS'] = gene_diagnostic_info['Median_GSS']
-    # adata.var[f'{config.trait_name}_PCC'] = gene_diagnostic_info['PCC']
-    #
-    # # Save trait_ldsc_result to adata.obs
-    # logger.info(f'Saving trait LDSC results to adata.obs as gsMap_{config.trait_name}_p_value...')
-    # adata.obs[f'gsMap_{config.trait_name}_p_value'] = trait_ldsc_result['p']
-    # adata.write(config.hdf5_with_latent_path, )
-
     return gene_diagnostic_info.reset_index()
 
 
@@ -50,6 +50,15 @@ def preprocess_data(adata, params):
     # HVGs based on count
     logger.info("Dealing with count data...")
     sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=params.feat_cell)
+
+    # Get the pearson residuals
+    if params.pearson_residuals:
+        sc.experimental.pp.normalize_pearson_residuals(adata, inplace=False)
+        pearson_residuals = sc.experimental.pp.normalize_pearson_residuals(
+            adata, inplace=False, clip=10
+        )
+        adata.layers["pearson_residuals"] = pearson_residuals["X"]
+
     # Normalize the data
     sc.pp.normalize_total(adata, target_sum=1e4)
     sc.pp.log1p(adata)
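For context on the new --pearson_residuals branch: with inplace=False, scanpy.experimental.pp.normalize_pearson_residuals returns a dictionary whose "X" entry holds the residual matrix, which is why the result is stashed in adata.layers["pearson_residuals"] while adata.X stays on counts for the usual normalize_total + log1p path. A minimal sketch on a toy count matrix (data made up, not gsMap code):

import numpy as np
import scanpy as sc
import anndata as ad

adata = ad.AnnData(np.random.poisson(1.0, size=(30, 50)).astype(np.float32))
res = sc.experimental.pp.normalize_pearson_residuals(adata, inplace=False, clip=10)
adata.layers["pearson_residuals"] = res["X"]   # residuals clipped to [-10, 10]
# adata.X is unchanged, so count-based normalization can still follow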
@@ -64,8 +73,13 @@ class LatentRepresentationFinder:
     def __init__(self, adata, args: FindLatentRepresentationsConfig):
         self.params = args
 
-        self.expression_array = adata[:, adata.var.highly_variable].X.copy()
-        self.expression_array = sc.pp.scale(self.expression_array, max_value=10)
+        if "pearson_residuals" in adata.layers:
+            self.expression_array = (
+                adata[:, adata.var.highly_variable].layers["pearson_residuals"].copy()
+            )
+        else:
+            self.expression_array = adata[:, adata.var.highly_variable].X.copy()
+            self.expression_array = sc.pp.scale(self.expression_array, max_value=10)
 
         # Construct the neighboring graph
         self.graph_dict = construct_adjacency_matrix(adata, self.params)
@@ -103,6 +117,8 @@ def run_find_latent_representation(args: FindLatentRepresentationsConfig):
     # Load the ST data
     logger.info(f"Loading ST data of {args.sample_name}...")
     adata = sc.read_h5ad(args.input_hdf5_path)
+    sc.pp.filter_genes(adata, min_cells=1)
+
     logger.info(f"The ST data contains {adata.shape[0]} cells, {adata.shape[1]} genes.")
 
     # Load the cell type annotation