py2ls 0.1.10.12__py3-none-any.whl → 0.2.7.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of py2ls might be problematic. Click here for more details.

Files changed (72) hide show
  1. py2ls/.DS_Store +0 -0
  2. py2ls/.git/.DS_Store +0 -0
  3. py2ls/.git/index +0 -0
  4. py2ls/.git/logs/refs/remotes/origin/HEAD +1 -0
  5. py2ls/.git/objects/.DS_Store +0 -0
  6. py2ls/.git/refs/.DS_Store +0 -0
  7. py2ls/ImageLoader.py +621 -0
  8. py2ls/__init__.py +7 -5
  9. py2ls/apptainer2ls.py +3940 -0
  10. py2ls/batman.py +164 -42
  11. py2ls/bio.py +2595 -0
  12. py2ls/cell_image_clf.py +1632 -0
  13. py2ls/container2ls.py +4635 -0
  14. py2ls/corr.py +475 -0
  15. py2ls/data/.DS_Store +0 -0
  16. py2ls/data/email/email_html_template.html +88 -0
  17. py2ls/data/hyper_param_autogluon_zeroshot2024.json +2383 -0
  18. py2ls/data/hyper_param_tabrepo_2024.py +1753 -0
  19. py2ls/data/mygenes_fields_241022.txt +355 -0
  20. py2ls/data/re_common_pattern.json +173 -0
  21. py2ls/data/sns_info.json +74 -0
  22. py2ls/data/styles/.DS_Store +0 -0
  23. py2ls/data/styles/example/.DS_Store +0 -0
  24. py2ls/data/styles/stylelib/.DS_Store +0 -0
  25. py2ls/data/styles/stylelib/grid.mplstyle +15 -0
  26. py2ls/data/styles/stylelib/high-contrast.mplstyle +6 -0
  27. py2ls/data/styles/stylelib/high-vis.mplstyle +4 -0
  28. py2ls/data/styles/stylelib/ieee.mplstyle +15 -0
  29. py2ls/data/styles/stylelib/light.mplstyl +6 -0
  30. py2ls/data/styles/stylelib/muted.mplstyle +6 -0
  31. py2ls/data/styles/stylelib/nature-reviews-latex.mplstyle +616 -0
  32. py2ls/data/styles/stylelib/nature-reviews.mplstyle +616 -0
  33. py2ls/data/styles/stylelib/nature.mplstyle +31 -0
  34. py2ls/data/styles/stylelib/no-latex.mplstyle +10 -0
  35. py2ls/data/styles/stylelib/notebook.mplstyle +36 -0
  36. py2ls/data/styles/stylelib/paper.mplstyle +290 -0
  37. py2ls/data/styles/stylelib/paper2.mplstyle +305 -0
  38. py2ls/data/styles/stylelib/retro.mplstyle +4 -0
  39. py2ls/data/styles/stylelib/sans.mplstyle +10 -0
  40. py2ls/data/styles/stylelib/scatter.mplstyle +7 -0
  41. py2ls/data/styles/stylelib/science.mplstyle +48 -0
  42. py2ls/data/styles/stylelib/std-colors.mplstyle +4 -0
  43. py2ls/data/styles/stylelib/vibrant.mplstyle +6 -0
  44. py2ls/data/tiles.csv +146 -0
  45. py2ls/data/usages_pd.json +1417 -0
  46. py2ls/data/usages_sns.json +31 -0
  47. py2ls/docker2ls.py +5446 -0
  48. py2ls/ec2ls.py +61 -0
  49. py2ls/fetch_update.py +145 -0
  50. py2ls/ich2ls.py +1955 -296
  51. py2ls/im2.py +8242 -0
  52. py2ls/image_ml2ls.py +2100 -0
  53. py2ls/ips.py +33909 -3418
  54. py2ls/ml2ls.py +7700 -0
  55. py2ls/mol.py +289 -0
  56. py2ls/mount2ls.py +1307 -0
  57. py2ls/netfinder.py +873 -351
  58. py2ls/nl2ls.py +283 -0
  59. py2ls/ocr.py +1581 -458
  60. py2ls/plot.py +10394 -314
  61. py2ls/rna2ls.py +311 -0
  62. py2ls/ssh2ls.md +456 -0
  63. py2ls/ssh2ls.py +5933 -0
  64. py2ls/ssh2ls_v01.py +2204 -0
  65. py2ls/stats.py +66 -172
  66. py2ls/temp20251124.py +509 -0
  67. py2ls/translator.py +2 -0
  68. py2ls/utils/decorators.py +3564 -0
  69. py2ls/utils_bio.py +3453 -0
  70. {py2ls-0.1.10.12.dist-info → py2ls-0.2.7.10.dist-info}/METADATA +113 -224
  71. {py2ls-0.1.10.12.dist-info → py2ls-0.2.7.10.dist-info}/RECORD +72 -16
  72. {py2ls-0.1.10.12.dist-info → py2ls-0.2.7.10.dist-info}/WHEEL +0 -0
py2ls/rna2ls.py ADDED
@@ -0,0 +1,311 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ rna_master_tools.py
4
+ Single-file advanced RNA-seq toolkit + CLI.
5
+
6
+ Usage examples:
7
+ python rna_master_tools.py run-rmats --b1 a1.bam,a2.bam --b2 b1.bam,b2.bam --gtf ref.gtf --outdir rmats_out
8
+ python rna_master_tools.py parse-rmats --in rmats_out/SE.MATS.JCEC.txt --out rmats_parsed.tsv
9
+ python rna_master_tools.py run-starfusion --left left.fq --right right.fq --genome_lib_dir GRCh38_ctat_lib --outdir fusion_out
10
+ python rna_master_tools.py compute-wgcna --counts counts_normalized.tsv --out modules.tsv
11
+ python rna_master_tools.py biomarkers --counts counts_normalized.tsv --labels samplesheet.tsv --out biomarker_report.tsv
12
+ """
13
+
14
+ import os
15
+ import sys
16
+ import argparse
17
+ import logging
18
+ import subprocess
19
+ import numpy as np
20
+ import pandas as pd
21
+ import matplotlib.pyplot as plt
22
+
23
+ from sklearn.ensemble import RandomForestClassifier
24
+ from sklearn.model_selection import train_test_split
25
+ from sklearn.cluster import AgglomerativeClustering
26
+ from sklearn.metrics import classification_report
27
+
28
+ logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
29
+ logger = logging.getLogger(__name__)
30
+
31
+ # -------------------------
32
+ # Utilities
33
+ # -------------------------
34
def ensure_dir(path):
    """Create ``path`` (and any missing parents) if absent, then return ``path``."""
    os.makedirs(name=path, exist_ok=True)
    return path
37
+
38
def run_cmd(cmd, check=True):
    """
    Execute an external command given as a list of string arguments.

    The command line is logged at INFO level. Output is captured as text and
    the first 200 characters of stdout/stderr are logged at DEBUG level.
    When ``check`` is true and the command exits non-zero, its stderr is
    logged at ERROR level and ``subprocess.CalledProcessError`` is re-raised.

    Returns the ``subprocess.CompletedProcess`` of the finished command.
    """
    logger.info("Run cmd: %s", " ".join(cmd))
    try:
        completed = subprocess.run(cmd, check=check, capture_output=True, text=True)
    except subprocess.CalledProcessError as err:
        logger.error("Command failed: %s", err.stderr)
        raise
    logger.debug("stdout: %s", completed.stdout[:200])
    logger.debug("stderr: %s", completed.stderr[:200])
    return completed
48
+
49
def read_table(path, index_col=None):
    """Load a tab-separated file into a DataFrame, optionally using ``index_col`` as the index."""
    table = pd.read_csv(path, sep="\t", index_col=index_col)
    return table
51
+
52
def write_table(df, path):
    """Write ``df`` to ``path`` as a tab-separated file, including the index column."""
    df.to_csv(path_or_buf=path, sep="\t", index=True)
54
+
55
+ # -------------------------
56
+ # rMATS wrappers & parsers
57
+ # -------------------------
58
def run_rmats_cli(b1_list, b2_list, gtf, outdir, readlen=100, threads=8, rmats_cmd="rmats.py"):
    """
    Run the rMATS CLI on two groups of BAM files.

    Parameters
    ----------
    b1_list, b2_list:
        BAM files of group 1 / group 2. Each may be a comma-separated string
        of BAM paths, an iterable of BAM paths, or the path of an existing
        text file that already holds the comma-separated list.
    gtf:
        Annotation GTF passed to ``--gtf``.
    outdir:
        Output directory (created if missing); a ``tmp`` sub-directory is
        created inside it and passed to ``--tmp``.
    readlen, threads:
        Forwarded to ``--readLength`` and ``--nthread``.
    rmats_cmd:
        Name or path of the rMATS executable.

    Returns
    -------
    ``outdir``.

    Notes
    -----
    rMATS (v4 / "turbo", the version that uses ``--od``/``--tmp``/``--nthread``)
    expects ``--b1`` and ``--b2`` to name *text files* that contain the
    comma-separated BAM list, not the list itself. BAM lists are therefore
    written to ``b1.txt`` / ``b2.txt`` inside ``outdir`` before invoking rMATS.
    """
    ensure_dir(outdir)
    tmpdir = ensure_dir(os.path.join(outdir, "tmp"))

    def as_list_file(bams, filename):
        # Normalise iterables of paths to the comma-separated form.
        if not isinstance(bams, str):
            bams = ",".join(str(b) for b in bams)
        bams = bams.strip()
        # A single existing non-alignment file is taken to be a ready-made list file.
        is_alignment = bams.lower().endswith((".bam", ".sam", ".cram"))
        if "," not in bams and not is_alignment and os.path.isfile(bams):
            return bams
        list_path = os.path.join(outdir, filename)
        with open(list_path, "w", encoding="utf-8") as fh:
            fh.write(bams)
        return list_path

    cmd = [
        rmats_cmd,
        "--b1", as_list_file(b1_list, "b1.txt"),
        "--b2", as_list_file(b2_list, "b2.txt"),
        "--gtf", gtf,
        "--od", outdir,
        "--tmp", tmpdir,
        "--readLength", str(readlen),
        "--nthread", str(threads),
    ]
    run_cmd(cmd)
    return outdir
76
+
77
def parse_rmats_event_table(event_file, out_tsv=None):
    """
    Parse an rMATS event file and compute per-group PSI means and dPSI.

    Adds ``PSI_group1`` / ``PSI_group2`` (mean of the comma-separated values
    in ``IncLevel1`` / ``IncLevel2``) when those columns exist, ``dPSI``
    (group1 minus group2) when both means exist, and coerces ``PValue`` and
    ``FDR`` to numeric (unparsable entries become NaN).

    Parameters
    ----------
    event_file:
        Tab-separated rMATS event table.
    out_tsv:
        Optional path; when given, the augmented table is written there as TSV
        without the index.

    Returns
    -------
    The augmented DataFrame.
    """
    df = pd.read_csv(event_file, sep="\t", low_memory=False)
    missing_tokens = {".", "", "NA"}

    def mean_psi(cell):
        # Only conversion failures mean "no usable value"; anything else
        # (e.g. KeyboardInterrupt) must propagate instead of being swallowed.
        try:
            vals = [float(tok) for tok in str(cell).split(",") if tok not in missing_tokens]
        except (TypeError, ValueError):
            return np.nan
        # Drop NaNs explicitly so an all-NaN cell yields NaN without a RuntimeWarning.
        vals = [v for v in vals if not np.isnan(v)]
        return float(np.mean(vals)) if vals else np.nan

    if 'IncLevel1' in df.columns:
        df['PSI_group1'] = df['IncLevel1'].apply(mean_psi)
    if 'IncLevel2' in df.columns:
        df['PSI_group2'] = df['IncLevel2'].apply(mean_psi)
    if 'PSI_group1' in df.columns and 'PSI_group2' in df.columns:
        df['dPSI'] = df['PSI_group1'] - df['PSI_group2']
    for col in ('PValue', 'FDR'):
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
    if out_tsv:
        df.to_csv(out_tsv, sep="\t", index=False)
    return df
101
+
102
def plot_rmats_volcano(df, out_png=None, fdr_thresh=0.05, dpsi_thresh=0.1):
    """
    Draw a volcano plot (dPSI vs. -log10 FDR) from a table with ``FDR`` and ``dPSI`` columns.

    All events are drawn in gray; events with ``FDR <= fdr_thresh`` and
    ``|dPSI| >= dpsi_thresh`` are over-plotted in red. Dashed guide lines mark
    both thresholds. FDR values of exactly 0 are replaced by 1e-300 before
    taking the logarithm. The figure is saved to ``out_png`` (150 dpi) when
    given. Returns the ``matplotlib.pyplot`` module.
    """
    fdr = df['FDR']
    neg_log_fdr = -np.log10(fdr.replace(0, 1e-300))
    dpsi = df['dPSI']
    is_sig = (fdr <= fdr_thresh) & (dpsi.abs() >= dpsi_thresh)

    plt.figure(figsize=(7, 6))
    plt.scatter(dpsi, neg_log_fdr, s=8, c='gray', alpha=0.7)
    if is_sig.any():
        plt.scatter(df.loc[is_sig, 'dPSI'], neg_log_fdr.loc[is_sig], s=10, c='red', alpha=0.8)

    # Threshold guides: +/- dPSI cut-offs and the FDR cut-off.
    for x_cut in (dpsi_thresh, -dpsi_thresh):
        plt.axvline(x_cut, color='blue', linestyle='--')
    plt.axhline(-np.log10(fdr_thresh), color='green', linestyle='--')

    plt.xlabel('dPSI')
    plt.ylabel('-log10(FDR)')
    plt.title('rMATS volcano')

    if out_png:
        plt.savefig(out_png, dpi=150)
    return plt
116
+
117
+ # -------------------------
118
+ # STAR-Fusion wrapper & parser
119
+ # -------------------------
120
def run_star_fusion_cli(left_fq, right_fq, genome_lib_dir, outdir, threads=8):
    """
    Invoke the ``STAR-Fusion`` executable on a pair of FASTQ files.

    ``outdir`` is created if missing and returned. The executable is looked
    up by name, so it must be resolvable in the calling environment.
    """
    ensure_dir(outdir)
    options = (
        ("--left_fq", left_fq),
        ("--right_fq", right_fq),
        ("--genome_lib_dir", genome_lib_dir),
        ("--output_dir", outdir),
        ("--CPU", str(threads)),
    )
    argv = ["STAR-Fusion"]
    for flag, value in options:
        argv += [flag, value]
    run_cmd(argv)
    return outdir
135
+
136
def parse_star_fusion_tsv(fusion_tsv, out_tsv=None):
    """
    Load a STAR-Fusion predictions table.

    The STAR-Fusion header line itself starts with ``#`` (``#FusionName ...``),
    so the file must NOT be read with ``comment='#'``: that drops the header
    and promotes the first fusion record to column names. The table is read
    as-is and the leading ``#`` is stripped from the first column name, so the
    column is available as ``FusionName``.

    Parameters
    ----------
    fusion_tsv:
        Path to the tab-separated STAR-Fusion output.
    out_tsv:
        Optional path; when given, the parsed table is written there as TSV
        without the index.

    Returns
    -------
    The parsed DataFrame.
    """
    df = pd.read_csv(fusion_tsv, sep="\t", low_memory=False)
    df.columns = [
        str(col).lstrip("#") if pos == 0 else col
        for pos, col in enumerate(df.columns)
    ]
    if out_tsv:
        df.to_csv(out_tsv, sep="\t", index=False)
    return df
141
+
142
+ # -------------------------
143
+ # WGCNA-like coexpression (lightweight)
144
+ # -------------------------
145
def compute_wgcna_modules_cli(counts_tsv, out_tsv=None, soft_power=6):
    """
    Lightweight WGCNA-like module detection.

    Parameters
    ----------
    counts_tsv:
        Tab-separated expression table with genes as rows (first column is the
        gene index) and samples as columns.
    out_tsv:
        Optional path; when given, the gene-to-module table is written there
        as TSV without the index.
    soft_power:
        Exponent applied to the absolute gene-gene correlation to build the
        adjacency.

    Returns
    -------
    DataFrame with columns ``gene`` and ``module`` (integer cluster label).

    Notes
    -----
    The "TOM" here is a simple approximation (adjacency times absolute
    correlation), not the topological overlap used by the R WGCNA package.
    """
    expr = pd.read_csv(counts_tsv, sep="\t", index_col=0)

    # DataFrame.corr() correlates columns, so transpose to get gene x gene.
    corr = expr.T.corr()
    abs_corr = corr.abs()
    adj = abs_corr ** soft_power
    tom = adj * abs_corr
    # Undefined correlations (e.g. constant genes) are treated as maximally distant.
    dist = (1 - tom).fillna(1.0)

    options = dict(n_clusters=None, distance_threshold=0.2, linkage='average')
    try:
        # scikit-learn >= 1.2 names the distance argument ``metric``.
        clustering = AgglomerativeClustering(metric='precomputed', **options)
    except TypeError:
        # Older scikit-learn releases only know the legacy ``affinity`` name.
        clustering = AgglomerativeClustering(affinity='precomputed', **options)
    labels = clustering.fit_predict(dist.values)

    modules = pd.DataFrame({'gene': corr.index, 'module': labels})
    if out_tsv:
        modules.to_csv(out_tsv, sep="\t", index=False)
    return modules
174
+
175
+ # -------------------------
176
+ # BIOMARKER SELECTION + ML
177
+ # -------------------------
178
def _load_samples_and_labels(counts_tsv, labels_tsv):
    """Load counts and labels; return (samples x features matrix, aligned label Series)."""
    counts = pd.read_csv(counts_tsv, sep="\t", index_col=0)
    ss = pd.read_csv(labels_tsv, sep="\t")
    if 'sampleID' in ss.columns:
        ss = ss.set_index('sampleID')

    # Decide orientation from where the sample IDs of the label sheet actually
    # appear, rather than guessing from the spelling of the first row label.
    col_hits = int(counts.columns.isin(ss.index).sum())
    row_hits = int(counts.index.isin(ss.index).sum())
    if col_hits or row_hits:
        X = counts.T if col_hits > row_hits else counts
    else:
        # Fallback: original name-based heuristic (str() guards non-string labels).
        first = str(counts.index[0])
        X = counts.T if first.startswith('ENS') or first.isalpha() else counts

    labels = ss.loc[X.index, ss.columns[-1]]  # last column is the label
    return X, labels


def select_features_rf_cli(counts_tsv, labels_tsv, out_tsv=None, topk=50):
    """
    Rank features by random-forest importance and return the top ``topk``.

    Parameters
    ----------
    counts_tsv:
        Tab-separated expression table, either genes x samples or
        samples x genes (first column is the index). It is oriented to
        samples x features before fitting.
    labels_tsv:
        Tab-separated sample sheet with a ``sampleID`` column; the last
        column is used as the class label.
    out_tsv:
        Optional path; when given, the top importances are written there as
        TSV with an ``importance`` header.
    topk:
        Number of top-ranked features to keep.

    Returns
    -------
    Series of importances indexed by feature, sorted descending.

    Raises
    ------
    KeyError
        If samples of the count matrix are missing from the sample sheet.
    """
    X, labels = _load_samples_and_labels(counts_tsv, labels_tsv)
    rf = RandomForestClassifier(n_estimators=500, random_state=42, n_jobs=1)
    rf.fit(X, labels)
    imp = pd.Series(rf.feature_importances_, index=X.columns).sort_values(ascending=False)
    top = imp.head(topk)
    if out_tsv:
        top.to_csv(out_tsv, sep="\t", header=['importance'])
    return top


def train_classifiers_cli(counts_tsv, labels_tsv, out_tsv=None, test_size=0.25):
    """
    Train a random forest on a train/test split and report hold-out metrics.

    Inputs follow the same conventions as ``select_features_rf_cli``.
    ``test_size`` is the fraction of samples held out. When ``out_tsv`` is
    given, the classification report is written there as TSV.

    Returns
    -------
    The ``classification_report`` dictionary for the held-out samples.

    Raises
    ------
    KeyError
        If samples of the count matrix are missing from the sample sheet.
    """
    X, y = _load_samples_and_labels(counts_tsv, labels_tsv)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
    rf = RandomForestClassifier(n_estimators=400, random_state=42)
    rf.fit(X_train, y_train)
    pred = rf.predict(X_test)
    rep = classification_report(y_test, pred, output_dict=True)
    if out_tsv:
        pd.DataFrame(rep).to_csv(out_tsv, sep="\t")
    return rep
223
+
224
+ # -------------------------
225
+ # CLI: argparse dispatcher
226
+ # -------------------------
227
def build_parser():
    """
    Build the argparse parser for the toolkit.

    One sub-command is registered per tool; the chosen sub-command name is
    stored in ``args.cmd`` (``None`` when no sub-command is given).
    """
    parser = argparse.ArgumentParser(prog="rna_master_tools.py")
    commands = parser.add_subparsers(dest="cmd")

    def add_required(target, *flags):
        # Register plain required string options.
        for flag in flags:
            target.add_argument(flag, required=True)

    def add_in_out(target):
        # Shared "--in <file> [--out <file>]" pair.
        target.add_argument("--in", dest="infile", required=True)
        target.add_argument("--out", dest="out", default=None)

    def add_out(target):
        target.add_argument("--out", dest="out", default=None)

    # rMATS run
    rmats = commands.add_parser("run-rmats")
    rmats.add_argument("--b1", required=True, help="Comma-separated BAMs for group1")
    rmats.add_argument("--b2", required=True, help="Comma-separated BAMs for group2")
    add_required(rmats, "--gtf", "--outdir")
    rmats.add_argument("--readlen", type=int, default=100)
    rmats.add_argument("--threads", type=int, default=8)
    rmats.add_argument("--rmats-cmd", default="rmats.py")

    # parse rMATS / plot rMATS volcano
    add_in_out(commands.add_parser("parse-rmats"))
    add_in_out(commands.add_parser("plot-rmats"))

    # STAR-Fusion run
    fusion = commands.add_parser("run-starfusion")
    add_required(fusion, "--left", "--right", "--genome_lib_dir", "--outdir")
    fusion.add_argument("--threads", type=int, default=8)

    # parse star-fusion
    add_in_out(commands.add_parser("parse-starfusion"))

    # WGCNA
    wgcna = commands.add_parser("compute-wgcna")
    add_required(wgcna, "--counts")
    add_out(wgcna)
    wgcna.add_argument("--power", type=int, default=6)

    # biomarkers select
    biomarkers = commands.add_parser("biomarkers")
    add_required(biomarkers, "--counts", "--labels")
    add_out(biomarkers)
    biomarkers.add_argument("--topk", type=int, default=50)

    # classifiers
    classifiers = commands.add_parser("train-classifiers")
    add_required(classifiers, "--counts", "--labels")
    add_out(classifiers)

    return parser
284
+
285
def main(argv=None):
    """
    CLI entry point: parse ``argv`` and run the selected sub-command.

    When no known sub-command is given, the help text is printed and the
    process exits with status 1.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # Sub-command name -> action taking the parsed namespace.
    handlers = {
        "run-rmats": lambda a: run_rmats_cli(
            a.b1, a.b2, a.gtf, a.outdir,
            readlen=a.readlen, threads=a.threads, rmats_cmd=a.rmats_cmd,
        ),
        "parse-rmats": lambda a: parse_rmats_event_table(a.infile, out_tsv=a.out),
        "plot-rmats": lambda a: plot_rmats_volcano(
            parse_rmats_event_table(a.infile), out_png=a.out
        ),
        "run-starfusion": lambda a: run_star_fusion_cli(
            a.left, a.right, a.genome_lib_dir, a.outdir, threads=a.threads
        ),
        "parse-starfusion": lambda a: parse_star_fusion_tsv(a.infile, out_tsv=a.out),
        "compute-wgcna": lambda a: compute_wgcna_modules_cli(
            a.counts, out_tsv=a.out, soft_power=a.power
        ),
        "biomarkers": lambda a: select_features_rf_cli(
            a.counts, a.labels, out_tsv=a.out, topk=a.topk
        ),
        "train-classifiers": lambda a: train_classifiers_cli(
            a.counts, a.labels, out_tsv=a.out
        ),
    }

    action = handlers.get(args.cmd)
    if action is None:
        parser.print_help()
        sys.exit(1)
    action(args)
309
+
310
+ if __name__ == "__main__":
311
+ main()