biopipen 0.32.3__py3-none-any.whl → 0.33.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of biopipen might be problematic. Click here for more details.

Files changed (118) hide show
  1. biopipen/__init__.py +1 -1
  2. biopipen/core/config.toml +6 -0
  3. biopipen/core/filters.py +35 -23
  4. biopipen/core/testing.py +6 -1
  5. biopipen/ns/bam.py +39 -0
  6. biopipen/ns/cellranger.py +5 -0
  7. biopipen/ns/cellranger_pipeline.py +2 -2
  8. biopipen/ns/cnvkit_pipeline.py +4 -1
  9. biopipen/ns/delim.py +33 -27
  10. biopipen/ns/protein.py +99 -0
  11. biopipen/ns/scrna.py +428 -250
  12. biopipen/ns/snp.py +16 -3
  13. biopipen/ns/tcr.py +125 -1
  14. biopipen/ns/vcf.py +34 -0
  15. biopipen/ns/web.py +5 -1
  16. biopipen/reports/scrna/SeuratClusterStats.svelte +1 -1
  17. biopipen/reports/scrna/SeuratMap2Ref.svelte +15 -2
  18. biopipen/reports/tcr/ClonalStats.svelte +15 -0
  19. biopipen/reports/utils/misc.liq +20 -7
  20. biopipen/scripts/bam/BamMerge.py +2 -2
  21. biopipen/scripts/bam/BamSampling.py +4 -4
  22. biopipen/scripts/bam/BamSort.py +141 -0
  23. biopipen/scripts/bam/BamSplitChroms.py +10 -10
  24. biopipen/scripts/bam/BamSubsetByBed.py +3 -3
  25. biopipen/scripts/bam/CNVpytor.py +10 -10
  26. biopipen/scripts/bam/ControlFREEC.py +11 -11
  27. biopipen/scripts/bed/Bed2Vcf.py +5 -5
  28. biopipen/scripts/bed/BedConsensus.py +5 -5
  29. biopipen/scripts/bed/BedLiftOver.sh +6 -4
  30. biopipen/scripts/bed/BedtoolsIntersect.py +4 -4
  31. biopipen/scripts/bed/BedtoolsMakeWindows.py +3 -3
  32. biopipen/scripts/bed/BedtoolsMerge.py +4 -4
  33. biopipen/scripts/cellranger/CellRangerCount.py +20 -9
  34. biopipen/scripts/cellranger/CellRangerSummary.R +20 -29
  35. biopipen/scripts/cellranger/CellRangerVdj.py +8 -8
  36. biopipen/scripts/cnvkit/CNVkitAccess.py +6 -6
  37. biopipen/scripts/cnvkit/CNVkitAutobin.py +25 -18
  38. biopipen/scripts/cnvkit/CNVkitBatch.py +5 -5
  39. biopipen/scripts/cnvkit/CNVkitCall.py +3 -3
  40. biopipen/scripts/cnvkit/CNVkitCoverage.py +2 -2
  41. biopipen/scripts/cnvkit/CNVkitDiagram.py +5 -5
  42. biopipen/scripts/cnvkit/CNVkitFix.py +3 -3
  43. biopipen/scripts/cnvkit/CNVkitGuessBaits.py +9 -5
  44. biopipen/scripts/cnvkit/CNVkitHeatmap.py +4 -4
  45. biopipen/scripts/cnvkit/CNVkitReference.py +2 -2
  46. biopipen/scripts/cnvkit/CNVkitScatter.py +5 -5
  47. biopipen/scripts/cnvkit/CNVkitSegment.py +5 -5
  48. biopipen/scripts/cnvkit/guess_baits.py +166 -93
  49. biopipen/scripts/delim/SampleInfo.R +94 -148
  50. biopipen/scripts/misc/Config2File.py +2 -2
  51. biopipen/scripts/misc/Str2File.py +2 -2
  52. biopipen/scripts/protein/MMCIF2PDB.py +33 -0
  53. biopipen/scripts/protein/PDB2Fasta.py +60 -0
  54. biopipen/scripts/protein/Prodigy.py +4 -4
  55. biopipen/scripts/protein/RMSD.py +178 -0
  56. biopipen/scripts/regulatory/MotifScan.py +8 -8
  57. biopipen/scripts/scrna/CellCellCommunication.py +59 -22
  58. biopipen/scripts/scrna/LoomTo10X.R +51 -0
  59. biopipen/scripts/scrna/MarkersFinder.R +273 -654
  60. biopipen/scripts/scrna/RadarPlots.R +73 -53
  61. biopipen/scripts/scrna/SCP-plot.R +15202 -0
  62. biopipen/scripts/scrna/ScVelo.py +0 -0
  63. biopipen/scripts/scrna/SeuratClusterStats-clustree.R +23 -31
  64. biopipen/scripts/scrna/SeuratClusterStats-dimplots.R +26 -54
  65. biopipen/scripts/scrna/SeuratClusterStats-features.R +85 -403
  66. biopipen/scripts/scrna/SeuratClusterStats-ngenes.R +32 -17
  67. biopipen/scripts/scrna/SeuratClusterStats-stats.R +45 -239
  68. biopipen/scripts/scrna/SeuratClusterStats.R +13 -19
  69. biopipen/scripts/scrna/SeuratMap2Ref.R +16 -12
  70. biopipen/scripts/scrna/SeuratPreparing.R +138 -81
  71. biopipen/scripts/scrna/SlingShot.R +71 -0
  72. biopipen/scripts/scrna/celltypist-wrapper.py +7 -6
  73. biopipen/scripts/snp/Plink2GTMat.py +26 -11
  74. biopipen/scripts/snp/PlinkFilter.py +7 -7
  75. biopipen/scripts/snp/PlinkFromVcf.py +8 -5
  76. biopipen/scripts/snp/PlinkSimulation.py +4 -4
  77. biopipen/scripts/snp/PlinkUpdateName.py +4 -4
  78. biopipen/scripts/stats/ChowTest.R +48 -22
  79. biopipen/scripts/tcgamaf/Maf2Vcf.py +2 -2
  80. biopipen/scripts/tcgamaf/MafAddChr.py +2 -2
  81. biopipen/scripts/tcr/ClonalStats.R +484 -0
  82. biopipen/scripts/tcr/ScRepLoading.R +127 -0
  83. biopipen/scripts/tcr/TCRDock.py +10 -6
  84. biopipen/scripts/tcr/vdjtools-patch.sh +1 -1
  85. biopipen/scripts/vcf/BcftoolsAnnotate.py +8 -8
  86. biopipen/scripts/vcf/BcftoolsFilter.py +3 -3
  87. biopipen/scripts/vcf/BcftoolsMerge.py +31 -0
  88. biopipen/scripts/vcf/BcftoolsSort.py +4 -4
  89. biopipen/scripts/vcf/BcftoolsView.py +5 -5
  90. biopipen/scripts/vcf/Vcf2Bed.py +2 -2
  91. biopipen/scripts/vcf/VcfAnno.py +11 -11
  92. biopipen/scripts/vcf/VcfDownSample.sh +22 -10
  93. biopipen/scripts/vcf/VcfFilter.py +5 -5
  94. biopipen/scripts/vcf/VcfFix.py +7 -7
  95. biopipen/scripts/vcf/VcfFix_utils.py +12 -3
  96. biopipen/scripts/vcf/VcfIndex.py +3 -3
  97. biopipen/scripts/vcf/VcfIntersect.py +3 -3
  98. biopipen/scripts/vcf/VcfLiftOver.sh +5 -0
  99. biopipen/scripts/vcf/VcfSplitSamples.py +4 -4
  100. biopipen/scripts/vcf/bcftools_utils.py +3 -3
  101. biopipen/scripts/web/Download.py +8 -4
  102. biopipen/scripts/web/DownloadList.py +5 -5
  103. biopipen/scripts/web/GCloudStorageDownloadBucket.py +5 -5
  104. biopipen/scripts/web/GCloudStorageDownloadFile.py +3 -3
  105. biopipen/scripts/web/gcloud_common.py +1 -1
  106. biopipen/utils/gsea.R +75 -35
  107. biopipen/utils/misc.R +205 -7
  108. biopipen/utils/misc.py +17 -8
  109. biopipen/utils/reference.py +11 -11
  110. biopipen/utils/repr.R +146 -0
  111. biopipen/utils/vcf.py +1 -1
  112. {biopipen-0.32.3.dist-info → biopipen-0.33.1.dist-info}/METADATA +8 -8
  113. {biopipen-0.32.3.dist-info → biopipen-0.33.1.dist-info}/RECORD +115 -105
  114. {biopipen-0.32.3.dist-info → biopipen-0.33.1.dist-info}/WHEEL +1 -1
  115. biopipen/scripts/scrna/SeuratClusterStats-hists.R +0 -144
  116. biopipen/scripts/scrna/SeuratPreparing-common.R +0 -467
  117. biopipen/scripts/scrna/SeuratPreparing-doublet_detection.R +0 -204
  118. {biopipen-0.32.3.dist-info → biopipen-0.33.1.dist-info}/entry_points.txt +0 -0
biopipen/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.32.3"
1
+ __version__ = "0.33.1"
biopipen/core/config.toml CHANGED
@@ -1,9 +1,13 @@
1
1
  # Executables or binaries
2
2
  [exe]
3
+ # BeEM: https://github.com/kad-ecoli/BeEM
4
+ beem = "BeEM"
3
5
  # bedtools to handle bed files
4
6
  bedtools = "bedtools"
5
7
  # bcftools to handle bcf/vcf files
6
8
  bcftools = "bcftools"
9
+ # calculate_rmsd: https://github.com/charnley/rmsd
10
+ calculate_rmsd = "calculate_rmsd"
7
11
  # cellranger
8
12
  cellranger = "cellranger"
9
13
  # Control-FREEC to call cnvs
@@ -27,6 +31,8 @@ cnvnator2vcf = "cnvnator2VCF.pl"
27
31
  convert = "convert"
28
32
  # fimo from meme
29
33
  fimo = "fimo"
34
+ # MAXIT: https://sw-tools.rcsb.org/apps/MAXIT/
35
+ maxit = "maxit"
30
36
  # wget
31
37
  wget = "wget"
32
38
  # aria2c
biopipen/core/filters.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """Additional filters for pipen"""
2
2
  from __future__ import annotations
3
3
 
4
+ import re
4
5
  import shlex
5
6
  from pathlib import Path
6
7
  from typing import Any, List, Mapping
@@ -9,6 +10,8 @@ from argx import Namespace
9
10
  from liquid.filters.manager import FilterManager
10
11
  from pipen_report.filters import register_component, render_ui, _tag
11
12
 
13
+ # from .defaults import BIOPIPEN_DIR
14
+
12
15
  filtermanager = FilterManager()
13
16
 
14
17
 
@@ -171,6 +174,8 @@ def r(
171
174
  return "FALSE"
172
175
  if obj.upper() == "NA" or obj.upper() == "NULL":
173
176
  return obj.upper()
177
+ if re.match(r"^\d+:\d+$", obj):
178
+ return obj
174
179
  if obj.startswith("r:") or obj.startswith("R:"):
175
180
  return str(obj)[2:]
176
181
  return repr(str(obj))
@@ -222,7 +227,7 @@ def r(
222
227
 
223
228
 
224
229
  @filtermanager.register
225
- def source_r(path: str | Path) -> str:
230
+ def source_r(path: str | Path, chdir: bool = False) -> str:
226
231
  """Source an R script.
227
232
 
228
233
  In addition to generating `source(path)`, we also include the mtime for the script
@@ -238,7 +243,8 @@ def source_r(path: str | Path) -> str:
238
243
  mtime = int(path.stat().st_mtime)
239
244
  return (
240
245
  f"# Last modified: {mtime}\n"
241
- f"source('{path}')"
246
+ # f"biopipen_dir = {r(BIOPIPEN_DIR)}\n"
247
+ f"source('{path}', chdir = {r(chdir)})"
242
248
  )
243
249
 
244
250
 
@@ -375,15 +381,15 @@ def _render_enrichr(
375
381
  components = []
376
382
 
377
383
  for db in dbs:
378
- enrichr_plot = Path(cont["dir"]).joinpath(f"Enrichr-{db}.png")
379
- if enrichr_plot.exists():
384
+ enrichr_plots = list(Path(cont["dir"]).glob(f"Enrichr-{db}.*.png"))
385
+ if len(enrichr_plots) == 0:
380
386
  components.append(
381
387
  {
382
388
  "title": db,
383
389
  "ui": "tabs",
384
390
  "contents": [
385
391
  {
386
- "title": "Plot",
392
+ "title": "Error",
387
393
  "ui": "flat",
388
394
  "contents": [
389
395
  {
@@ -400,21 +406,8 @@ def _render_enrichr(
400
406
  )
401
407
  },
402
408
  {
403
- "kind": "image",
404
- "src": str(enrichr_plot),
405
- "download": str(enrichr_plot.with_suffix(".pdf")),
406
- }
407
- ],
408
- },
409
- {
410
- "title": "Table",
411
- "ui": "flat",
412
- "contents": [
413
- {
414
- "kind": "table",
415
- "src": str(
416
- Path(cont["dir"]).joinpath(f"Enrichr-{db}.txt")
417
- ),
409
+ "kind": "error",
410
+ "content": "No enriched terms found.",
418
411
  }
419
412
  ],
420
413
  },
@@ -422,18 +415,37 @@ def _render_enrichr(
422
415
  }
423
416
  )
424
417
  else:
418
+ contents = []
419
+ for enrichr_plot in enrichr_plots:
420
+ plot_type = enrichr_plot.stem.split(".")[-1]
421
+ pdf = enrichr_plot.with_suffix(".pdf")
422
+ contents.append(
423
+ {
424
+ "src": str(enrichr_plot),
425
+ "title": f"{plot_type.title()} Plot",
426
+ "download": str(pdf),
427
+ }
428
+ )
429
+
425
430
  components.append(
426
431
  {
427
432
  "title": db,
428
433
  "ui": "tabs",
429
434
  "contents": [
430
435
  {
431
- "title": "Error",
436
+ "title": "Plots",
437
+ "ui": "table_of_images",
438
+ "contents": contents,
439
+ },
440
+ {
441
+ "title": "Table",
432
442
  "ui": "flat",
433
443
  "contents": [
434
444
  {
435
- "kind": "error",
436
- "content": "No enriched terms found.",
445
+ "kind": "table",
446
+ "src": str(
447
+ Path(cont["dir"]).joinpath(f"Enrichr-{db}.txt")
448
+ ),
437
449
  }
438
450
  ],
439
451
  },
biopipen/core/testing.py CHANGED
@@ -96,7 +96,12 @@ def r_test(mem: callable) -> callable:
96
96
  )
97
97
  rcode = f"{expect}\n\n{rcode}\n\ncat('PASSED')\n"
98
98
  if source is not None:
99
- rcode = f'suppressWarnings(source("{self.SOURCE_FILE}"))\n\n{rcode}'
99
+ if not isinstance(source, (list, tuple)):
100
+ source = [source]
101
+
102
+ libs = "\n".join([f"suppressWarnings(source('{s}'))" for s in source])
103
+ rcode = f'{libs}\n\n{rcode}'
104
+
100
105
  out = _run_rcode(rcode)
101
106
  self.assertEqual(
102
107
  out,
biopipen/ns/bam.py CHANGED
@@ -329,3 +329,42 @@ class BamSubsetByBed(Proc):
329
329
  "index": True,
330
330
  }
331
331
  script = "file://../scripts/bam/BamSubsetByBed.py"
332
+
333
+
334
+ class BamSort(Proc):
335
+ """Sort bam file
336
+
337
+ Input:
338
+ bamfile: The bam file
339
+
340
+ Output:
341
+ outfile: The output bam file
342
+
343
+ Envs:
344
+ tool (choice): The tool to use.
345
+ - samtools: Use `samtools`
346
+ - sambamba: Use `sambamba`
347
+ ncores (type=int): Number of cores to use
348
+ samtools: Path to samtools executable
349
+ sambamba: Path to sambamba executable
350
+ tmpdir: The temporary directory to use
351
+ byname (flag): Whether to sort by read name
352
+ index (flag): Whether to index the output bam file
353
+ The index file will be created in the same directory as the output
354
+ bam file
355
+ <more>: Other arguments passed to the sorting tool
356
+ See `samtools sort` or `sambamba sort`
357
+ """
358
+ input = "bamfile:file"
359
+ output = "outfile:file:{{in.bamfile | stem}}.sorted.bam"
360
+ lang = config.lang.python
361
+ envs = {
362
+ "tool": "samtools",
363
+ "ncores": config.misc.ncores,
364
+ "samtools": config.exe.samtools,
365
+ "sambamba": config.exe.sambamba,
366
+ "tmpdir": config.path.tmpdir,
367
+ "byname": False,
368
+ "index": True,
369
+ }
370
+ script = "file://../scripts/bam/BamSort.py"
biopipen/ns/cellranger.py CHANGED
@@ -16,6 +16,10 @@ class CellRangerCount(Proc):
16
16
  element.
17
17
  id: The id defining output directory. If not provided, it is inferred
18
18
  from the fastq files.
19
+ Note that, unlike the `--id` argument of cellranger, this will not select
20
+ the samples from `in.fastqs`. Instead, it will symlink the fastq files
21
+ to a temporary directory with this `id` as prefix and pass that to
22
+ cellranger.
19
23
 
20
24
  Output:
21
25
  outdir: The output directory
@@ -141,6 +145,7 @@ class CellRangerSummary(Proc):
141
145
  The file should be tab-delimited with no header.
142
146
  """
143
147
  input = "indirs:dirs"
148
+ input_data = lambda ch: [list(ch.iloc[:, 0])]
144
149
  output = "outdir:dir:{{in.indirs | first | stem | append: '-etc.summary'}}"
145
150
  lang = config.lang.rscript
146
151
  script = "file://../scripts/cellranger/CellRangerSummary.R"
@@ -28,7 +28,7 @@ class CellRangerCountPipeline(ProcGroup):
28
28
 
29
29
  def post_init(self):
30
30
  """Check if the input is a list of fastq files"""
31
- if not is_loading_pipeline() and (
31
+ if not is_loading_pipeline("-h", "-h+", "--help", "--help+") and (
32
32
  not isinstance(self.opts.input, (list, tuple))
33
33
  or len(self.opts.input) == 0
34
34
  ):
@@ -84,7 +84,7 @@ class CellRangerVdjPipeline(ProcGroup):
84
84
 
85
85
  def post_init(self):
86
86
  """Check if the input is a list of fastq files"""
87
- if not is_loading_pipeline() and (
87
+ if not is_loading_pipeline("-h", "-h+", "--help", "--help+") and (
88
88
  not isinstance(self.opts.input, (list, tuple))
89
89
  or len(self.opts.input) == 0
90
90
  ):
@@ -276,7 +276,10 @@ class CNVkitPipeline(ProcGroup):
276
276
  """Build CNVkitGuessBaits process"""
277
277
  from .cnvkit import CNVkitGuessBaits
278
278
 
279
- if not self.opts.guessbaits and not is_loading_pipeline():
279
+ if (
280
+ not self.opts.guessbaits and
281
+ not is_loading_pipeline("-h", "-h+", "--help", "--help+")
282
+ ):
280
283
  return None
281
284
 
282
285
  def _guess_baits_bams(ch):
biopipen/ns/delim.py CHANGED
@@ -51,6 +51,10 @@ class SampleInfo(Proc):
51
51
  Output:
52
52
  outfile: The output file with sample information, with mutated columns
53
53
  if `envs.save_mutated` is True.
54
+ The basename of the output file will be the same as the input file.
55
+ The file name of each plot will be slugified from the case name.
56
+ Each plot has 3 formats: pdf, png and code.zip, which contains the
57
+ data and R code to reproduce the plot.
54
58
 
55
59
  Envs:
56
60
  sep: The separator of the input file.
@@ -76,30 +80,34 @@ class SampleInfo(Proc):
76
80
  If `FALSE`, you can mutate the meta data frame with the
77
81
  returned ids. Non-paired ids will be `NA`.
78
82
  save_mutated (flag): Whether to save the mutated columns.
79
- exclude_cols: The columns to exclude in the table in the report.
83
+ exclude_cols (auto): The columns to exclude in the table in the report.
80
84
  Could be a list or a string separated by comma.
81
85
  defaults (ns): The default parameters for `envs.stats`.
82
- - on: The column name in the data for the stats.
83
- Default is `Sample`. The column could be either continuous or not.
84
- - subset: An R expression to subset the data.
85
- If you want to keep the distinct records, you can use
86
- `!duplicated(<col>)`.
87
- - group: The column name in the data for the group ids.
88
- If not provided, all records will be regarded as one group.
89
- - na_group (flag): Whether to include `NA`s in the group.
90
- - each: The column in the data to split the analysis in different
91
- plots.
92
- - ncol (type=int): The number of columns in the plot when `each`
93
- is not `NULL`. Default is 2.
94
- - na_each (flag): Whether to include `NA`s in the `each` column.
95
- - plot: Type of plot. If `on` is continuous, it could be
96
- `boxplot` (default), `violin`, `violin+boxplot` or `histogram`.
97
- If `on` is not continuous, it could be `barplot` or
98
- `pie` (default).
86
+ - plot_type: The type of the plot.
87
+ See the supported plot types here:
88
+ <https://pwwang.github.io/plotthis/reference/index.html>
89
+ The plot_type should be lower case and the plot function used in
90
+ `plotthis` should be used. The mapping from plot_type to the
91
+ plot function is like `bar -> BarPlot`, `box -> BoxPlot`, etc.
92
+ - more_formats (list): The additional formats to save the plot.
93
+ By default, the plot will be saved in png, which is also used to
94
+ display in the report. You can add more formats to save the plot.
95
+ For example, `more_formats = ["pdf", "svg"]`.
96
+ - save_code (flag): Whether to save the R code to reproduce the plot.
97
+ The data used to plot will also be saved.
98
+ - subset: An expression to subset the data frame before plotting.
99
+ The expression should be a string of R expression that will be passed
100
+ to `dplyr::filter`. For example, `subset = "Sample == 'A'"`.
101
+ - section: The section name in the report.
102
+ In case you want to group the plots in the report.
99
103
  - devpars (ns): The device parameters for the plot.
100
104
  - width (type=int): The width of the plot.
101
105
  - height (type=int): The height of the plot.
102
106
  - res (type=int): The resolution of the plot.
107
+ - descr: The description of the plot, shown in the report.
108
+ - <more>: You can add more parameters to the defaults.
109
+ These parameters will be expanded to the `envs.stats` for each case,
110
+ and passed to individual plot functions.
103
111
  stats (type=json): The statistics to perform.
104
112
  The keys are the case names and the values are the parameters
105
113
  inherited from `envs.defaults`.
@@ -112,15 +120,13 @@ class SampleInfo(Proc):
112
120
  "save_mutated": False,
113
121
  "exclude_cols": None,
114
122
  "defaults": {
115
- "on": "Sample",
116
- # "distinct": None,
117
- "group": None,
118
- "na_group": False,
119
- "each": None,
120
- "ncol": 2,
121
- "na_each": False,
122
- "plot": None,
123
- "devpars": {"width": 800, "height": 600, "res": 100},
123
+ "plot_type": "bar",
124
+ "more_formats": [],
125
+ "save_code": False,
126
+ "subset": None,
127
+ "section": None,
128
+ "descr": None,
129
+ "devpars": {"width": None, "height": None, "res": 100},
124
130
  },
125
131
  "stats": {},
126
132
  }
biopipen/ns/protein.py CHANGED
@@ -82,3 +82,102 @@ class ProdigySummary(Proc):
82
82
  envs = {"group": None}
83
83
  script = "file://../scripts/protein/ProdigySummary.R"
84
84
  plugin_opts = {"report": "file://../reports/protein/ProdigySummary.svelte"}
85
+
86
+
87
+ class MMCIF2PDB(Proc):
88
+ """Convert mmCIF or PDBx file to PDB file.
89
+
90
+ Using [BeEM](https://github.com/kad-ecoli/BeEM)
91
+
92
+ Input:
93
+ infile: The input mmCIF or PDBx file.
94
+
95
+ Output:
96
+ outfile: The output PDB file.
97
+ The "outfmt" is set to 3 to always output a single PDB file.
98
+
99
+ Envs:
100
+ tool (choice): The tool to use for conversion.
101
+ - maxit: Use MAXIT.
102
+ - beem: Use BeEM.
103
+ maxit: The path to the MAXIT executable.
104
+ beem: The path to the BeEM executable.
105
+ <more>: Other options for MAXIT/BeEM.
106
+ For BeEM, "outfmt" will not be used as it is set to 3.
107
+ """
108
+ input = "infile:file"
109
+ output = "outfile:file:{{in.infile | stem}}.pdb"
110
+ lang = config.lang.python
111
+ envs = {
112
+ "tool": "maxit",
113
+ "maxit": config.exe.maxit,
114
+ "beem": config.exe.beem,
115
+ }
116
+ script = "file://../scripts/protein/MMCIF2PDB.py"
117
+
118
+
119
+ class RMSD(Proc):
120
+ """Calculate the RMSD between two structures.
121
+
122
+ See also https://github.com/charnley/rmsd.
123
+
124
+ If the input is in mmCIF format, convert it to PDB first.
125
+
126
+ Input:
127
+ infile1: The first structure file.
128
+ infile2: The second structure file.
129
+
130
+ Output:
131
+ outfile: The output file containing the RMSD value.
132
+
133
+ Envs:
134
+ beem: The path to the BeEM executable.
135
+ calculate_rmsd: The path to the calculate_rmsd executable.
136
+ conv_tool (choice): The tool to use for conversion.
137
+ - maxit: Use MAXIT.
138
+ - beem: Use BeEM.
139
+ ca_only (flag): Whether to calculate RMSD using only C-alpha atoms.
140
+ duel (choice): How to handle the duel atoms. Default is "keep".
141
+ - keep: Keep both atoms.
142
+ - keep_first: Keep the first atom.
143
+ - keep_last: Keep the last atom.
144
+ - average: Average the coordinates.
145
+ reorder (flag): Whether to reorder the atoms in the structures.
146
+ <more>: Other options for calculate_rmsd.
147
+ """
148
+ input = "infile1:file, infile2:file"
149
+ output = "outfile:file:{{in.infile1 | stem}}-{{in.infile2 | stem}}.rmsd.txt"
150
+ lang = config.lang.python
151
+ envs = {
152
+ "maxit": config.exe.maxit,
153
+ "beem": config.exe.beem,
154
+ "calculate_rmsd": config.exe.calculate_rmsd,
155
+ "conv_tool": "maxit",
156
+ "ca_only": False,
157
+ "duel": "keep",
158
+ "reorder": True,
159
+ }
160
+ script = "file://../scripts/protein/RMSD.py"
161
+
162
+
163
+ class PDB2Fasta(Proc):
164
+ """Convert PDB file to FASTA file.
165
+
166
+ Input:
167
+ infile: The input PDB file.
168
+
169
+ Output:
170
+ outfile: The output FASTA file.
171
+
172
+ Envs:
173
+ chains (auto): The chains to extract. A list of chain IDs or separated by
174
+ commas.
175
+ If None, extract all chains.
176
+ wrap (type=int): The number of residues per line in the output FASTA
177
+ file. Set to 0 to disable wrapping.
178
+ """
179
+ input = "infile:file"
180
+ output = "outfile:file:{{in.infile | stem}}.fasta"
181
+ lang = config.lang.python
182
+ envs = {"chains": None, "wrap": 80}
183
+ script = "file://../scripts/protein/PDB2Fasta.py"