panxpress 0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of panxpress might be problematic. Click here for more details.

Files changed (53) hide show
  1. panxpress-0.2/LICENSE +21 -0
  2. panxpress-0.2/MANIFEST.in +5 -0
  3. panxpress-0.2/PKG-INFO +374 -0
  4. panxpress-0.2/README.md +327 -0
  5. panxpress-0.2/panxpress/cuckoo_filter_utils.py +207 -0
  6. panxpress-0.2/panxpress/dnaencode.py +277 -0
  7. panxpress-0.2/panxpress/dnaencode_fast.py +156 -0
  8. panxpress-0.2/panxpress/fastcash_info.py +156 -0
  9. panxpress-0.2/panxpress/fastcash_main.py +271 -0
  10. panxpress-0.2/panxpress/fastcash_weak_ptr.py +489 -0
  11. panxpress-0.2/panxpress/hash_new.py +742 -0
  12. panxpress-0.2/panxpress/hashfunctions.py +221 -0
  13. panxpress-0.2/panxpress/io/binaryio.py +62 -0
  14. panxpress-0.2/panxpress/io/fastaio.py +461 -0
  15. panxpress-0.2/panxpress/io/fastqio.py +596 -0
  16. panxpress-0.2/panxpress/io/filterio.py +112 -0
  17. panxpress-0.2/panxpress/io/generaldsio.py +46 -0
  18. panxpress-0.2/panxpress/io/generalio.py +252 -0
  19. panxpress-0.2/panxpress/io/hashio.py +514 -0
  20. panxpress-0.2/panxpress/io/seqio.py +200 -0
  21. panxpress-0.2/panxpress/io/textio.py +21 -0
  22. panxpress-0.2/panxpress/io/xorio.py +94 -0
  23. panxpress-0.2/panxpress/kmers.py +474 -0
  24. panxpress-0.2/panxpress/lowlevel/aligned_arrays.py +42 -0
  25. panxpress-0.2/panxpress/lowlevel/bitarray.py +228 -0
  26. panxpress-0.2/panxpress/lowlevel/conpro.py +504 -0
  27. panxpress-0.2/panxpress/lowlevel/debug.py +97 -0
  28. panxpress-0.2/panxpress/lowlevel/intbitarray.py +252 -0
  29. panxpress-0.2/panxpress/lowlevel/libc.py +174 -0
  30. panxpress-0.2/panxpress/lowlevel/llvm.py +638 -0
  31. panxpress-0.2/panxpress/lowlevel/lowlevelfunctions.txt +1 -0
  32. panxpress-0.2/panxpress/lowlevel/numbautils.py +25 -0
  33. panxpress-0.2/panxpress/lowlevel/packedarray.py +186 -0
  34. panxpress-0.2/panxpress/mask.py +69 -0
  35. panxpress-0.2/panxpress/mathutils.py +296 -0
  36. panxpress-0.2/panxpress/panxpress/config/index.yaml +7 -0
  37. panxpress-0.2/panxpress/panxpress/panxpress_build_reference.py +475 -0
  38. panxpress-0.2/panxpress/panxpress/panxpress_correct_gff.py +1342 -0
  39. panxpress-0.2/panxpress/panxpress/panxpress_index.py +286 -0
  40. panxpress-0.2/panxpress/panxpress/panxpress_main.py +308 -0
  41. panxpress-0.2/panxpress/panxpress/panxpress_map_parallel.py +480 -0
  42. panxpress-0.2/panxpress/parameters.py +63 -0
  43. panxpress-0.2/panxpress/srhash.py +594 -0
  44. panxpress-0.2/panxpress/subtable_hashfunctions.py +395 -0
  45. panxpress-0.2/panxpress/values/panxpress.py +87 -0
  46. panxpress-0.2/panxpress.egg-info/PKG-INFO +374 -0
  47. panxpress-0.2/panxpress.egg-info/SOURCES.txt +51 -0
  48. panxpress-0.2/panxpress.egg-info/dependency_links.txt +1 -0
  49. panxpress-0.2/panxpress.egg-info/entry_points.txt +3 -0
  50. panxpress-0.2/panxpress.egg-info/requires.txt +5 -0
  51. panxpress-0.2/panxpress.egg-info/top_level.txt +1 -0
  52. panxpress-0.2/pyproject.toml +55 -0
  53. panxpress-0.2/setup.cfg +4 -0
panxpress-0.2/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026- Inês Alves Ferreira, Jens Zentgraf, Johanna Elena Schmitz & Sven Rahmann
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,5 @@
1
+ include panxpress/panxpress/*
2
+ include panxpress/io/*
3
+ include panxpress/lowlevel/*
4
+ include panxpress/values/*
5
+ include panxpress/panxpress/config/*
panxpress-0.2/PKG-INFO ADDED
@@ -0,0 +1,374 @@
1
+ Metadata-Version: 2.4
2
+ Name: panxpress
3
+ Version: 0.2
4
+ Summary: Gene expression quantification with a pan-transcriptomic gapped k-mer index
5
+ Author-email: Inês Alves Ferreira <zentgraf@cs.uni-saarland.de>, Jens Zentgraf <zentgraf@cs.uni-saarland.de>, Johanna Elena Schmitz <jschmitz@cs.uni-saarland.de>, Sven Rahmann <rahmann@cs.uni-saarland.de>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026- Inês Alves Ferreira, Jens Zentgraf, Johanna Elena Schmitz & Sven Rahmann
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://gitlab.com/rahmannlab/panxpress
29
+ Project-URL: Bug Tracker, https://gitlab.com/rahmannlab/panxpress/issues
30
+ Classifier: Programming Language :: Python :: 3
31
+ Classifier: License :: OSI Approved :: MIT License
32
+ Classifier: Operating System :: OS Independent
33
+ Classifier: Development Status :: 5 - Production/Stable
34
+ Classifier: Environment :: Console
35
+ Classifier: Intended Audience :: Science/Research
36
+ Classifier: Natural Language :: English
37
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
38
+ Requires-Python: >=3.12
39
+ Description-Content-Type: text/markdown
40
+ License-File: LICENSE
41
+ Requires-Dist: numpy
42
+ Requires-Dist: numba
43
+ Requires-Dist: pytest
44
+ Requires-Dist: jsonargparse>=4.29.0
45
+ Requires-Dist: pip
46
+ Dynamic: license-file
47
+
48
+ ![](logo.png)
49
+ # PanXpress: Gene Expression Quantification with a Pan-Transcriptomic Gapped K-mer Index
50
+
51
+ PanXpress is a unified framework for bacterial pan-transcriptomics that:
52
+
53
+ - Corrects ambiguous annotations in GFF files using a two-step algorithm
54
+ - Constructs a pan-transcriptomic reference FASTA file from genomic FASTA and corrected GFF annotation files
55
+ - Builds a gapped k-mer index over the pan-transcriptomic reference
56
+ - Supports alignment-free mapping of reads to genes from FASTQ samples
57
+ - Quantifies gene expression across strains
58
+
59
+ In case of problems, please file an issue in the issue tracker.
60
+
61
+ See `CHANGELOG.md` for recent changes.
62
+
63
+ ---
64
+
65
+ # Usage Guide
66
+
67
+ To use PanXpress you will need the following data and files:
68
+
69
+ - **GFF annotation files** and **genome FASTA files** for the bacterial strains to be included in the reference index. For a given strain, the annotation and genome files must share the same filename (differing only in their extension).
70
+ - **FASTQ samples** for mapping.
71
+
72
+ Additionally, for the correction of annotation files where similar proteins are grouped into gene groups, protein FASTA files can be provided by the user. If these are not available, they can be generated by PanXpress using the `agat` tool.
73
+
74
+ The typical workflow proceeds in five steps: (1) correct the GFF annotation files, (2) build the pan-transcriptomic reference, (3) construct the index, (4) map reads, and (5) convert the raw read counts to transcripts per million.
75
+
76
+ ---
77
+
78
+ ## Installation Guide
79
+
80
+ To run the software, a conda environment with the required libraries needs to be created.
81
+ A list of needed libraries is provided in the environment.yml file in the repository. You can create the environment with the following command:
82
+
83
+ ```bash
84
+ conda env create
85
+ ```
86
+
87
+ An environment with the name `panxpress` will be created. To activate the environment and install the package from the repository, run:
88
+
89
+ ```bash
90
+ conda activate panxpress
91
+ pip install -e .
92
+ ```
93
+
94
+ ## Examples
95
+
96
+ To better understand how to run PanXpress, we included in this repository a folder `reads` with a few simulated reads from a mixture of 3 strains of Pseudomonas aeruginosa (both single and paired end) and a folder `ref`, that contains `pa_2_strains`, with the **GFF** annotation files, the genome and protein **FASTA** files for 2 strains of Pseudomonas aeruginosa. We will use these files to exemplify how to use PanXpress.
97
+
98
+ ---
99
+
100
+ ## Step 1 — Correcting the Annotation Files
101
+
102
+ We recommend applying a correction algorithm to the annotation GFF files using the `panxpress correct_gff` command.
103
+
104
+ Proteins are grouped into gene groups via a two-step process:
105
+
106
+ 1. **Jaccard filtering:** The Jaccard similarity is computed for all pairs of proteins across bacterial strains. Pairs with a similarity score above a threshold `t1` proceed to the next step.
107
+ 2. **Alignment filtering:** Surviving pairs are aligned. Pairs with a normalized alignment score above a threshold `t2` are grouped into the same gene group and assigned a shared gene group name. This information is then used to rewrite the annotation files.
108
+
109
+ ### Arguments
110
+
111
+ - `--input_gff_folder` | Path to the folder containing the input annotation `.gff` files.
112
+ - `--input_normalized_gff_folder` | Path to a folder where normalized GFF files will be written. Two corrections are applied: (1) each protein ID is given a single consistent name across strains; (2) genes from plasmids are appended with the suffix `"Plasmid"` to distinguish them from chromosomal genes in expression results.
113
+ - `--output_gff_folder` | Path to the folder where corrected annotation `.gff` files will be stored. Hypothetical proteins are assigned the gene name `"unnamed"`.
114
+ - `--t1` | Similarity threshold for step 1 (Jaccard filtering).
115
+ - `--t2` | Score threshold for step 2 (alignment filtering).
116
+ - `--threads` | Number of threads for parallelized Jaccard similarity calculation and protein pair alignments.
117
+ - `--output_plot` | Path prefix for plots of the alignment score distribution.
118
+ - `--output_folder_data` | Path prefix for output data files (Jaccard similarity values, alignment scores, group names, etc.).
119
+ - `--input_protein_folder` | Path to a folder containing protein `.faa` files. If not available, provide the two arguments below instead.
120
+ - `--input_genome_folder` | *(Alternative to `--input_protein_folder`)* Path to a folder containing genome `.fna` files, used to generate protein files.
121
+ - `--output_protein_folder` | *(Alternative to `--input_protein_folder`)* Path to the folder where generated protein `.faa` files will be stored.
122
+
123
+ ### How to run (general input)
124
+
125
+ **With protein FASTA files provided:**
126
+
127
+ ```bash
128
+ panxpress correct_gff \
129
+ --input_protein_folder <folder_with_proteins> \
130
+ --input_gff_folder <folder_with_annotations> \
131
+ --input_normalized_gff_folder <folder_for_normalized_annotations> \
132
+ --output_gff_folder <folder_for_corrected_annotations> \
133
+ --k 7 \
134
+ --t1 0.02 \
135
+ --t2 0.75 \
136
+ --threads 8 \
137
+ --output_folder_data <filename_prefix_for_data>
138
+ ```
139
+
140
+ **Without protein FASTA files (generate them from genomes):**
141
+
142
+ ```bash
143
+ panxpress correct_gff \
144
+ --input_genome_folder <folder_with_genomes> \
145
+ --output_protein_folder <folder_with_proteins> \
146
+ --input_gff_folder <folder_with_annotations> \
147
+ --input_normalized_gff_folder <folder_for_normalized_annotations> \
148
+ --output_gff_folder <folder_for_corrected_annotations> \
149
+ --k 7 \
150
+ --t1 0.02 \
151
+ --t2 0.75 \
152
+ --threads 8 \
153
+ --output_folder_data <filename_prefix_for_data>
154
+ ```
155
+
156
+ ### How to run (provided files)
157
+
158
+ **With protein FASTA files provided:**
159
+
160
+ ```bash
161
+ panxpress correct_gff \
162
+ --input_protein_folder ref/pa_2_strains/protein_ncbi \
163
+ --input_gff_folder ref/pa_2_strains/gff3 \
164
+ --input_normalized_gff_folder ref/pa_2_strains/gff3_normalized \
165
+ --output_gff_folder ref/pa_2_strains/gff3_corrected \
166
+ --k 7 \
167
+ --t1 0.02 \
168
+ --t2 0.75 \
169
+ --threads 8 \
170
+ --output_folder_data ref/pa_2_strains
171
+ ```
172
+
173
+ **Without protein FASTA files (generate them from genomes):**
174
+
175
+ ```bash
176
+ panxpress correct_gff \
177
+ --input_genome_folder ref/pa_2_strains/genomes \
178
+ --output_protein_folder ref/pa_2_strains/protein_agat \
179
+ --input_gff_folder ref/pa_2_strains/gff3 \
180
+ --input_normalized_gff_folder ref/pa_2_strains/gff3_normalized \
181
+ --output_gff_folder ref/pa_2_strains/gff3_corrected \
182
+ --k 7 \
183
+ --t1 0.02 \
184
+ --t2 0.75 \
185
+ --threads 8 \
186
+ --output_folder_data ref/pa_2_strains
187
+ ```
188
+
189
+ ---
190
+
191
+ ## Step 2 — Building the Reference
192
+
193
+ The pan-transcriptomic reference is built using the `panxpress build_reference` command.
194
+
195
+ The reference is a FASTA file where each header is a unique gene identifier and each entry corresponds to one occurrence of a given gene name in a given strain. Because many gene names are shared across strains, multiple entries may share the same header. Since some tools (e.g., Bowtie2 and Salmon) do not support FASTA files with duplicate headers, PanXpress also generates a version of the reference with fully unique IDs.
196
+
197
+ ### Arguments
198
+
199
+ - `--annotation_dir` | Path to the folder containing the corrected annotation `.gff` files.
200
+ - `--genomes_dir` | Path to the folder containing the genome `.fna` files.
201
+ - `--output_reference_file` | Output FASTA file.
202
+ - `--output_reference_file_unique` | Output FASTA file — unique headers variant.
203
+ - `--output_reference_file_genes` | Output FASTA file containing only named genes (hypothetical proteins excluded).
204
+ - `--output_reference_file_unique_genes` | Output FASTA file containing only named genes — unique headers variant.
205
+ - `--output_aux_files` | Path prefix for auxiliary data files (e.g., mapping of gene names to gene IDs used during the mapping step).
206
+
207
+ ### How to run (general input)
208
+
209
+ ```bash
210
+ panxpress build_reference \
211
+ --annotation_dir <folder_with_annotations> \
212
+ --genomes_dir <folder_with_genomes> \
213
+ --output_reference_file <filename>.fna \
214
+ --output_reference_file_unique <filename>.fna \
215
+ --output_reference_file_genes <filename>.fna \
216
+ --output_reference_file_unique_genes <filename>.fna \
217
+ --output_aux_files <path_prefix_for_auxiliary_data>
218
+ ```
219
+
220
+ ### How to run (provided files)
221
+
222
+ ```bash
223
+ panxpress build_reference \
224
+ --annotation_dir ref/pa_2_strains/gff3_corrected \
225
+ --genomes_dir ref/pa_2_strains/genomes \
226
+ --output_reference_file ref/spliced_genomes/pantranscriptome_2_strains.fna \
227
+ --output_reference_file_unique ref/spliced_genomes/pantranscriptome_unique_headers_2_strains.fna \
228
+ --output_reference_file_genes ref/spliced_genomes/pantranscriptome_genes_2_strains.fna \
229
+ --output_reference_file_unique_genes ref/spliced_genomes/pantranscriptome_genes_unique_headers_2_strains.fna \
230
+ --output_aux_files ref/pa_2_strains/pa_2_strains > ref/build_reference_2_strains.log
231
+ ```
232
+
233
+ ---
234
+
235
+ ## Step 3 — Index Construction
236
+
237
+ The index is built using the `panxpress index` command.
238
+
239
+ The index is backed by a cuckoo hash table that stores a mapping of gapped k-mers to the genes in which they appear. The number of genes tracked per k-mer is controlled by the `--colorset-size` parameter. If you want PanXpress to maximize the number of colors within memory constraints, provide the total number of genes in the pan-transcriptomic reference via `--ngenes` and the maximum color set size will be calculated automatically.
240
+
241
+ ### Arguments
242
+
243
+ - `--genes` | FASTA file of the reference.
244
+ - `--index` | Output path prefix for the resulting index.
245
+ - `-n` | Estimated number of k-mers.
246
+ - `--ngenes` | Number of unique gene IDs.
247
+ - `--colorset-size` | Number of genes tracked per k-mer entry.
248
+ - `--mask` | Mask pattern used to compute gapped k-mers (e.g., `"####_###_####_#__#__#_####_###_####"`).
249
+ - `--k` | K-mer length.
250
+ - `--fill` | Fill rate parameter for the cuckoo hash table.
251
+
252
+ **Tip**: For Pseudomonas aeruginosa, 4 is a good number for the maximum number of colors since only a few k-mers occur in more than 4 different genes. To get a better idea of how to choose `n` and `ngenes` you can check the output of the `build_reference` command. In that output, the maximum gene ID is printed (use this for `ngenes`) and the length of the pan-transcriptome is an upper bound for `n`.
253
+
254
+ ### How to run (general input)
255
+
256
+ ```bash
257
+ panxpress index \
258
+ --genes <reference>.fna \
259
+ --index <path_prefix_for_index> \
260
+ -n 20000000 \
261
+ --ngenes 2600 \
262
+ --colorset-size 4 \
263
+ --mask "####_###_####_#__#__#_####_###_####" \
264
+ --fill 0.95
265
+ ```
266
+
267
+ ### How to run (provided files)
268
+
269
+ ```bash
270
+ panxpress index \
271
+ --genes ref/spliced_genomes/pantranscriptome_2_strains.fna \
272
+ --index ref/pa_2_strains/spliced_index_2_strains \
273
+ -n 20000000 \
274
+ --ngenes 2600 \
275
+ --colorset-size 4 \
276
+ --mask "####_###_####_#__#__#_####_###_####" \
277
+ --fill 0.95
278
+ ```
279
+
280
+ ---
281
+
282
+ ## Step 4 — Mapping
283
+
284
+ Read mapping is supported by the `panxpress pmap` command.
285
+
286
+ ### Arguments
287
+
288
+ - `--index` | Path prefix of the index.
289
+ - `--fastq` | Input FASTQ file.
290
+ - `--mapping-file` | Pickle dictionary mapping gene IDs to gene names, generated during the reference building step.
291
+ - `--output-file` | Output folder name.
292
+ - `--threads-mapping` | Number of threads for mapping.
293
+ - `--unnamed_gene_id` | Gene ID for hypothetical proteins. This can be obtained from the mapping file.
294
+
295
+ **Tip**: To get the correct value for `unnamed_gene_id`, you can use the following bash command:
296
+
297
+ ```bash
298
+ unnamed_gene_id=$(python3 -c "import pickle; f='<output_aux_files>_gene_name_to_gene_id'; d=pickle.load(open(f,'rb')); print(d['unnamed'])")
299
+ ```
300
+
301
+ Note that `output_aux_files` should be the parameter you used in the `build_reference` command.
302
+
303
+ ### How to run (general input)
304
+
305
+ ```bash
306
+ panxpress pmap \
307
+ --index <index_prefix> \
308
+ --fastq <reads_fastq> \
309
+ --output-file <output_folder> \
310
+ --threads-mapping 8 \
311
+ --mapping-file <gene_id_to_gene_name> \
312
+ --unnamed_gene_id 3
313
+ ```
314
+
315
+ ### How to run (provided files) - SINGLE END READS
316
+
317
+ ```bash
318
+ panxpress map \
319
+ --index ref/pa_2_strains/spliced_index_2_strains \
320
+ --fastq reads/simulated_regulated_single_reads_pa_3_strains.fq \
321
+ --output-file results/single_end_reads_pa_3_strains \
322
+ --threads-mapping 8 \
323
+ --mapping-file ref/pa_2_strains/pa_2_strains_gene_id_to_gene_name \
324
+ --unnamed_gene_id $unnamed_gene_id
325
+ ```
326
+
327
+ ### How to run (provided files) - PAIRED END READS
328
+
329
+ ```bash
330
+ panxpress map \
331
+ --index ref/pa_2_strains/spliced_index_2_strains \
332
+ --fastq reads/simulated_regulated_paired_end_reads_pa_3_strains_1.fq \
333
+ --paired-end reads/simulated_regulated_paired_end_reads_pa_3_strains_2.fq \
334
+ --output-file results/paired_end_reads_pa_3_strains \
335
+ --threads-mapping 8 \
336
+ --mapping-file ref/pa_2_strains/pa_2_strains_gene_id_to_gene_name \
337
+ --unnamed_gene_id $unnamed_gene_id
338
+ ```
339
+
340
+ ---
341
+
342
+ ## Step 5 — Gene expression values
343
+
344
+ You can additionally convert the information of how many reads are mapped to each gene to gene expression quantification in transcripts per million. This is supported by the `panxpress convert_TPM` command.
345
+
346
+ ### Arguments
347
+
348
+ - `--raw_counts_file` | Output `.mat` file from the mapping step.
349
+ - `--genes_info_file_prefix` | Path prefix for auxiliary data files (e.g., mapping of gene names to gene IDs used during the mapping step). Identical to `output_aux_files` in the build reference command.
350
+ - `--output_file` | Output file name.
351
+
352
+ ### How to run (general input)
353
+
354
+ ```bash
355
+ panxpress convert_TPM \
356
+ --raw_counts_file <raw_counts_file> \
357
+ --genes_info_file_prefix <genes_info_file_prefix> \
358
+ --output_file <output_file>
359
+ ```
360
+
361
+ ### How to run (provided files)
362
+
363
+ ```bash
364
+ panxpress convert_TPM \
365
+ --raw_counts_file results/single_end_reads_pa_3_strains/count.mat \
366
+ --genes_info_file_prefix ref/pa_2_strains/pa_2_strains \
367
+ --output_file results/single_end_reads_pa_3_strains/counts_TPM.txt
368
+ ```
369
+
370
+ ---
371
+
372
+ ## Full Workflow Scripts
373
+
374
+ For ready-to-run workflows on both simulated and real read data, please refer to the README in the `scripts/` folder.