RefgenDetector 3.0.2__tar.gz → 3.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. {refgendetector-3.0.2/src/RefgenDetector.egg-info → refgendetector-3.0.4}/PKG-INFO +20 -63
  2. {refgendetector-3.0.2 → refgendetector-3.0.4}/README.md +19 -62
  3. refgendetector-3.0.4/setup.py +62 -0
  4. {refgendetector-3.0.2 → refgendetector-3.0.4/src/RefgenDetector.egg-info}/PKG-INFO +20 -63
  5. {refgendetector-3.0.2 → refgendetector-3.0.4}/src/RefgenDetector.egg-info/SOURCES.txt +3 -1
  6. {refgendetector-3.0.2 → refgendetector-3.0.4}/src/RefgenDetector.egg-info/entry_points.txt +1 -0
  7. refgendetector-3.0.4/src/refgenDetector/__init__.py +3 -0
  8. {refgendetector-3.0.2 → refgendetector-3.0.4}/src/refgenDetector/aligment_files.py +2 -2
  9. refgendetector-3.0.4/src/refgenDetector/download_reference.py +155 -0
  10. {refgendetector-3.0.2 → refgendetector-3.0.4}/src/refgenDetector/ref_manager.py +3 -7
  11. {refgendetector-3.0.2 → refgendetector-3.0.4}/src/refgenDetector/reference_genome_dictionaries.py +2 -3
  12. {refgendetector-3.0.2 → refgendetector-3.0.4}/src/refgenDetector/refgenDetector_main.py +3 -2
  13. {refgendetector-3.0.2 → refgendetector-3.0.4}/src/refgenDetector/variant_files.py +5 -3
  14. refgendetector-3.0.2/setup.py +0 -31
  15. {refgendetector-3.0.2 → refgendetector-3.0.4}/LICENSE +0 -0
  16. {refgendetector-3.0.2 → refgendetector-3.0.4}/setup.cfg +0 -0
  17. {refgendetector-3.0.2 → refgendetector-3.0.4}/src/RefgenDetector.egg-info/dependency_links.txt +0 -0
  18. {refgendetector-3.0.2 → refgendetector-3.0.4}/src/RefgenDetector.egg-info/requires.txt +0 -0
  19. {refgendetector-3.0.2 → refgendetector-3.0.4}/src/RefgenDetector.egg-info/top_level.txt +0 -0
  20. {refgendetector-3.0.2 → refgendetector-3.0.4}/src/refgenDetector/chromosomes_dict.py +0 -0
  21. {refgendetector-3.0.2 → refgendetector-3.0.4}/src/refgenDetector/exceptions/NoFileException.py +0 -0
  22. {refgendetector-3.0.2/src/refgenDetector → refgendetector-3.0.4/src/refgenDetector/exceptions}/__init__.py +0 -0
  23. {refgendetector-3.0.2/src/refgenDetector/exceptions → refgendetector-3.0.4/src/refgenDetector/msgpacks}/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: RefgenDetector
3
- Version: 3.0.2
3
+ Version: 3.0.4
4
4
  Author: Mireia Marin i Ginestar
5
5
  Author-email: <mireia.marin@crg.eu>
6
6
  Keywords: python
@@ -129,7 +129,8 @@ python ref_manager.py <command> [options]
129
129
  #### Add a reference
130
130
 
131
131
  ```bash
132
- python ref_manager.py add <genome.fai> <reference_name> <species>
132
+ python ref_manager.py add <genome.fai> <reference_name> <species> # script
133
+ refgenDetector-manager add <genome.fai> <reference_name> <species> # pip installation
133
134
  ```
134
135
 
135
136
  Registers a new reference from a valid `.fai` file. If the contig structure matches an existing reference, the entry is not added.
@@ -137,7 +138,8 @@ Registers a new reference from a valid `.fai` file. If the contig structure matc
137
138
  #### List references
138
139
 
139
140
  ```bash
140
- python ref_manager.py list
141
+ python ref_manager.py list # scripts
142
+ refgenDetector-manager list # pip installation
141
143
  ```
142
144
 
143
145
  Displays all available references, including both built-in and user-defined assemblies.
@@ -145,7 +147,8 @@ Displays all available references, including both built-in and user-defined asse
145
147
  #### Remove a reference
146
148
 
147
149
  ```bash
148
- python ref_manager.py remove <reference_name>
150
+ python ref_manager.py remove <reference_name> # scripts
151
+ refgenDetector-manager remove <reference_name> # pip installation
149
152
  ```
150
153
 
151
154
  Removes a custom reference from the local database. Built-in references cannot be removed.
@@ -188,33 +191,31 @@ Depending on how you want to install the package:
188
191
  - pip
189
192
  - Docker
190
193
 
191
- Download the `msgpack` files for the inference with VCFs:
192
-
193
- 1. [Download the msgpack reference](https://crgcnag-my.sharepoint.com/:u:/g/personal/mimarin_crg_es/IQDa5CICZDAoRZmbfhBG3ZPEAWdVnNqvefFJB_r5Hc8aM70?e=kID7zn)
194
-
195
- 2. Move the `msgpack` to the correct path:
196
-
197
- ```
198
- mv msgpack.zip /refgenDetector/src/refgenDetector/
199
- unzip /refgenDetector/src/refgenDetector/msgpack.zip
200
- ```
201
-
202
194
  ## Installation
203
195
 
204
196
  ### Cloning this repository
205
197
 
206
198
  1. Clone this repository
207
199
 
208
- 2. ``` $ cd PATH_WHERE_YOU_CLONED_THE_REPOSITORY/src/refgenDetector ```
200
+ 2.```git clone https://github.com/EGA-archive/refgenDetector.git
201
+ cd refgenDetector
202
+ pip install -e . ```
209
203
 
210
204
  3. ``$ python3 refgenDetector_main.py -h ``
211
205
 
206
+ 4. Download the `msgpack` files for the inference with VCFs: [Download the msgpack reference](https://crgcnag-my.sharepoint.com/:u:/g/personal/mimarin_crg_es/IQDa5CICZDAoRZmbfhBG3ZPEAWdVnNqvefFJB_r5Hc8aM70?e=kID7zn)
207
+
208
+ 5. Move the downloaded `msgpacks.zip` archive into the package directory and unzip it:
209
+
210
+ ```
211
+ mv msgpacks.zip /refgenDetector/src/refgenDetector/
212
+ unzip /refgenDetector/src/refgenDetector/msgpacks.zip
213
+ ```
214
+
212
215
  ### From pypi
213
216
 
214
217
  ``$ pip install refgenDetector``
215
218
 
216
- ### From Docker
217
- ``
218
219
 
219
220
  ## Usage
220
221
 
@@ -244,51 +245,7 @@ optional arguments:
244
245
 
245
246
  ## Test RefgenDetector
246
247
 
247
- In the folder **examples** you can find headers, BAM and CRAMs to test the working of RefgenDetector.
248
-
249
- *All this files belong to the [synthetics data cohort](https://ega-archive.org/synthetic-data) from the European
250
- Genome-Phenome Archive ([EGA](https://ega-archive.org/)).*
251
-
252
- ### Test with headers in a TXT
253
-
254
- In the folder TEST_HEADERS there are four headers obtained from synthetic BAM an CRAMs stored in the EGA. Each one of
255
- them belongs to a different synthetic study:
256
-
257
- - Test Study for EGA using data from 1000 Genomes Project - Phase
258
- 3 [EGAS00001005042](https://ega-archive.org/studies/EGAS00001005042).
259
- - Synthetic data - Genome in a Bottle - [EGAS00001005591](https://ega-archive.org/studies/EGAS00001005591).
260
- - Human genomic and phenotypic synthetic data for the study of rare
261
- diseases - [EGAS00001005702](https://ega-archive.org/studies/EGAS00001005702).
262
- - CINECA synthetic data.Please note: This study contains synthetic data (with cohort “participants” / ”subjects” marked
263
- with FAKE) has no identifiable data and cannot be used to make any inference about cohort data or
264
- results - [EGAS00001002472](https://ega-archive.org/studies/EGAS00001002472).
265
-
266
- Further information about them can be found in the file *where_to_find_this_files.txt*, saved in the same folder.
267
-
268
- To run RefgenDetector with the files:
269
-
270
- 1. Modify the txt *path_to_headers* so the paths match those in your computer.
271
- 2. Run:
272
-
273
- ``` $ refgenDetector -p /PATH_WHERE_YOU_CLONED_THE_REPOSITORY/refgenDetector/examples/path_to_headers -t Headers```
274
-
275
- ### Test with BAM and CRAMs
276
-
277
- In the folder TEST_BAM_CRAM there are a BAM and a CRAM obtained from synthetic BAM an CRAMs stored in the EGA. They
278
- belong to the synthetic study - Test Study for EGA using data from 1000 Genomes Project - Phase
279
- 3 [EGAS00001005042](https://ega-archive.org/studies/EGAS00001005042).
280
-
281
- Further information about them can be found in the file *where_to_find_this_files.txt*, saved in the same folder.
282
-
283
- To run RefgenDetector with the files:
284
-
285
- 1. Modify the txt *path_to_bam_cram* so the paths match those in your computer.
286
-
287
- 2. Run:
288
-
289
- ``` $ refgenDetector -p /PATH_WHERE_YOU_CLONED_THE_REPOSITORY/refgenDetector_pip-master/examples/path_to_bam_cram -t BAM/CRAM```
290
-
291
-
248
+ The [examples](https://github.com/EGA-archive/refgenDetector/tree/main/examples) folder contains headers, alignment files and variant files that you can use to test RefgenDetector.
292
249
 
293
250
  ## Licence and funding
294
251
 
@@ -103,7 +103,8 @@ python ref_manager.py <command> [options]
103
103
  #### Add a reference
104
104
 
105
105
  ```bash
106
- python ref_manager.py add <genome.fai> <reference_name> <species>
106
+ python ref_manager.py add <genome.fai> <reference_name> <species> # script
107
+ refgenDetector-manager add <genome.fai> <reference_name> <species> # pip installation
107
108
  ```
108
109
 
109
110
  Registers a new reference from a valid `.fai` file. If the contig structure matches an existing reference, the entry is not added.
@@ -111,7 +112,8 @@ Registers a new reference from a valid `.fai` file. If the contig structure matc
111
112
  #### List references
112
113
 
113
114
  ```bash
114
- python ref_manager.py list
115
+ python ref_manager.py list # scripts
116
+ refgenDetector-manager list # pip installation
115
117
  ```
116
118
 
117
119
  Displays all available references, including both built-in and user-defined assemblies.
@@ -119,7 +121,8 @@ Displays all available references, including both built-in and user-defined asse
119
121
  #### Remove a reference
120
122
 
121
123
  ```bash
122
- python ref_manager.py remove <reference_name>
124
+ python ref_manager.py remove <reference_name> # scripts
125
+ refgenDetector-manager remove <reference_name> # pip installation
123
126
  ```
124
127
 
125
128
  Removes a custom reference from the local database. Built-in references cannot be removed.
@@ -162,33 +165,31 @@ Depending on how you want to install the package:
162
165
  - pip
163
166
  - Docker
164
167
 
165
- Download the `msgpack` files for the inference with VCFs:
166
-
167
- 1. [Download the msgpack reference](https://crgcnag-my.sharepoint.com/:u:/g/personal/mimarin_crg_es/IQDa5CICZDAoRZmbfhBG3ZPEAWdVnNqvefFJB_r5Hc8aM70?e=kID7zn)
168
-
169
- 2. Move the `msgpack` to the correct path:
170
-
171
- ```
172
- mv msgpack.zip /refgenDetector/src/refgenDetector/
173
- unzip /refgenDetector/src/refgenDetector/msgpack.zip
174
- ```
175
-
176
168
  ## Installation
177
169
 
178
170
  ### Cloning this repository
179
171
 
180
172
  1. Clone this repository
181
173
 
182
- 2. ``` $ cd PATH_WHERE_YOU_CLONED_THE_REPOSITORY/src/refgenDetector ```
174
+ 2.```git clone https://github.com/EGA-archive/refgenDetector.git
175
+ cd refgenDetector
176
+ pip install -e . ```
183
177
 
184
178
  3. ``$ python3 refgenDetector_main.py -h ``
185
179
 
180
+ 4. Download the `msgpack` files for the inference with VCFs: [Download the msgpack reference](https://crgcnag-my.sharepoint.com/:u:/g/personal/mimarin_crg_es/IQDa5CICZDAoRZmbfhBG3ZPEAWdVnNqvefFJB_r5Hc8aM70?e=kID7zn)
181
+
182
+ 5. Move the downloaded `msgpacks.zip` archive into the package directory and unzip it:
183
+
184
+ ```
185
+ mv msgpacks.zip /refgenDetector/src/refgenDetector/
186
+ unzip /refgenDetector/src/refgenDetector/msgpacks.zip
187
+ ```
188
+
186
189
  ### From pypi
187
190
 
188
191
  ``$ pip install refgenDetector``
189
192
 
190
- ### From Docker
191
- ``
192
193
 
193
194
  ## Usage
194
195
 
@@ -218,51 +219,7 @@ optional arguments:
218
219
 
219
220
  ## Test RefgenDetector
220
221
 
221
- In the folder **examples** you can find headers, BAM and CRAMs to test the working of RefgenDetector.
222
-
223
- *All this files belong to the [synthetics data cohort](https://ega-archive.org/synthetic-data) from the European
224
- Genome-Phenome Archive ([EGA](https://ega-archive.org/)).*
225
-
226
- ### Test with headers in a TXT
227
-
228
- In the folder TEST_HEADERS there are four headers obtained from synthetic BAM an CRAMs stored in the EGA. Each one of
229
- them belongs to a different synthetic study:
230
-
231
- - Test Study for EGA using data from 1000 Genomes Project - Phase
232
- 3 [EGAS00001005042](https://ega-archive.org/studies/EGAS00001005042).
233
- - Synthetic data - Genome in a Bottle - [EGAS00001005591](https://ega-archive.org/studies/EGAS00001005591).
234
- - Human genomic and phenotypic synthetic data for the study of rare
235
- diseases - [EGAS00001005702](https://ega-archive.org/studies/EGAS00001005702).
236
- - CINECA synthetic data.Please note: This study contains synthetic data (with cohort “participants” / ”subjects” marked
237
- with FAKE) has no identifiable data and cannot be used to make any inference about cohort data or
238
- results - [EGAS00001002472](https://ega-archive.org/studies/EGAS00001002472).
239
-
240
- Further information about them can be found in the file *where_to_find_this_files.txt*, saved in the same folder.
241
-
242
- To run RefgenDetector with the files:
243
-
244
- 1. Modify the txt *path_to_headers* so the paths match those in your computer.
245
- 2. Run:
246
-
247
- ``` $ refgenDetector -p /PATH_WHERE_YOU_CLONED_THE_REPOSITORY/refgenDetector/examples/path_to_headers -t Headers```
248
-
249
- ### Test with BAM and CRAMs
250
-
251
- In the folder TEST_BAM_CRAM there are a BAM and a CRAM obtained from synthetic BAM an CRAMs stored in the EGA. They
252
- belong to the synthetic study - Test Study for EGA using data from 1000 Genomes Project - Phase
253
- 3 [EGAS00001005042](https://ega-archive.org/studies/EGAS00001005042).
254
-
255
- Further information about them can be found in the file *where_to_find_this_files.txt*, saved in the same folder.
256
-
257
- To run RefgenDetector with the files:
258
-
259
- 1. Modify the txt *path_to_bam_cram* so the paths match those in your computer.
260
-
261
- 2. Run:
262
-
263
- ``` $ refgenDetector -p /PATH_WHERE_YOU_CLONED_THE_REPOSITORY/refgenDetector_pip-master/examples/path_to_bam_cram -t BAM/CRAM```
264
-
265
-
222
+ The [examples](https://github.com/EGA-archive/refgenDetector/tree/main/examples) folder contains headers, alignment files and variant files that you can use to test RefgenDetector.
266
223
 
267
224
  ## Licence and funding
268
225
 
@@ -0,0 +1,62 @@
1
+ from setuptools import setup, find_packages
2
+ from setuptools.command.install import install
3
+ from setuptools.command.develop import develop
4
+ from pathlib import Path
5
+
6
+ this_directory = Path(__file__).parent
7
+ long_description = (this_directory / "README.md").read_text()
8
+ VERSION = '3.0.4'
9
+ DESCRIPTION = 'RefgenDetector'
10
+
11
+
12
+ # ── Post-install hook ─────────────────────────────────────────────────────────
13
+
14
+ def run_post_install():
15
+ try:
16
+ from refgenDetector.post_install import run
17
+ run()
18
+ except Exception as exc:
19
+ print(
20
+ f"\n[refgenDetector] WARNING: post-install setup failed: {exc}\n"
21
+ "You can run it manually later with:\n"
22
+ " python -c 'from refgenDetector.post_install import run; run()'\n"
23
+ )
24
+
25
+ class PostInstallCommand(install):
26
+ def run(self):
27
+ super().run()
28
+ run_post_install()
29
+
30
+ class PostDevelopCommand(develop):
31
+ def run(self):
32
+ super().run()
33
+ run_post_install()
34
+
35
+
36
+ # ── Setup ─────────────────────────────────────────────────────────────────────
37
+
38
+ setup(
39
+ name="RefgenDetector",
40
+ version=VERSION,
41
+ author="Mireia Marin i Ginestar",
42
+ author_email="<mireia.marin@crg.eu>",
43
+ long_description=long_description,
44
+ long_description_content_type='text/markdown',
45
+ install_requires=['pysam', 'psutil', 'rich', 'pandas', 'dnspython', 'msgpack', 'numpy'],
46
+ keywords=['python'],
47
+ classifiers=[
48
+ "Programming Language :: Python :: 3",
49
+ "Operating System :: Unix",
50
+ ],
51
+ entry_points={
52
+ 'console_scripts': [
53
+ 'refgenDetector=refgenDetector.refgenDetector_main:main',
54
+ 'refgenDetector-manager=refgenDetector.ref_manager:main'
55
+ ],
56
+ },
57
+ packages=find_packages(where='src'),
58
+ package_dir={'': 'src'},
59
+ package_data={
60
+ "refgenDetector": ["post_install.py"], # ships post_install.py in the wheel
61
+ }
62
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: RefgenDetector
3
- Version: 3.0.2
3
+ Version: 3.0.4
4
4
  Author: Mireia Marin i Ginestar
5
5
  Author-email: <mireia.marin@crg.eu>
6
6
  Keywords: python
@@ -129,7 +129,8 @@ python ref_manager.py <command> [options]
129
129
  #### Add a reference
130
130
 
131
131
  ```bash
132
- python ref_manager.py add <genome.fai> <reference_name> <species>
132
+ python ref_manager.py add <genome.fai> <reference_name> <species> # script
133
+ refgenDetector-manager add <genome.fai> <reference_name> <species> # pip installation
133
134
  ```
134
135
 
135
136
  Registers a new reference from a valid `.fai` file. If the contig structure matches an existing reference, the entry is not added.
@@ -137,7 +138,8 @@ Registers a new reference from a valid `.fai` file. If the contig structure matc
137
138
  #### List references
138
139
 
139
140
  ```bash
140
- python ref_manager.py list
141
+ python ref_manager.py list # scripts
142
+ refgenDetector-manager list # pip installation
141
143
  ```
142
144
 
143
145
  Displays all available references, including both built-in and user-defined assemblies.
@@ -145,7 +147,8 @@ Displays all available references, including both built-in and user-defined asse
145
147
  #### Remove a reference
146
148
 
147
149
  ```bash
148
- python ref_manager.py remove <reference_name>
150
+ python ref_manager.py remove <reference_name> # scripts
151
+ refgenDetector-manager remove <reference_name> # pip installation
149
152
  ```
150
153
 
151
154
  Removes a custom reference from the local database. Built-in references cannot be removed.
@@ -188,33 +191,31 @@ Depending on how you want to install the package:
188
191
  - pip
189
192
  - Docker
190
193
 
191
- Download the `msgpack` files for the inference with VCFs:
192
-
193
- 1. [Download the msgpack reference](https://crgcnag-my.sharepoint.com/:u:/g/personal/mimarin_crg_es/IQDa5CICZDAoRZmbfhBG3ZPEAWdVnNqvefFJB_r5Hc8aM70?e=kID7zn)
194
-
195
- 2. Move the `msgpack` to the correct path:
196
-
197
- ```
198
- mv msgpack.zip /refgenDetector/src/refgenDetector/
199
- unzip /refgenDetector/src/refgenDetector/msgpack.zip
200
- ```
201
-
202
194
  ## Installation
203
195
 
204
196
  ### Cloning this repository
205
197
 
206
198
  1. Clone this repository
207
199
 
208
- 2. ``` $ cd PATH_WHERE_YOU_CLONED_THE_REPOSITORY/src/refgenDetector ```
200
+ 2.```git clone https://github.com/EGA-archive/refgenDetector.git
201
+ cd refgenDetector
202
+ pip install -e . ```
209
203
 
210
204
  3. ``$ python3 refgenDetector_main.py -h ``
211
205
 
206
+ 4. Download the `msgpack` files for the inference with VCFs: [Download the msgpack reference](https://crgcnag-my.sharepoint.com/:u:/g/personal/mimarin_crg_es/IQDa5CICZDAoRZmbfhBG3ZPEAWdVnNqvefFJB_r5Hc8aM70?e=kID7zn)
207
+
208
+ 5. Move the downloaded `msgpacks.zip` archive into the package directory and unzip it:
209
+
210
+ ```
211
+ mv msgpacks.zip /refgenDetector/src/refgenDetector/
212
+ unzip /refgenDetector/src/refgenDetector/msgpacks.zip
213
+ ```
214
+
212
215
  ### From pypi
213
216
 
214
217
  ``$ pip install refgenDetector``
215
218
 
216
- ### From Docker
217
- ``
218
219
 
219
220
  ## Usage
220
221
 
@@ -244,51 +245,7 @@ optional arguments:
244
245
 
245
246
  ## Test RefgenDetector
246
247
 
247
- In the folder **examples** you can find headers, BAM and CRAMs to test the working of RefgenDetector.
248
-
249
- *All this files belong to the [synthetics data cohort](https://ega-archive.org/synthetic-data) from the European
250
- Genome-Phenome Archive ([EGA](https://ega-archive.org/)).*
251
-
252
- ### Test with headers in a TXT
253
-
254
- In the folder TEST_HEADERS there are four headers obtained from synthetic BAM an CRAMs stored in the EGA. Each one of
255
- them belongs to a different synthetic study:
256
-
257
- - Test Study for EGA using data from 1000 Genomes Project - Phase
258
- 3 [EGAS00001005042](https://ega-archive.org/studies/EGAS00001005042).
259
- - Synthetic data - Genome in a Bottle - [EGAS00001005591](https://ega-archive.org/studies/EGAS00001005591).
260
- - Human genomic and phenotypic synthetic data for the study of rare
261
- diseases - [EGAS00001005702](https://ega-archive.org/studies/EGAS00001005702).
262
- - CINECA synthetic data.Please note: This study contains synthetic data (with cohort “participants” / ”subjects” marked
263
- with FAKE) has no identifiable data and cannot be used to make any inference about cohort data or
264
- results - [EGAS00001002472](https://ega-archive.org/studies/EGAS00001002472).
265
-
266
- Further information about them can be found in the file *where_to_find_this_files.txt*, saved in the same folder.
267
-
268
- To run RefgenDetector with the files:
269
-
270
- 1. Modify the txt *path_to_headers* so the paths match those in your computer.
271
- 2. Run:
272
-
273
- ``` $ refgenDetector -p /PATH_WHERE_YOU_CLONED_THE_REPOSITORY/refgenDetector/examples/path_to_headers -t Headers```
274
-
275
- ### Test with BAM and CRAMs
276
-
277
- In the folder TEST_BAM_CRAM there are a BAM and a CRAM obtained from synthetic BAM an CRAMs stored in the EGA. They
278
- belong to the synthetic study - Test Study for EGA using data from 1000 Genomes Project - Phase
279
- 3 [EGAS00001005042](https://ega-archive.org/studies/EGAS00001005042).
280
-
281
- Further information about them can be found in the file *where_to_find_this_files.txt*, saved in the same folder.
282
-
283
- To run RefgenDetector with the files:
284
-
285
- 1. Modify the txt *path_to_bam_cram* so the paths match those in your computer.
286
-
287
- 2. Run:
288
-
289
- ``` $ refgenDetector -p /PATH_WHERE_YOU_CLONED_THE_REPOSITORY/refgenDetector_pip-master/examples/path_to_bam_cram -t BAM/CRAM```
290
-
291
-
248
+ The [examples](https://github.com/EGA-archive/refgenDetector/tree/main/examples) folder contains headers, alignment files and variant files that you can use to test RefgenDetector.
292
249
 
293
250
  ## Licence and funding
294
251
 
@@ -10,9 +10,11 @@ src/RefgenDetector.egg-info/top_level.txt
10
10
  src/refgenDetector/__init__.py
11
11
  src/refgenDetector/aligment_files.py
12
12
  src/refgenDetector/chromosomes_dict.py
13
+ src/refgenDetector/download_reference.py
13
14
  src/refgenDetector/ref_manager.py
14
15
  src/refgenDetector/reference_genome_dictionaries.py
15
16
  src/refgenDetector/refgenDetector_main.py
16
17
  src/refgenDetector/variant_files.py
17
18
  src/refgenDetector/exceptions/NoFileException.py
18
- src/refgenDetector/exceptions/__init__.py
19
+ src/refgenDetector/exceptions/__init__.py
20
+ src/refgenDetector/msgpacks/__init__.py
@@ -1,2 +1,3 @@
1
1
  [console_scripts]
2
2
  refgenDetector = refgenDetector.refgenDetector_main:main
3
+ refgenDetector-manager = refgenDetector.ref_manager:main
@@ -0,0 +1,3 @@
1
+ # Auto-setup reference files on first import
2
+ from refgenDetector.download_reference import run as _setup_files
3
+ _setup_files()
@@ -14,8 +14,8 @@ try:
14
14
  from .exceptions.NoFileException import *
15
15
  except ImportError:
16
16
  # Works when run directly as a script
17
- from refgenDetector.reference_genome_dictionaries import *
18
- from refgenDetector.exceptions.NoFileException import *
17
+ from reference_genome_dictionaries import *
18
+ from exceptions.NoFileException import *
19
19
 
20
20
  console = Console()
21
21
 
@@ -0,0 +1,155 @@
1
+ """
2
+ Post-install script for refgenDetector.
3
+
4
+ Handles two installation scenarios:
5
+ 1. Cloned from GitHub: moves + decompresses .xz files from github_msgpacks/
6
+ 2. Installed via pip: downloads github_msgpacks/ from GitHub, then moves + decompresses.
7
+
8
+ Runs only once — skips everything if msgpacks/ already contains .msgpack files.
9
+ """
10
+
11
+ import lzma
12
+ import shutil
13
+ import urllib.request
14
+ import urllib.error
15
+ import json
16
+ from pathlib import Path
17
+
18
+
19
+ # CONFIGURATION
20
+ GITHUB_USER = "EGA-archive"
21
+ GITHUB_REPO = "refgenDetector"
22
+ GITHUB_BRANCH = "main"
23
+
24
+ GITHUB_API_URL = (
25
+ f"https://api.github.com/repos/{GITHUB_USER}/{GITHUB_REPO}"
26
+ f"/contents/src/refgenDetector/github_msgpacks"
27
+ f"?ref={GITHUB_BRANCH}"
28
+ )
29
+
30
+ GITHUB_RAW_BASE = (
31
+ f"https://raw.githubusercontent.com/{GITHUB_USER}/{GITHUB_REPO}"
32
+ f"/{GITHUB_BRANCH}/src/refgenDetector/github_msgpacks"
33
+ )
34
+
35
+ # PATH RESOLUTION
36
+
37
+ def _package_root() -> Path:
38
+ """Return the installed package directory (contains __init__.py)."""
39
+ # Works for both editable installs and regular pip installs
40
+ try:
41
+ import refgenDetector
42
+ return Path(refgenDetector.__file__).parent
43
+ except ImportError:
44
+ # Fallback: relative to this script
45
+ return Path(__file__).parent
46
+
47
+
48
+ def get_paths():
49
+ pkg = _package_root()
50
+ src_clone = pkg / "github_msgpacks" # present when repo is cloned
51
+ src_pip = pkg / "_downloaded_msgpacks" # temp download dir for pip
52
+ dst = pkg / "msgpacks"
53
+ return src_clone, src_pip, dst
54
+
55
+
56
+ # HELPERS
57
+
58
+ def is_already_setup(dst: Path) -> bool:
59
+ """Return True if at least one .msgpack file exists in dst."""
60
+ if not dst.exists():
61
+ return False
62
+ return any(dst.glob("*.msgpack"))
63
+
64
+
65
+ def decompress_xz(src_file: Path, dst_dir: Path) -> None:
66
+ """Decompress a single .xz file into dst_dir, keeping the base name."""
67
+ # e.g. foo.msgpack.xz -> dst_dir/foo.msgpack
68
+ out_name = src_file.stem # strips the last suffix (.xz)
69
+ out_path = dst_dir / out_name
70
+ print(f" Decompressing {src_file.name} -> {out_path.name}")
71
+ with lzma.open(src_file, "rb") as f_in, open(out_path, "wb") as f_out:
72
+ shutil.copyfileobj(f_in, f_out)
73
+
74
+
75
+ def move_and_decompress(src_dir: Path, dst_dir: Path) -> None:
76
+ """Move every .xz file from src_dir to dst_dir and decompress it."""
77
+ dst_dir.mkdir(parents=True, exist_ok=True)
78
+ xz_files = list(src_dir.glob("*.xz"))
79
+ if not xz_files:
80
+ print(f" WARNING: no .xz files found in {src_dir}")
81
+ return
82
+ for xz_file in xz_files:
83
+ decompress_xz(xz_file, dst_dir)
84
+ print(f" Done — {len(xz_files)} file(s) decompressed into {dst_dir}")
85
+
86
+
87
+ # DOWNLOAD LOGIC
88
+
89
+ def _github_api_request(url: str) -> list:
90
+ """Fetch JSON from the GitHub contents API."""
91
+ req = urllib.request.Request(url, headers={"User-Agent": "refgenDetector-installer"})
92
+ try:
93
+ with urllib.request.urlopen(req, timeout=30) as resp:
94
+ return json.loads(resp.read().decode())
95
+ except urllib.error.HTTPError as e:
96
+ raise RuntimeError(
97
+ f"GitHub API error {e.code} for {url}.\n"
98
+ "Check GITHUB_USER / GITHUB_REPO / GITHUB_BRANCH in post_install.py."
99
+ ) from e
100
+
101
+
102
+ def download_github_msgpacks(dst_dir: Path) -> None:
103
+ """Download every .xz file from the GitHub folder into dst_dir."""
104
+ print(f" Fetching file list from GitHub …")
105
+ entries = _github_api_request(GITHUB_API_URL)
106
+
107
+ xz_entries = [e for e in entries if e["name"].endswith(".xz")]
108
+ if not xz_entries:
109
+ raise RuntimeError("No .xz files found in the GitHub folder. Check the repo path.")
110
+
111
+ dst_dir.mkdir(parents=True, exist_ok=True)
112
+ for entry in xz_entries:
113
+ raw_url = f"{GITHUB_RAW_BASE}/{entry['name']}"
114
+ out_path = dst_dir / entry["name"]
115
+ print(f" Downloading {entry['name']} ({entry.get('size', '?')} bytes) …")
116
+ req = urllib.request.Request(raw_url, headers={"User-Agent": "refgenDetector-installer"})
117
+ with urllib.request.urlopen(req, timeout=120) as resp, open(out_path, "wb") as f:
118
+ shutil.copyfileobj(resp, f)
119
+
120
+ print(f" Downloaded {len(xz_entries)} file(s).")
121
+
122
+
123
+ # MAIN
124
+
125
+ def run():
126
+ src_clone, src_pip, dst = get_paths()
127
+
128
+ # Check if the reference files have already been downloaded and decompressed
129
+ if is_already_setup(dst):
130
+ print("[refgenDetector] Reference files already present — skipping setup.")
131
+ return
132
+
133
+ print("[refgenDetector] Setting up reference files - This will only run once")
134
+
135
+ # Installation done by cloning the repo - github_msgpacks/ is already present
136
+ if src_clone.exists() and any(src_clone.glob("*.xz")):
137
+ print(f" Detected clone install — using local {src_clone.name}/")
138
+ move_and_decompress(src_clone, dst)
139
+
140
+ # Installation done via pip — need to download from GitHub and decompress
141
+ else:
142
+ print(" Detected pip install — downloading from GitHub …")
143
+ try:
144
+ download_github_msgpacks(src_pip)
145
+ move_and_decompress(src_pip, dst)
146
+ finally:
147
+ # Clean up temp download dir regardless of success/failure
148
+ if src_pip.exists():
149
+ shutil.rmtree(src_pip, ignore_errors=True)
150
+
151
+ print("[refgenDetector] Reference files ready.\n")
152
+
153
+
154
+ if __name__ == "__main__":
155
+ run()
@@ -11,14 +11,10 @@ except ImportError:
11
11
 
12
12
  DEFAULT_MAJOR_RELEASES = dict(major_releases)
13
13
 
14
- CUSTOM_DB = Path("custom_references.json")
15
-
16
- if CUSTOM_DB.exists():
17
- with open(CUSTOM_DB) as f:
18
- custom_refs = json.load(f)
19
-
20
- major_releases.update(custom_refs)
14
+ CUSTOM_DB = Path.home() / ".refgenDetector" / "custom_references.json"
21
15
 
16
+ # make sure the directory exists
17
+ CUSTOM_DB.parent.mkdir(parents=True, exist_ok=True)
22
18
 
23
19
  def load_from_fai(file_path):
24
20
  """
@@ -707,7 +707,7 @@ major_releases = {"hg16": {"ref_gen": hg16, "build": "hg16", "species": "Homo sa
707
707
  "mm10": {"ref_gen": mm10, "build": "mm10", "species": "Mus musculus"},
708
708
  "mm39": {"ref_gen": mm39, "build": "mm39", "species": "Mus musculus"},
709
709
  "dm5": {"ref_gen": dm5, "build": "dm5", "species": "Drosophila Melanogaster"},
710
- #"dm6": {"ref_gen": dm6, "build": "dm6", "species": "Drosophila Melanogaster"},
710
+ "dm6": {"ref_gen": dm6, "build": "dm6", "species": "Drosophila Melanogaster"},
711
711
  "danRer10": {"ref_gen": danRer10, "build": "danRer10", "species": "Danio Rerio"},
712
712
  "danRer11": {"ref_gen": danRer11, "build": "danRer11", "species": "Danio Rerio"},
713
713
  "WBcel215": {"ref_gen": WBcel215, "build": "WBcel215", "species": "Caenorhabditis elegans"},
@@ -718,8 +718,7 @@ major_releases = {"hg16": {"ref_gen": hg16, "build": "hg16", "species": "Homo sa
718
718
  "ASM886v2": {"ref_gen": ASM886v2, "build": "ASM886v2", "species": "Escherichia coli"},
719
719
  "ASM584v2": {"ref_gen": ASM584v2, "build": "ASM584v2", "species": "Escherichia Coli"},
720
720
  "pantro3_0": {"ref_gen": pantro3_0, "build": "pantro3_0", "species": "Pan troglodytes"},
721
- "pantro_2_1_4": {"ref_gen": pantro_2_1_4, "build": "pantro_2_1_4",
722
- "species": "Pan troglodytes"},
721
+ "pantro_2_1_4": {"ref_gen": pantro_2_1_4, "build": "pantro_2_1_4","species": "Pan troglodytes"},
723
722
  "Mmul10": {"ref_gen": Mmul10, "build": "Mmul10", "species": "Macaca mulatta"},
724
723
  "rheMac8": {"ref_gen": rheMac8, "build": "rheMac8", "species": "Macaca mulatta"},
725
724
  "rheMac3": {"ref_gen": rheMac3, "build": "rheMac3", "species": "Macaca mulatta"},
@@ -3,12 +3,12 @@
3
3
  """ refgenDetector.py: Script to infer the reference genome used to create a BAM or CRAM"""
4
4
 
5
5
  __author__ = "Mireia Marin Ginestar"
6
- __version__ = "3.0.1"
6
+ __version__ = "3.0.4"
7
7
  __maintainer__ = "Mireia Marin Ginestar"
8
8
  __email__ = "mireia.marin@crg.eu"
9
9
  __status__ = "Developement"
10
10
 
11
- version = "3.0.1"
11
+ version = "3.0.4"
12
12
 
13
13
  import os
14
14
  import sys
@@ -34,6 +34,7 @@ except ImportError:
34
34
  console = Console()
35
35
 
36
36
 
37
+
37
38
  def monitor_resources(func):
38
39
  """Decorator to print resource usage (CPU, memory, I/O, runtime)."""
39
40
  def wrapper(*args, **kwargs):
@@ -16,12 +16,14 @@ except ImportError:
16
16
  from aligment_files import comparison
17
17
  from chromosomes_dict import *
18
18
 
19
+ # Works both when installed as a pip package and when run directly as a script
20
+ MSGPACK_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "msgpacks")
21
+
19
22
 
20
23
 
21
24
  final_results = []
22
25
  console = Console(highlight=False)
23
26
  _msgpack_cache = {}
24
- MSGPACK_DIR = "./msgpacks"
25
27
 
26
28
  def gather_and_sum(lists):
27
29
  """
@@ -66,7 +68,7 @@ def get_matches(snps_dict, chr_):
66
68
  cache_key = f"{version_name}-{chr_}"
67
69
 
68
70
  if cache_key not in _msgpack_cache:
69
- path = f"{MSGPACK_DIR}/{cache_key}.msgpack"
71
+ path = os.path.join(MSGPACK_DIR, f"{cache_key}.msgpack")
70
72
  if not os.path.exists(path):
71
73
  continue
72
74
  with open(path, "rb") as f:
@@ -80,7 +82,7 @@ def get_matches(snps_dict, chr_):
80
82
 
81
83
  matches.append([version_name, match_count])
82
84
 
83
- console.print("Getting matches. Took:", time.time() - start, "s")
85
+ #console.print("Getting matches. Took:", time.time() - start, "s")
84
86
  return matches
85
87
 
86
88
 
@@ -1,31 +0,0 @@
1
- from setuptools import setup, find_packages
2
- from pathlib import Path
3
-
4
- this_directory = Path(__file__).parent
5
- long_description = (this_directory / "README.md").read_text()
6
-
7
- VERSION = '3.0.2'
8
- DESCRIPTION = 'RefgenDetector'
9
-
10
- setup(
11
- name="RefgenDetector",
12
- version=VERSION,
13
- author="Mireia Marin i Ginestar",
14
- author_email="<mireia.marin@crg.eu>",
15
- long_description=long_description,
16
- long_description_content_type='text/markdown',
17
- install_requires=['pysam', 'psutil', 'rich', 'pandas', 'dnspython', 'msgpack', 'numpy'],
18
- keywords=['python'],
19
- classifiers=[
20
- "Programming Language :: Python :: 3",
21
- "Operating System :: Unix",
22
- ],
23
- entry_points={
24
- 'console_scripts': [
25
- 'refgenDetector=refgenDetector.refgenDetector_main:main',
26
- ],
27
- },
28
- packages=find_packages(where='src'),
29
- package_dir={'': 'src'},
30
- package_data={"refgenDetector": ["*.msgpack"]}
31
- )
File without changes
File without changes