RefgenDetector 3.0.4__tar.gz → 3.0.6__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {refgendetector-3.0.4/src/RefgenDetector.egg-info → refgendetector-3.0.6}/PKG-INFO +1 -1
- {refgendetector-3.0.4 → refgendetector-3.0.6}/setup.py +1 -1
- {refgendetector-3.0.4 → refgendetector-3.0.6/src/RefgenDetector.egg-info}/PKG-INFO +1 -1
- {refgendetector-3.0.4 → refgendetector-3.0.6}/src/RefgenDetector.egg-info/SOURCES.txt +1 -0
- {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/aligment_files.py +17 -10
- {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/download_reference.py +1 -0
- refgendetector-3.0.6/src/refgenDetector/msgpacks/__init__.py +0 -0
- {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/refgenDetector_main.py +1 -1
- {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/variant_files.py +9 -4
- {refgendetector-3.0.4 → refgendetector-3.0.6}/LICENSE +0 -0
- {refgendetector-3.0.4 → refgendetector-3.0.6}/README.md +0 -0
- {refgendetector-3.0.4 → refgendetector-3.0.6}/setup.cfg +0 -0
- {refgendetector-3.0.4 → refgendetector-3.0.6}/src/RefgenDetector.egg-info/dependency_links.txt +0 -0
- {refgendetector-3.0.4 → refgendetector-3.0.6}/src/RefgenDetector.egg-info/entry_points.txt +0 -0
- {refgendetector-3.0.4 → refgendetector-3.0.6}/src/RefgenDetector.egg-info/requires.txt +0 -0
- {refgendetector-3.0.4 → refgendetector-3.0.6}/src/RefgenDetector.egg-info/top_level.txt +0 -0
- {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/__init__.py +0 -0
- {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/chromosomes_dict.py +0 -0
- {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/exceptions/NoFileException.py +0 -0
- {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/exceptions/__init__.py +0 -0
- {refgendetector-3.0.4/src/refgenDetector/msgpacks → refgendetector-3.0.6/src/refgenDetector/github_msgpacks}/__init__.py +0 -0
- {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/ref_manager.py +0 -0
- {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/reference_genome_dictionaries.py +0 -0
|
@@ -17,4 +17,5 @@ src/refgenDetector/refgenDetector_main.py
|
|
|
17
17
|
src/refgenDetector/variant_files.py
|
|
18
18
|
src/refgenDetector/exceptions/NoFileException.py
|
|
19
19
|
src/refgenDetector/exceptions/__init__.py
|
|
20
|
+
src/refgenDetector/github_msgpacks/__init__.py
|
|
20
21
|
src/refgenDetector/msgpacks/__init__.py
|
|
@@ -124,13 +124,18 @@ def comparison(dict_SN_LN, target_file):
|
|
|
124
124
|
for ref in flavors_GRCh37
|
|
125
125
|
]
|
|
126
126
|
|
|
127
|
-
match_flavors = max(matches_flavors, key=lambda x: x[0])
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
127
|
+
match_flavors = max(matches_flavors, key=lambda x: len(x[0]))
|
|
128
|
+
|
|
129
|
+
if len(match_flavors[0]) > 0:
|
|
130
|
+
console.print(
|
|
131
|
+
f"[bold]Species detected:[/bold] {match_flavors[2]} "
|
|
132
|
+
f"[bold]\nReference genome version :[/bold] {match_flavors[1]}"
|
|
133
|
+
)
|
|
134
|
+
else:
|
|
135
|
+
console.print(
|
|
136
|
+
"[bold]Species detected:[/bold] Homo sapiens "
|
|
137
|
+
"\n[bold]Reference genome version :[/bold] GRCh37"
|
|
138
|
+
)
|
|
134
139
|
|
|
135
140
|
elif max_match[1] == "GRCh38": #checks for GRCh38 flavors
|
|
136
141
|
|
|
@@ -145,9 +150,11 @@ def comparison(dict_SN_LN, target_file):
|
|
|
145
150
|
else: # if no GRCh38 flavor is inferred, the major release is printed
|
|
146
151
|
console.print(f"[bold]Species detected:[/bold] Homo sapiens \n["
|
|
147
152
|
f"bold]Reference genome version :[/bold] GRCh38")
|
|
148
|
-
else:
|
|
149
|
-
console.print(
|
|
150
|
-
|
|
153
|
+
else: # print the major releases with no considered flavors
|
|
154
|
+
console.print(
|
|
155
|
+
f"[bold]Species detected:[/bold] {max_match[2]} "
|
|
156
|
+
f"\n[bold]Reference genome version:[/bold] {max_match[1]}"
|
|
157
|
+
)
|
|
151
158
|
|
|
152
159
|
|
|
153
160
|
|
|
File without changes
|
|
@@ -24,6 +24,7 @@ MSGPACK_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "msgpacks
|
|
|
24
24
|
final_results = []
|
|
25
25
|
console = Console(highlight=False)
|
|
26
26
|
_msgpack_cache = {}
|
|
27
|
+
_current_chr = None
|
|
27
28
|
|
|
28
29
|
def gather_and_sum(lists):
|
|
29
30
|
"""
|
|
@@ -54,13 +55,18 @@ def get_matches(snps_dict, chr_):
|
|
|
54
55
|
A list with the number of matches for each version, which is used to infer the reference genome version.
|
|
55
56
|
"""
|
|
56
57
|
|
|
57
|
-
global _msgpack_cache
|
|
58
|
+
global _msgpack_cache, _current_chr
|
|
58
59
|
start = time.time()
|
|
59
60
|
|
|
60
61
|
genome_versions = ["hg18", "GRCh37", "GRCh38", "T2T"]
|
|
61
62
|
matches = []
|
|
62
63
|
|
|
63
|
-
#
|
|
64
|
+
# If we've moved on to a different chromosome, drop the previous
|
|
65
|
+
# chromosome's msgpacks before loading the new ones.
|
|
66
|
+
if chr_ != _current_chr:
|
|
67
|
+
_msgpack_cache.clear()
|
|
68
|
+
_current_chr = chr_
|
|
69
|
+
|
|
64
70
|
positions = np.array(list(snps_dict.keys()), dtype=np.int64)
|
|
65
71
|
nucleotides = np.array(list(snps_dict.values()))
|
|
66
72
|
|
|
@@ -76,16 +82,15 @@ def get_matches(snps_dict, chr_):
|
|
|
76
82
|
|
|
77
83
|
ref_dict = _msgpack_cache[cache_key]
|
|
78
84
|
|
|
79
|
-
# Vectorized: look up all positions at once, compare arrays
|
|
80
85
|
ref_nucs = np.array([ref_dict.get(p, None) for p in positions])
|
|
81
86
|
match_count = int(np.sum(ref_nucs == nucleotides))
|
|
82
87
|
|
|
83
88
|
matches.append([version_name, match_count])
|
|
84
89
|
|
|
85
|
-
#console.print("Getting matches. Took:", time.time() - start, "s")
|
|
86
90
|
return matches
|
|
87
91
|
|
|
88
92
|
|
|
93
|
+
|
|
89
94
|
def trimming_indels(content, ref):
|
|
90
95
|
"""
|
|
91
96
|
If a row is longer than one position it is deleted, deleting this way any indels
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{refgendetector-3.0.4 → refgendetector-3.0.6}/src/RefgenDetector.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/exceptions/NoFileException.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/reference_genome_dictionaries.py
RENAMED
|
File without changes
|