RefgenDetector 3.0.4__tar.gz → 3.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {refgendetector-3.0.4/src/RefgenDetector.egg-info → refgendetector-3.0.6}/PKG-INFO +1 -1
  2. {refgendetector-3.0.4 → refgendetector-3.0.6}/setup.py +1 -1
  3. {refgendetector-3.0.4 → refgendetector-3.0.6/src/RefgenDetector.egg-info}/PKG-INFO +1 -1
  4. {refgendetector-3.0.4 → refgendetector-3.0.6}/src/RefgenDetector.egg-info/SOURCES.txt +1 -0
  5. {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/aligment_files.py +17 -10
  6. {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/download_reference.py +1 -0
  7. refgendetector-3.0.6/src/refgenDetector/msgpacks/__init__.py +0 -0
  8. {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/refgenDetector_main.py +1 -1
  9. {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/variant_files.py +9 -4
  10. {refgendetector-3.0.4 → refgendetector-3.0.6}/LICENSE +0 -0
  11. {refgendetector-3.0.4 → refgendetector-3.0.6}/README.md +0 -0
  12. {refgendetector-3.0.4 → refgendetector-3.0.6}/setup.cfg +0 -0
  13. {refgendetector-3.0.4 → refgendetector-3.0.6}/src/RefgenDetector.egg-info/dependency_links.txt +0 -0
  14. {refgendetector-3.0.4 → refgendetector-3.0.6}/src/RefgenDetector.egg-info/entry_points.txt +0 -0
  15. {refgendetector-3.0.4 → refgendetector-3.0.6}/src/RefgenDetector.egg-info/requires.txt +0 -0
  16. {refgendetector-3.0.4 → refgendetector-3.0.6}/src/RefgenDetector.egg-info/top_level.txt +0 -0
  17. {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/__init__.py +0 -0
  18. {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/chromosomes_dict.py +0 -0
  19. {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/exceptions/NoFileException.py +0 -0
  20. {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/exceptions/__init__.py +0 -0
  21. {refgendetector-3.0.4/src/refgenDetector/msgpacks → refgendetector-3.0.6/src/refgenDetector/github_msgpacks}/__init__.py +0 -0
  22. {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/ref_manager.py +0 -0
  23. {refgendetector-3.0.4 → refgendetector-3.0.6}/src/refgenDetector/reference_genome_dictionaries.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: RefgenDetector
3
- Version: 3.0.4
3
+ Version: 3.0.6
4
4
  Author: Mireia Marin i Ginestar
5
5
  Author-email: <mireia.marin@crg.eu>
6
6
  Keywords: python
@@ -5,7 +5,7 @@ from pathlib import Path
5
5
 
6
6
  this_directory = Path(__file__).parent
7
7
  long_description = (this_directory / "README.md").read_text()
8
- VERSION = '3.0.4'
8
+ VERSION = '3.0.6'
9
9
  DESCRIPTION = 'RefgenDetector'
10
10
 
11
11
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: RefgenDetector
3
- Version: 3.0.4
3
+ Version: 3.0.6
4
4
  Author: Mireia Marin i Ginestar
5
5
  Author-email: <mireia.marin@crg.eu>
6
6
  Keywords: python
@@ -17,4 +17,5 @@ src/refgenDetector/refgenDetector_main.py
17
17
  src/refgenDetector/variant_files.py
18
18
  src/refgenDetector/exceptions/NoFileException.py
19
19
  src/refgenDetector/exceptions/__init__.py
20
+ src/refgenDetector/github_msgpacks/__init__.py
20
21
  src/refgenDetector/msgpacks/__init__.py
@@ -124,13 +124,18 @@ def comparison(dict_SN_LN, target_file):
124
124
  for ref in flavors_GRCh37
125
125
  ]
126
126
 
127
- match_flavors = max(matches_flavors, key=lambda x: x[0])
128
- if match_flavors: #if some flavor was defined it prints it
129
- console.print(f"[bold]Species detected:[/bold] {match_flavors[2]} "
130
- f"[bold]\nReference genome version :[/bold] {match_flavors[1]}")
131
- else: #if there wasnt any flavor inferred, the major release it printed
132
- console.print(f"[bold]Species detected:[/bold] Homo sapiens \n["
133
- f"bold]Reference genome version :[/bold] GRCh37")
127
+ match_flavors = max(matches_flavors, key=lambda x: len(x[0]))
128
+
129
+ if len(match_flavors[0]) > 0:
130
+ console.print(
131
+ f"[bold]Species detected:[/bold] {match_flavors[2]} "
132
+ f"[bold]\nReference genome version :[/bold] {match_flavors[1]}"
133
+ )
134
+ else:
135
+ console.print(
136
+ "[bold]Species detected:[/bold] Homo sapiens "
137
+ "\n[bold]Reference genome version :[/bold] GRCh37"
138
+ )
134
139
 
135
140
  elif max_match[1] == "GRCh38": #checks for GRCh38 flavors
136
141
 
@@ -145,9 +150,11 @@ def comparison(dict_SN_LN, target_file):
145
150
  else: # if no GRCh38 flavor is inferred, the major release is printed
146
151
  console.print(f"[bold]Species detected:[/bold] Homo sapiens \n["
147
152
  f"bold]Reference genome version :[/bold] GRCh38")
148
- else: # print the major releases with no considered flavors.
149
- console.print(f"[bold]Species detected:[/bold] {match[2]} "
150
- f"\n[bold]Reference genome version:[/bold] {match[1]}")
153
+ else: # print the major releases with no considered flavors
154
+ console.print(
155
+ f"[bold]Species detected:[/bold] {max_match[2]} "
156
+ f"\n[bold]Reference genome version:[/bold] {max_match[1]}"
157
+ )
151
158
 
152
159
 
153
160
 
@@ -57,6 +57,7 @@ def get_paths():
57
57
 
58
58
  def is_already_setup(dst: Path) -> bool:
59
59
  """Return True if at least one .msgpack file exists in dst."""
60
+ print(dst)
60
61
  if not dst.exists():
61
62
  return False
62
63
  return any(dst.glob("*.msgpack"))
@@ -8,7 +8,7 @@ __maintainer__ = "Mireia Marin Ginestar"
8
8
  __email__ = "mireia.marin@crg.eu"
9
9
  __status__ = "Developement"
10
10
 
11
- version = "3.0.4"
11
+ version = "3.0.6"
12
12
 
13
13
  import os
14
14
  import sys
@@ -24,6 +24,7 @@ MSGPACK_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "msgpacks
24
24
  final_results = []
25
25
  console = Console(highlight=False)
26
26
  _msgpack_cache = {}
27
+ _current_chr = None
27
28
 
28
29
  def gather_and_sum(lists):
29
30
  """
@@ -54,13 +55,18 @@ def get_matches(snps_dict, chr_):
54
55
  A list with the number of matches for each version, which is used to infer the reference genome version.
55
56
  """
56
57
 
57
- global _msgpack_cache
58
+ global _msgpack_cache, _current_chr
58
59
  start = time.time()
59
60
 
60
61
  genome_versions = ["hg18", "GRCh37", "GRCh38", "T2T"]
61
62
  matches = []
62
63
 
63
- # Extract arrays once
64
+ # If we've moved on to a different chromosome, drop the previous
65
+ # chromosome's msgpacks before loading the new ones.
66
+ if chr_ != _current_chr:
67
+ _msgpack_cache.clear()
68
+ _current_chr = chr_
69
+
64
70
  positions = np.array(list(snps_dict.keys()), dtype=np.int64)
65
71
  nucleotides = np.array(list(snps_dict.values()))
66
72
 
@@ -76,16 +82,15 @@ def get_matches(snps_dict, chr_):
76
82
 
77
83
  ref_dict = _msgpack_cache[cache_key]
78
84
 
79
- # Vectorized: look up all positions at once, compare arrays
80
85
  ref_nucs = np.array([ref_dict.get(p, None) for p in positions])
81
86
  match_count = int(np.sum(ref_nucs == nucleotides))
82
87
 
83
88
  matches.append([version_name, match_count])
84
89
 
85
- #console.print("Getting matches. Took:", time.time() - start, "s")
86
90
  return matches
87
91
 
88
92
 
93
+
89
94
  def trimming_indels(content, ref):
90
95
  """
91
96
  If a row is longer than one position it is deleted, deleting this way any indels
File without changes
File without changes
File without changes