PyamilySeq 0.5.2__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
PyamilySeq/Constants.py CHANGED
@@ -1,2 +1,2 @@
1
- PyamilySeq_Version = 'v0.5.2'
1
+ PyamilySeq_Version = 'v0.6.0'
2
2
 
PyamilySeq/PyamilySeq.py CHANGED
@@ -7,11 +7,13 @@ import subprocess
7
7
 
8
8
 
9
9
  try:
10
- from .PyamilySeq_Species import cluster
10
+ from .PyamilySeq_Species import cluster as species_cluster
11
+ from .PyamilySeq_Genus import cluster as genus_cluster
11
12
  from .Constants import *
12
13
  from .utils import *
13
14
  except (ModuleNotFoundError, ImportError, NameError, TypeError) as error:
14
- from PyamilySeq_Species import cluster
15
+ from PyamilySeq_Species import cluster as species_cluster
16
+ from PyamilySeq_Genus import cluster as genus_cluster
15
17
  from Constants import *
16
18
  from utils import *
17
19
 
@@ -44,8 +46,8 @@ def main():
44
46
  required.add_argument('-run_mode', action='store', dest='run_mode', choices=['Full','Partial'],
45
47
  help='Run Mode: Should PyamilySeq be run in "Full" or "Partial" mode?',
46
48
  required=True)
47
- required.add_argument('-group_mode', action='store', dest='group_type', choices=['Species'],
48
- help='Group Mode: Should PyamilySeq be run in "Species" or "Genus" mode? - Genus mode not currently functioning',
49
+ required.add_argument('-group_mode', action='store', dest='group_type', choices=['Species', 'Genus'],
50
+ help='Group Mode: Should PyamilySeq be run in "Species" or "Genus" mode? ',
49
51
  required=True)
50
52
  required.add_argument("-clust_tool", action="store", dest="clust_tool", choices=['CD-HIT'],
51
53
  help="Clustering tool to use: CD-HIT, DIAMOND, BLAST or MMseqs2.",
@@ -88,13 +90,17 @@ def main():
88
90
 
89
91
  ###Grouping Arguments
90
92
  grouping_args = parser.add_argument_group('Grouping Arguments - Use to fine-tune grouping of genes after clustering')
91
- grouping_args.add_argument('-reclustered', action='store', dest='reclustered', help='Clustering output file from secondary round of clustering',
93
+ grouping_args.add_argument('-reclustered', action='store', dest='reclustered',
94
+ help='Currently only works on Partial Mode: Clustering output file from secondary round of clustering.',
92
95
  required=False)
93
96
  grouping_args.add_argument('-seq_tag', action='store', dest='sequence_tag', default='StORF',
94
97
  help='Default - "StORF": Unique identifier to be used to distinguish the second of two rounds of clustered sequences',
95
98
  required=False)
96
- grouping_args.add_argument('-groups', action="store", dest='core_groups', default="99,95,15",
97
- help='Default - (\'99,95,15\'): Gene family groups to use',
99
+ grouping_args.add_argument('-core_groups', action="store", dest='core_groups', default="99,95,15",
100
+ help='Default - (\'99,95,15\'): Gene family groups to use for "Species" mode',
101
+ required=False)
102
+ grouping_args.add_argument('-genus_groups', action="store", dest='genus_groups', default="1,2,3,4,5,6",
103
+ help='Default - (\'1,2,3,4,5,6\'): Gene family groups to use for "Genus" mode',
98
104
  required=False)
99
105
 
100
106
  ###Output Arguments
@@ -126,6 +132,8 @@ def main():
126
132
 
127
133
  ### Checking all required parameters are provided by user
128
134
  if options.run_mode == 'Full':
135
+ if options.reclustered != None:
136
+ sys.exit("Currently reclustering only works on Partial Mode.")
129
137
  required_full_mode = [options.input_type, options.input_dir, options.name_split, options.clust_tool,
130
138
  options.pident, options.len_diff]
131
139
  if all(required_full_mode):
@@ -165,7 +173,7 @@ def main():
165
173
  else:
166
174
  exit("mafft is not installed. Please install mafft to proceed.")
167
175
  ##CD-HIT
168
- if options.clust_tool == 'CD-HIT':
176
+ if options.clust_tool == 'CD-HIT' and options.run_mode == 'Full':
169
177
  if is_tool_installed('cd-hit'):
170
178
  if options.verbose == True:
171
179
  print("cd-hit is installed. Proceeding with clustering.")
@@ -175,7 +183,7 @@ def main():
175
183
  if options.write_families != None and options.original_fasta == False:
176
184
  exit("-fasta must br provided if -w is used")
177
185
 
178
- options.core_groups = options.core_groups + ',0'
186
+
179
187
 
180
188
 
181
189
  if options.cluster_file:
@@ -191,24 +199,30 @@ def main():
191
199
  combined_out_file = os.path.join(output_path, "combined_sequences.fasta")
192
200
  clustering_output = os.path.join(output_path, 'clustering_' + options.clust_tool)
193
201
 
194
-
195
- if options.run_mode == 'Full':
196
-
202
+ if options.group_type == 'Species':
203
+ options.core_groups = options.core_groups + ',0'
204
+ groups_to_use = options.core_groups
205
+ else:
206
+ options.genus_groups = options.genus_groups + ',>'
207
+ groups_to_use = options.genus_groups
197
208
 
198
209
 
210
+ if options.run_mode == 'Full':
199
211
  if options.input_type == 'separate':
200
212
  read_separate_files(options.input_dir, options.name_split, combined_out_file)
201
213
  else:
202
214
  read_combined_files(options.input_dir, options.name_split, combined_out_file)
203
215
 
204
216
  run_cd_hit(combined_out_file, clustering_output, options)
217
+
205
218
  class clustering_options:
206
219
  def __init__(self):
207
220
  self.cluster_format = options.clust_tool
208
221
  self.reclustered = options.reclustered
209
222
  self.sequence_tag = options.sequence_tag
210
- self.core_groups = '99,95,15,0'
223
+ self.core_groups = groups_to_use
211
224
  self.clusters = clustering_output + clust_affix
225
+ self.output_dir = options.output_dir
212
226
  self.gene_presence_absence_out = options.gene_presence_absence_out
213
227
  self.write_families = options.write_families
214
228
  self.con_core = options.con_core
@@ -223,8 +237,9 @@ def main():
223
237
  self.cluster_format = options.clust_tool
224
238
  self.reclustered = options.reclustered
225
239
  self.sequence_tag = options.sequence_tag
226
- self.core_groups = '99,95,15,0'
240
+ self.core_groups = groups_to_use
227
241
  self.clusters = options.cluster_file
242
+ self.output_dir = options.output_dir
228
243
  self.gene_presence_absence_out = options.gene_presence_absence_out
229
244
  self.write_families = options.write_families
230
245
  self.con_core = options.con_core
@@ -234,9 +249,10 @@ def main():
234
249
  clustering_options = clustering_options()
235
250
 
236
251
 
237
-
238
-
239
- cluster(clustering_options)
252
+ if options.group_type == 'Species':
253
+ species_cluster(clustering_options)
254
+ elif options.group_type == 'Genus':
255
+ genus_cluster((clustering_options))
240
256
 
241
257
  print("Thank you for using PyamilySeq -- A detailed user manual can be found at https://github.com/NickJD/PyamilySeq\n"
242
258
  "Please report any issues to: https://github.com/NickJD/PyamilySeq/issues\n#####")