seqtrimnext 2.0.35 → 2.0.36

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 2.0.36 2011-06-24
2
+
3
+ Added clustered installation documentation
4
+
1
5
  === 2.0.35 2011-06-22
2
6
 
3
7
  FarAdapters improvement
data/README.rdoc CHANGED
@@ -46,7 +46,15 @@ Once installed, SeqtrimNEXT is very easy to use:
46
46
 
47
47
  To install core databases (it should be done at installation time):
48
48
 
49
- $> seqtrimnext -i
49
+ $> seqtrimnext -i core
50
+
51
+ Databases will be installed next to SeqtrimNEXT by default, but you can override this location by setting the environment variable +BLASTDB+. Eg.:
52
+
53
+ If you wish to have your databases installed under /var:
54
+
55
+ $> export BLASTDB=/var/DB/formatted
56
+
57
+ Be sure that this environment variable is always loaded before SeqtrimNEXT execution (Eg.: add it to /etc/profile.local).
50
58
 
51
59
  To perform an analysis using a predefined template with a FASTQ file format using 4 cpus:
52
60
 
@@ -60,7 +68,52 @@ To perform an analisys using a predefined template with a FASTQ file format:
60
68
  To get additional help and list available templates and databases:
61
69
 
62
70
  $> seqtrimnext -h
71
+
72
+ === CLUSTERED EXECUTION:
73
+
74
+ To take full advantage of a clustered installation, you can launch SeqtrimNEXT in distributed mode. You only need to provide it a list of machine names (or IPs) where workers will be launched.
75
+
76
+ Set up a workers file like this:
77
+
78
+ machine1
79
+ machine1
80
+ machine2
81
+ machine2
82
+ machine2
83
+
84
+ And launch SeqtrimNEXT this way:
85
+
86
+ $> seqtrimnext -t genomics_454.txt -Q input_file_in_FASTQ -w workers_file -s 10.0.0
87
+
88
+ This will launch 2 workers on machine1 and 3 workers on machine2 using the network whose ip starts with 10.0.0 to communicate.
89
+
90
+
91
+ == TEMPLATE MODIFICATIONS
92
+
93
+ You can modify any template to fit your workflow. To do this, you only need to copy one of the templates and edit it with a text editor, or simply modify a used_params.txt file that was produced by a previous SeqtrimNEXT execution.
63
94
 
95
+ Eg.: If you want to disable repetition removal, do this:
96
+
97
+ 1-Copy the template file you wish to customize and name it params.txt.
98
+ 2-Edit params.txt with a text editor
99
+ 3-Find a line like this:
100
+
101
+ remove_clonality = true
102
+
103
+
104
+ 4-Replace this line with:
105
+
106
+ remove_clonality = false
107
+
108
+ 5- Launch SeqtrimNEXT with params.txt file instead of a default template:
109
+
110
+ $> seqtrimnext -t params.txt -f input_file_in_FASTA -q input_file_in_QUAL
111
+
112
+
113
+
114
+ In the same way, you can modify any of the parameters. You can find all parameters and their description in any used_params.txt file generated by a previous SeqtrimNEXT execution. Parameters not specified in a template are automatically set to their default value at execution time.
115
+
116
+ <b>NOTE</b>: The only mandatory parameter is the plugin_list one.
64
117
 
65
118
  == REQUIREMENTS:
66
119
 
@@ -121,6 +174,8 @@ This will install seqtrimnext and all the required gems.
121
174
  SeqtrimNEXT needs some core databases to work. To install them:
122
175
 
123
176
  seqtrimnext -i core
177
+
178
+ You can change the default database location by setting the environment variable +BLASTDB+. Refer to SYNOPSIS for an example.
124
179
 
125
180
  === Database modifications
126
181
 
@@ -139,6 +194,89 @@ Once the databases has been modified, you will need to reformat them by issuing
139
194
  Modified databases will be rebuilt.
140
195
 
141
196
 
197
+ == CLUSTERED INSTALLATION
198
+
199
+ To install SeqtrimNEXT into a cluster, you need to have the software available on all machines, either by installing it on a shared location or by installing it on each cluster node. Once installed, you need to create an init_file where your environment is correctly set up (paths, BLASTDB, etc):
200
+
201
+ export PATH=/apps/blast+/bin:/apps/cd-hit/bin:$PATH
202
+ export BLASTDB=/var/DB/formatted
203
+ export SEQTRIMNEXT_INIT=path_to_init_file
204
+
205
+
206
+ And initialize the SEQTRIMNEXT_INIT environment variable on your main node (from where SeqtrimNEXT will be initially launched):
207
+
208
+ export SEQTRIMNEXT_INIT=path_to_init_file
209
+
210
+ If you use any queue system like PBS Pro or Moab/Slurm, be sure to initialize the variables in each submission script.
211
+
212
+ <b>NOTE</b>: all nodes on the cluster should use ssh keys to allow SeqtrimNEXT to launch workers without asking for a password.
213
+
214
+ == SAMPLE INIT FILES FOR CLUSTERED INSTALLATION:
215
+
216
+ === Init file
217
+
218
+ $> cat stn_init_env
219
+
220
+ source ~latex/init_env
221
+ source ~ruby19/init_env
222
+ source ~blast_plus/init_env
223
+ source ~gnuplot/init_env
224
+ source ~cdhit/init_env
225
+
226
+ export BLASTDB=~seqtrimnext/DB/formatted/
227
+ export SEQTRIMNEXT_INIT=~seqtrimnext/stn_init_env
228
+
229
+
230
+ === PBS Submission script
231
+
232
+ $> cat sample_work.sh
233
+
234
+ # 40 distributed workers and 1 GB memory per worker:
235
+ #PBS -l select=40:ncpus=1:mpiprocs=1:mem=1gb
236
+ # request 10 hours of walltime:
237
+ #PBS -l walltime=10:00:00
238
+ # cd to working directory (from where job was submitted)
239
+ cd $PBS_O_WORKDIR
240
+
241
+ # create workers file with assigned node names
242
+
243
+ cat ${PBS_NODEFILE} > workers
244
+
245
+ # init seqtrimnext
246
+ source ~seqtrimnext/init_env
247
+
248
+ time seqtrimnext -t paired_ends.txt -Q fastq -w workers -s 10.0.0
249
+
250
+
251
+ Once this submission script is created, you only need to launch it with:
252
+
253
+ qsub sample_work.sh
254
+
255
+ === MOAB/SLURM submission script
256
+
257
+ $> cat sample_work_moab.sh
258
+
259
+ #!/bin/bash
260
+ # @ job_name = STN
261
+ # @ initialdir = .
262
+ # @ output = STN_%j.out
263
+ # @ error = STN_%j.err
264
+ # @ total_tasks = 40
265
+ # @ wall_clock_limit = 10:00:00
266
+
267
+ # save the list of workers
268
+ sl_get_machine_list > workers
269
+
270
+ # init seqtrimnext
271
+ source ~seqtrimnext/init_env
272
+
273
+ time seqtrimnext -t paired_ends.txt -Q fastq -w workers -s 10.0.0
274
+
275
+ Then you only need to submit your job with mnsubmit
276
+
277
+ mnsubmit sample_work_moab.sh
278
+
279
+
142
280
  == LICENSE:
143
281
 
144
282
  (The MIT License)
@@ -36,7 +36,7 @@ ENV['BLASTDB']=$FORMATTED_DB_PATH
36
36
 
37
37
  OUTPUT_PATH='output_files'
38
38
 
39
-
39
+ puts "FORMATTED_DB_BLAST in workers: #{$FORMATTED_DB_PATH}"
40
40
  # $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib')
41
41
 
42
42
  require 'scbi_mapreduce'
data/lib/seqtrimnext.rb CHANGED
@@ -30,7 +30,7 @@ module Seqtrimnext
30
30
  # SEQTRIM_VERSION_STAGE = 'b'
31
31
  # SEQTRIM_VERSION = "2.0.0#{SEQTRIM_VERSION_STAGE}#{SEQTRIM_VERSION_REVISION}"
32
32
 
33
- VERSION = '2.0.35'
33
+ VERSION = '2.0.36'
34
34
 
35
35
  SEQTRIM_VERSION = VERSION
36
36
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: seqtrimnext
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.0.35
5
+ version: 2.0.36
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dario Guerrero & Almudena Bocinos
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-06-22 00:00:00 Z
13
+ date: 2011-06-24 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: narray