seqtrimnext 2.0.35 → 2.0.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,7 @@
1
+ === 2.0.36 2011-06-24
2
+
3
+ Added clustered installation documentation
4
+
1
5
  === 2.0.35 2011-06-22
2
6
 
3
7
  FarAdapters improvement
data/README.rdoc CHANGED
@@ -46,7 +46,15 @@ Once installed, SeqtrimNEXT is very easy to use:
46
46
 
47
47
  To install core databases (it should be done at installation time):
48
48
 
49
- $> seqtrimnext -i
49
+ $> seqtrimnext -i core
50
+
51
+ Databases will be installed nearby SeqtrimNEXT by default, but you can override this location by setting the environment variable +BLASTDB+. Eg.:
52
+
53
+ If you wish to have your databases installed at /var:
54
+
55
+ $> export BLASTDB=/var/DB/formatted
56
+
57
+ Be sure that this environment variable is always loaded before SeqtrimNEXT execution (Eg.: add it to /etc/profile.local).
50
58
 
51
59
  To perform an analysis using a predefined template with a FASTQ file format using 4 cpus:
52
60
 
@@ -60,7 +68,52 @@ To perform an analisys using a predefined template with a FASTQ file format:
60
68
  To get additional help and list available templates and databases:
61
69
 
62
70
  $> seqtrimnext -h
71
+
72
+ === CLUSTERED EXECUTION:
73
+
74
+ To take full advantage of a clustered installation, you can launch SeqtrimNEXT in distributed mode. You only need to provide it a list of machine names (or IPs) where workers will be launched.
75
+
76
+ Set up a workers file like this:
77
+
78
+ machine1
79
+ machine1
80
+ machine2
81
+ machine2
82
+ machine2
83
+
84
+ And launch SeqtrimNEXT this way:
85
+
86
+ $> seqtrimnext -t genomics_454.txt -Q input_file_in_FASTQ -w workers_file -s 10.0.0
87
+
88
+ This will launch 2 workers on machine1 and 3 workers on machine2 using the network whose ip starts with 10.0.0 to communicate.
89
+
90
+
91
+ == TEMPLATE MODIFICATIONS
92
+
93
+ You can modify any template to fit your workflow. To do this, you only need to copy one of the templates and edit it with a text editor, or simply modify a used_params.txt file that was produced by a previous SeqtrimNEXT execution.
63
94
 
95
+ Eg.: If you want to disable repetition removal, do this:
96
+
97
+ 1-Copy the template file you wish to customize and name it params.txt.
98
+ 2-Edit params.txt with a text editor
99
+ 3-Find a line like this:
100
+
101
+ remove_clonality = true
102
+
103
+
104
+ 4-Replace this line with:
105
+
106
+ remove_clonality = false
107
+
108
+ 5- Launch SeqtrimNEXT with params.txt file instead of a default template:
109
+
110
+ $> seqtrimnext -t params.txt -f input_file_in_FASTA -q input_file_in_QUAL
111
+
112
+
113
+
114
+ In the same way, you can modify any of the parameters. You can find all parameters and their description in any used_params.txt file generated by a previous SeqtrimNEXT execution. Parameters not specified in a template are automatically set to their default value at execution time.
115
+
116
+ <b>NOTE</b>: The only mandatory parameter is the plugin_list one.
64
117
 
65
118
  == REQUIREMENTS:
66
119
 
@@ -121,6 +174,8 @@ This will install seqtrimnext and all the required gems.
121
174
  SeqtrimNEXT needs some core databases to work. To install them:
122
175
 
123
176
  seqtrimnext -i core
177
+
178
+ You can change the default database location by setting the environment variable +BLASTDB+. Refer to SYNOPSIS for an example.
124
179
 
125
180
  === Database modifications
126
181
 
@@ -139,6 +194,89 @@ Once the databases has been modified, you will need to reformat them by issuing
139
194
  Modified databases will be rebuilt.
140
195
 
141
196
 
197
+ == CLUSTERED INSTALLATION
198
+
199
+ To install SeqtrimNEXT into a cluster, you need to have the software available on all machines, either by installing it on a shared location or by installing it on each cluster node. Once installed, you need to create an init_file where your environment is correctly set up (paths, BLASTDB, etc):
200
+
201
+ export PATH=/apps/blast+/bin:/apps/cd-hit/bin
202
+ export BLASTDB=/var/DB/formatted
203
+ export SEQTRIMNEXT_INIT=path_to_init_file
204
+
205
+
206
+ And initialize the SEQTRIMNEXT_INIT environment variable on your main node (from where SeqtrimNEXT will be initially launched):
207
+
208
+ export SEQTRIMNEXT_INIT=path_to_init_file
209
+
210
+ If you use any queue system like PBS Pro or Moab/Slurm, be sure to initialize the variables on each submission script.
211
+
212
+ <b>NOTE</b>: all nodes on the cluster should use ssh keys to allow SeqtrimNEXT to launch workers without asking for a password.
213
+
214
+ == SAMPLE INIT FILES FOR CLUSTERED INSTALLATION:
215
+
216
+ === Init file
217
+
218
+ $> cat stn_init_env
219
+
220
+ source ~latex/init_env
221
+ source ~ruby19/init_env
222
+ source ~blast_plus/init_env
223
+ source ~gnuplot/init_env
224
+ source ~cdhit/init_env
225
+
226
+ export BLASTDB=~seqtrimnext/DB/formatted/
227
+ export SEQTRIMNEXT_INIT=~seqtrimnext/stn_init_env
228
+
229
+
230
+ === PBS Submission script
231
+
232
+ $> cat sample_work.sh
233
+
234
+ # 40 distributed workers and 1 GB memory per worker:
235
+ #PBS -l select=40:ncpus=1:mpiprocs=1:mem=1gb
236
+ # request 10 hours of walltime:
237
+ #PBS -l walltime=10:00:00
238
+ # cd to working directory (from where job was submitted)
239
+ cd $PBS_O_WORKDIR
240
+
241
+ # create workers file with assigned node names
242
+
243
+ cat ${PBS_NODEFILE} > workers
244
+
245
+ # init seqtrimnext
246
+ source ~seqtrimnext/init_env
247
+
248
+ time seqtrimnext -t paired_ends.txt -Q fastq -w workers -s 10.0.0
249
+
250
+
251
+ Once this submission script is created, you only need to launch it with:
252
+
253
+ qsub sample_work.sh
254
+
255
+ === MOAB/SLURM submission script
256
+
257
+ $> cat sample_work_moab.sh
258
+
259
+ #!/bin/bash
260
+ # @ job_name = STN
261
+ # @ initialdir = .
262
+ # @ output = STN_%j.out
263
+ # @ error = STN_%j.err
264
+ # @ total_tasks = 40
265
+ # @ wall_clock_limit = 10:00:00
266
+
267
+ # save the list of workers
268
+ sl_get_machine_list > workers
269
+
270
+ # init seqtrimnext
271
+ source ~seqtrimnext/init_env
272
+
273
+ time seqtrimnext -t paired_ends.txt -Q fastq -w workers -s 10.0.0
274
+
275
+ Then you only need to submit your job with mnsubmit
276
+
277
+ mnsubmit sample_work_moab.sh
278
+
279
+
142
280
  == LICENSE:
143
281
 
144
282
  (The MIT License)
@@ -36,7 +36,7 @@ ENV['BLASTDB']=$FORMATTED_DB_PATH
36
36
 
37
37
  OUTPUT_PATH='output_files'
38
38
 
39
-
39
+ puts "FORMATTED_DB_BLAST in workers: #{$FORMATTED_DB_PATH}"
40
40
  # $: << File.expand_path('~/progs/ruby/gems/scbi_mapreduce/lib')
41
41
 
42
42
  require 'scbi_mapreduce'
data/lib/seqtrimnext.rb CHANGED
@@ -30,7 +30,7 @@ module Seqtrimnext
30
30
  # SEQTRIM_VERSION_STAGE = 'b'
31
31
  # SEQTRIM_VERSION = "2.0.0#{SEQTRIM_VERSION_STAGE}#{SEQTRIM_VERSION_REVISION}"
32
32
 
33
- VERSION = '2.0.35'
33
+ VERSION = '2.0.36'
34
34
 
35
35
  SEQTRIM_VERSION = VERSION
36
36
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: seqtrimnext
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 2.0.35
5
+ version: 2.0.36
6
6
  platform: ruby
7
7
  authors:
8
8
  - Dario Guerrero & Almudena Bocinos
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-06-22 00:00:00 Z
13
+ date: 2011-06-24 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: narray