seqtrimnext 2.0.29
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +3 -0
- data/Manifest.txt +114 -0
- data/PostInstall.txt +7 -0
- data/README.rdoc +159 -0
- data/Rakefile +38 -0
- data/bin/create_graphs.rb +46 -0
- data/bin/extract_seqs.rb +45 -0
- data/bin/extract_seqs_from_fasta.rb +56 -0
- data/bin/extract_seqs_from_fastq.rb +45 -0
- data/bin/fasta2fastq.rb +38 -0
- data/bin/fastq2fasta.rb +35 -0
- data/bin/gen_qual.rb +46 -0
- data/bin/get_seq.rb +46 -0
- data/bin/group_by_range.rb +17 -0
- data/bin/join_ilumina_paired.rb +130 -0
- data/bin/parse_amplicons.rb +95 -0
- data/bin/parse_json_results.rb +66 -0
- data/bin/parse_params.rb +82 -0
- data/bin/resume_clusters.rb +48 -0
- data/bin/resume_rejected.sh +9 -0
- data/bin/reverse_paired.rb +49 -0
- data/bin/seqtrimnext +368 -0
- data/bin/split_fastq.rb +42 -0
- data/bin/split_ilumina_paired.rb +65 -0
- data/bin/split_paired.rb +70 -0
- data/lib/seqtrimnext/actions/action_ab_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_far_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_ab_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_empty_insert.rb +22 -0
- data/lib/seqtrimnext/actions/action_ignore_repeated.rb +24 -0
- data/lib/seqtrimnext/actions/action_indetermination.rb +30 -0
- data/lib/seqtrimnext/actions/action_induced_low_complexity.rb +29 -0
- data/lib/seqtrimnext/actions/action_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_is_contaminated.rb +30 -0
- data/lib/seqtrimnext/actions/action_key.rb +30 -0
- data/lib/seqtrimnext/actions/action_left_adapter.rb +32 -0
- data/lib/seqtrimnext/actions/action_left_primer.rb +17 -0
- data/lib/seqtrimnext/actions/action_linker.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_complexity.rb +30 -0
- data/lib/seqtrimnext/actions/action_low_high_size.rb +31 -0
- data/lib/seqtrimnext/actions/action_low_quality.rb +33 -0
- data/lib/seqtrimnext/actions/action_mid.rb +30 -0
- data/lib/seqtrimnext/actions/action_multiple_linker.rb +29 -0
- data/lib/seqtrimnext/actions/action_paired_reads.rb +28 -0
- data/lib/seqtrimnext/actions/action_poly_a.rb +29 -0
- data/lib/seqtrimnext/actions/action_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_rem_adit_artifacts.rb +32 -0
- data/lib/seqtrimnext/actions/action_right_adapter.rb +29 -0
- data/lib/seqtrimnext/actions/action_right_primer.rb +25 -0
- data/lib/seqtrimnext/actions/action_short_insert.rb +32 -0
- data/lib/seqtrimnext/actions/action_unexpected_poly_t.rb +29 -0
- data/lib/seqtrimnext/actions/action_unexpected_vector.rb +31 -0
- data/lib/seqtrimnext/actions/action_vectors.rb +31 -0
- data/lib/seqtrimnext/actions/seqtrim_action.rb +136 -0
- data/lib/seqtrimnext/classes/action_manager.rb +47 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_work_manager.rb +335 -0
- data/lib/seqtrimnext/classes/em_classes/seqtrim_worker.rb +290 -0
- data/lib/seqtrimnext/classes/extract_stats.rb +255 -0
- data/lib/seqtrimnext/classes/gnu_plot_graph.rb +140 -0
- data/lib/seqtrimnext/classes/graph_stats.rb +74 -0
- data/lib/seqtrimnext/classes/install_database.rb +43 -0
- data/lib/seqtrimnext/classes/install_requirements.rb +123 -0
- data/lib/seqtrimnext/classes/list_db.rb +49 -0
- data/lib/seqtrimnext/classes/make_blast_db.rb +113 -0
- data/lib/seqtrimnext/classes/one_blast.rb +41 -0
- data/lib/seqtrimnext/classes/params.rb +387 -0
- data/lib/seqtrimnext/classes/piro.rb +78 -0
- data/lib/seqtrimnext/classes/plugin_manager.rb +153 -0
- data/lib/seqtrimnext/classes/scan_for_restr_site.rb +138 -0
- data/lib/seqtrimnext/classes/scbi_stats.rb +68 -0
- data/lib/seqtrimnext/classes/seqtrim.rb +317 -0
- data/lib/seqtrimnext/classes/sequence.rb +55 -0
- data/lib/seqtrimnext/classes/sequence_group.rb +72 -0
- data/lib/seqtrimnext/classes/sequence_with_action.rb +503 -0
- data/lib/seqtrimnext/plugins/plugin.rb +267 -0
- data/lib/seqtrimnext/plugins/plugin_ab_adapters.rb +189 -0
- data/lib/seqtrimnext/plugins/plugin_adapters.rb +165 -0
- data/lib/seqtrimnext/plugins/plugin_amplicons.rb +221 -0
- data/lib/seqtrimnext/plugins/plugin_contaminants.rb +209 -0
- data/lib/seqtrimnext/plugins/plugin_extract_inserts.rb +438 -0
- data/lib/seqtrimnext/plugins/plugin_find_poly_at.rb +393 -0
- data/lib/seqtrimnext/plugins/plugin_ignore_repeated.rb +101 -0
- data/lib/seqtrimnext/plugins/plugin_indeterminations.rb +199 -0
- data/lib/seqtrimnext/plugins/plugin_key.rb +70 -0
- data/lib/seqtrimnext/plugins/plugin_linker.rb +232 -0
- data/lib/seqtrimnext/plugins/plugin_low_complexity.rb +98 -0
- data/lib/seqtrimnext/plugins/plugin_low_high_size.rb +74 -0
- data/lib/seqtrimnext/plugins/plugin_low_quality.rb +394 -0
- data/lib/seqtrimnext/plugins/plugin_mids.rb +231 -0
- data/lib/seqtrimnext/plugins/plugin_rem_adit_artifacts.rb +246 -0
- data/lib/seqtrimnext/plugins/plugin_short_insert.rb +244 -0
- data/lib/seqtrimnext/plugins/plugin_vectors.rb +191 -0
- data/lib/seqtrimnext/templates/amplicons.txt +16 -0
- data/lib/seqtrimnext/templates/genomics_454.txt +5 -0
- data/lib/seqtrimnext/templates/genomics_454_with_paired.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality.txt +5 -0
- data/lib/seqtrimnext/templates/low_quality_and_low_complexity.txt +5 -0
- data/lib/seqtrimnext/templates/transcriptomics_454.txt +8 -0
- data/lib/seqtrimnext/templates/transcriptomics_plants.txt +8 -0
- data/lib/seqtrimnext/utils/extract_samples.rb +52 -0
- data/lib/seqtrimnext/utils/fasta2xml.rb +69 -0
- data/lib/seqtrimnext/utils/global_match.rb +65 -0
- data/lib/seqtrimnext/utils/hash_stats.rb +29 -0
- data/lib/seqtrimnext/utils/json_utils.rb +50 -0
- data/lib/seqtrimnext/utils/load_fasta_names_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/load_qual_in_hash.rb +37 -0
- data/lib/seqtrimnext/utils/recover_mid.rb +95 -0
- data/lib/seqtrimnext/utils/string_utils.rb +56 -0
- data/lib/seqtrimnext.rb +37 -0
- data/script/console +10 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/test/test_helper.rb +3 -0
- data/test/test_seqtrimnext.rb +11 -0
- metadata +318 -0
data/bin/seqtrimnext
ADDED
@@ -0,0 +1,368 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# SeqTrimNext: Next generation sequencing preprocessor
|
3
|
+
# Copyright (C) <2011>
|
4
|
+
# Authors: Almudena Bocinos Rioboo, Diego Dario Guerrero Fernandez,
|
5
|
+
# Rocio Bautista Moreno, Juan Falgueras Cano & M. Gonzalo Claros
|
6
|
+
# email: soporte@scbi.uma.es - http://www.scbi.uma.es
|
7
|
+
#
|
8
|
+
# This program is free software: you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU Affero General Public License as published by
|
10
|
+
# the Free Software Foundation, either version 3 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU Affero General Public License for more details.
|
17
|
+
#
|
18
|
+
# You should have received a copy of the GNU Affero General Public License
|
19
|
+
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
20
|
+
|
21
|
+
#= SEQTRIM II
|
22
|
+
#
|
23
|
+
#== Running
|
24
|
+
#
|
25
|
+
# Seqtrim can be run locally or in a parallel/distributed environment.
|
26
|
+
#
|
27
|
+
#=== Running locally
|
28
|
+
#* list
|
29
|
+
#
|
30
|
+
#=== Running in a distributed environment
|
31
|
+
#
|
32
|
+
#== SEC 2
|
33
|
+
#
|
34
|
+
#=== SUB 2.1
|
35
|
+
#
|
36
|
+
|
37
|
+
|
38
|
+
# #finds the classes that were in the folder 'classes'
|
39
|
+
# ROOT_PATH=File.dirname(__FILE__)
|
40
|
+
# $: << File.expand_path(File.join(ROOT_PATH, 'classes'))
|
41
|
+
#
|
42
|
+
# #finds the classes that were in the folder 'plugins'
|
43
|
+
# $: << File.expand_path(File.join(ROOT_PATH, 'plugins'))
|
44
|
+
#
|
45
|
+
#
|
46
|
+
# #finds the classes that were in the folder 'plugins'
|
47
|
+
# $: << File.expand_path(File.join(ROOT_PATH, 'actions'))
|
48
|
+
#
|
49
|
+
# #finds the classes that were in the folder 'utils'
|
50
|
+
# $: << File.expand_path(File.join(ROOT_PATH, 'utils'))
|
51
|
+
#
|
52
|
+
# $: << File.expand_path(File.join(ROOT_PATH, 'classes','em_classes'))
|
53
|
+
|
54
|
+
# to test scbi_drb gem locally
|
55
|
+
# $: << File.expand_path('~/progs/ruby/gems/scbi_drb/lib/')
|
56
|
+
|
57
|
+
# $: << File.expand_path(ROOT_PATH)
|
58
|
+
|
59
|
+
$: << File.expand_path('~/progs/ruby/gems/seqtrimnext/lib/')
|
60
|
+
|
61
|
+
require 'seqtrimnext'
|
62
|
+
|
63
|
+
############ PATHS #######################
|
64
|
+
# Base directory used to locate the bundled templates. ROOT_PATH is not
# assigned anywhere in this script; presumably the 'seqtrimnext' library
# required above defines it -- confirm.
$SEQTRIM_PATH = ROOT_PATH

# Resolve the BLAST database locations: a BLASTDB environment variable wins,
# otherwise fall back to the DB folder below ROOT_PATH.
blastdb_from_env = ENV['BLASTDB']
if blastdb_from_env.nil?
  $DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB"))
  $FORMATTED_DB_PATH = File.expand_path(File.join(ROOT_PATH, "DB", 'formatted'))
else
  $FORMATTED_DB_PATH = blastdb_from_env
  $DB_PATH = File.dirname($FORMATTED_DB_PATH)
end

# Publish the resolved path back into the environment.
ENV['BLASTDB'] = $FORMATTED_DB_PATH

OUTPUT_PATH = 'output_files'

# TODO - document every class and method so that their descriptions show up when running rdoc from the terminal

# Checks install requirements; the script stops here when the check fails.
require 'install_requirements'

exit unless InstallRequirements.new.check_install_requirements
|
89
|
+
|
90
|
+
require "logger"
|
91
|
+
require 'optparse'
|
92
|
+
require "global_match"
|
93
|
+
require "seqtrim"
|
94
|
+
require "params.rb"
|
95
|
+
require "plugin.rb"
|
96
|
+
require "sequence.rb"
|
97
|
+
require "plugin_manager.rb"
|
98
|
+
require "make_blast_db"
|
99
|
+
require 'hash_stats'
|
100
|
+
require 'list_db'
|
101
|
+
require 'install_database'
|
102
|
+
require 'socket'
|
103
|
+
|
104
|
+
|
105
|
+
# Prints the extended help that follows the option summary: two example
# command lines, the template files found under $SEQTRIM_PATH/templates and
# the entries returned by ListDb.list_databases for $DB_PATH.
# The method terminates the program (it calls exit) once everything is printed.
def show_additional_help
  puts "\n\n\n"
  puts "E.g.: processing a fastq sequences file"
  puts "#{$0} -t genomics_454.txt -Q sequences.fastq"
  puts "\n\n"

  puts "E.g.: processing a fasta file with qual"
  puts "#{$0} -t genomics_454.txt -f sequences.fasta -q sequences.qual"

  templates_dir = File.join($SEQTRIM_PATH, 'templates')
  template_names = Dir.glob(File.join(templates_dir, '*.txt')).map { |path| File.basename(path) }

  puts "\n\n ========================================================================================================"
  puts " Available templates to use with -t option (you can also use your own template):"
  puts " Templates at: #{templates_dir}"
  puts " ========================================================================================================\n\n"

  template_names.each { |template_name| puts " " + template_name }

  puts "\n\n ========================================================================================================"
  puts " Available databases to use in custom template files (you can also use your own database):"
  puts " Databases at: #{$DB_PATH}"
  puts " ========================================================================================================\n\n"

  ListDb.list_databases($DB_PATH).each { |db_name| puts " " + db_name }

  exit
end
|
139
|
+
|
140
|
+
|
141
|
+
# Reads the parameters from console. For this is used ARGV, that is an array.
|
142
|
+
# Command-line options. Each key receives its default value right before the
# switch that can override it is declared, so `options` is fully populated
# once the parser has run.
options = {}

optparse = OptionParser.new do |opts|
  # Banner displayed at the top of the help screen.
  opts.banner = "Usage: #{$0} -t template_file \{-Q fastaQ_file | -f fasta_file -q qual_file\} [options]"

  options[:server_ip] = '0.0.0.0'
  opts.on('-s', '--server IP', 'Server ip. Can use a partial ip to select the apropriate interface') do |server_ip|
    # Use the first local IPv4 address that begins with the given (possibly
    # partial) ip; fall back to 0.0.0.0 when no interface matches.
    local_ips = Socket.ip_address_list.select { |addr| addr.ipv4? }.map { |addr| addr.ip_address }
    options[:server_ip] = local_ips.find { |candidate| candidate.start_with?(server_ip) } || '0.0.0.0'
  end

  options[:port] = 0
  opts.on('-p', '--port PORT', 'Server port. If set to 0, an arbitrary empty port will be used') do |port|
    options[:port] = port.to_i
  end

  options[:workers] = 2
  opts.on('-w', '--workers COUNT', 'Number of workers, or file containing machine names to launch workers with ssh') do |workers|
    if File.exist?(workers)
      # An existing path is read as a workers file: one entry per line.
      options[:workers] = File.read(workers).split("\n").map { |line| line.chomp }
    else
      begin
        options[:workers] = Integer(workers)
      rescue ArgumentError, TypeError
        # Report the rejected argument itself, not the default still stored in options.
        $stderr.puts "ERROR:Invalid workers parameter #{workers}"
        exit
      end
    end
  end

  options[:only_workers] = false
  opts.on('-o', '--only_workers', 'Only launch workers') do
    options[:only_workers] = true
  end

  options[:check_db] = false
  opts.on('-c', '--check_databases', 'Check Blast databases and reformat if necessary') do
    options[:check_db] = true
  end

  options[:install_db] = nil
  opts.on('-i', '--install_databases TYPE', 'Install base databases and reformat them if necessary') do |db_type|
    options[:install_db] = db_type
  end

  options[:logfile] = STDOUT
  opts.on('-l', '--logfile FILE', 'Write log to FILE') do |file|
    options[:logfile] = file
  end

  options[:fastq] = nil
  opts.on('-Q', '--fastq FILE', 'Fastq input file. Use - for <STDIN>') do |file|
    options[:fastq] = file
  end

  options[:fasta] = nil
  opts.on('-f', '--fasta FILE', 'Fasta input file') do |file|
    options[:fasta] = file
  end

  options[:qual] = nil
  opts.on('-q', '--qual FILE', 'Qual input file') do |file|
    options[:qual] = file
  end

  options[:list_db] = nil
  opts.on('-L', '--list_db DB_NAME', 'List entries IDs in DB_NAME. Use "-L all" to view all available databases') do |value|
    options[:list_db] = value
  end

  options[:gen_params] = false
  opts.on('-G', '--generate_template', 'Generates a sample template file with default parameters') do
    options[:gen_params] = true
  end

  options[:template] = nil
  opts.on('-t', '--template TEMPLATE_FILE', 'Use TEMPLATE_FILE instead of default parameters') do |file|
    options[:template] = file
  end

  options[:chunk_size] = 100
  opts.on('-g', '--group_size chunk_size', 'Group sequences in chunks of size <chunk_size>') do |cs|
    options[:chunk_size] = cs.to_i
  end

  options[:json] = nil
  opts.on('-j', '--json', 'Save results in json file') do
    options[:json] = true
  end

  # Help screen: option summary followed by the extended help.
  opts.on_tail('-h', '--help', 'Display this screen') do
    puts opts
    show_additional_help
    exit
  end
end
|
264
|
+
|
265
|
+
# parse options and remove from ARGV
|
266
|
+
# Main flow: parse the command line, serve the informational/maintenance
# switches (each of them exits), validate the inputs and launch Seqtrim.
optparse.parse!

if options[:list_db]
  # List database entries in a database
  ListDb.new($DB_PATH, options[:list_db])
  exit
end

if options[:gen_params]
  # Generates a sample params file in current directory
  Params.generate_sample_params
  exit
end

# Set up the logger. Dir.mkdir is core Ruby, so creating the logs folder does
# not depend on 'fileutils' having been loaded by some other file.
Dir.mkdir('logs') unless File.exist?('logs')

$LOG = Logger.new(options[:logfile])
$LOG.datetime_format = "%Y-%m-%d %H:%M:%S"

# Severity order: DEBUG < INFO < WARN < ERROR < FATAL < UNKNOWN

$LOG.info("Using BLASTDB: "+ $FORMATTED_DB_PATH)
$LOG.info("Using options: "+ options.to_json)

if options[:install_db]
  # install databases
  InstallDatabase.new(options[:install_db], $DB_PATH)

  # reformat databases
  MakeBlastDb.new($DB_PATH)
  exit
end

unless File.exist?($FORMATTED_DB_PATH)
  STDERR.puts "Database path not found: #{$FORMATTED_DB_PATH}. \n\n\nInstall databases to this path or set your BLASTDB environment variable (eg.: export BLASTDB=new_path)"
  exit
end

if options[:check_db]
  # check and format blast databases
  MakeBlastDb.new($DB_PATH)
  exit
end

# A template plus at least one sequence input (fastq or fasta) is mandatory.
required_options = options[:template] && (options[:fastq] || options[:fasta])

# Leftover positional arguments are treated as an error as well.
if (ARGV.count != 0) || !required_options
  puts "You must provide all required options"
  puts ""
  puts optparse.help
  exit
end

# Check for template: a path that does not exist is retried as the name of a
# template bundled under $SEQTRIM_PATH/templates.
unless File.exist?(options[:template])
  bundled_template = File.join($SEQTRIM_PATH, 'templates', options[:template])

  if File.exist?(bundled_template)
    options[:template] = bundled_template
  else
    $LOG.info "Params file: #{options[:template]} doesn't exists. \n\nYou can use your own template or specify one from this list:\n============================="
    puts Dir.glob(File.join($SEQTRIM_PATH, 'templates', '*.txt')).map { |t| File.basename(t) }
    exit
  end
end
$LOG.info "Using params file: #{options[:template]}"

# fastq file ('-' means standard input, so there is nothing to check on disk)
if !options[:fastq].nil? && options[:fastq] != '-' && !File.exist?(options[:fastq])
  # Report the fastq path: this message used to interpolate options[:fasta].
  $LOG.error "Input file: #{options[:fastq]} doesn't exists"
  exit
end

# fasta file
if !options[:fasta].nil? && !File.exist?(options[:fasta])
  $LOG.error "Input file: #{options[:fasta]} doesn't exists"
  exit
end

# qual file
if !options[:qual].nil? && !File.exist?(options[:qual])
  $LOG.error "Input file: #{options[:qual]} doesn't exists"
  exit
end

Seqtrim.new(options)

# Generate the report when the optional generate_report.rb executable is on the PATH.
if system("which generate_report.rb > /dev/null ")
  cmd = "generate_report.rb output_files 2> report_generation_errors.log"
  $LOG.info "Generating report #{cmd}"
  `#{cmd}`
else
  $LOG.info "If you want a detailed report in PDF format, install the optional seqtrimnext_report gem (gem install seqtrimnext_report)"
end
|
data/bin/split_fastq.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'scbi_fastq'
|
4
|
+
|
5
|
+
|
6
|
+
# Splits FASTQ into consecutive files holding SPLIT_BY sequences each, named
# OUTPUT_NAME1.fastq, OUTPUT_NAME2.fastq, ...
if ARGV.count < 3
  puts "#{$0} FASTQ OUTPUT_NAME SPLIT_BY"
  exit
end

fastq = ARGV.shift
output_name = ARGV.shift
split_by = ARGV.shift.to_i

# String#to_i yields 0 for non-numeric input, and a chunk size of 0 would raise
# ZeroDivisionError in the modulo below; reject anything that is not positive.
if split_by < 1
  $stderr.puts "SPLIT_BY must be a positive integer"
  $stderr.puts "#{$0} FASTQ OUTPUT_NAME SPLIT_BY"
  exit 1
end

file_index = 1
out = FastqFile.new("#{output_name}#{file_index}.fastq", 'w')

fqr = FastqFile.new(fastq)

count = 0

fqr.each do |seq_name, seq_fasta, seq_qual, comments|
  # The next chunk file is opened only when there is a sequence to put in it,
  # so an input whose size is an exact multiple of SPLIT_BY no longer leaves
  # an empty trailing file behind.
  if out.nil?
    file_index += 1
    out = FastqFile.new("#{output_name}#{file_index}.fastq", 'w')
  end

  out.write_seq(seq_name, seq_fasta, seq_qual, comments)

  count += 1

  # Chunk complete: close it and let the next sequence open a new file.
  if (count % split_by) == 0
    out.close
    out = nil
  end
end

out.close if out
fqr.close
|
42
|
+
|
@@ -0,0 +1,65 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# Splits a FastQ file with ilumina paired data into two separate files.
|
4
|
+
|
5
|
+
require 'scbi_fastq'
|
6
|
+
|
7
|
+
VERBOSE=false

# Expects either two arguments (input file, output base name) or four
# (the same two plus the name suffixes that identify each mate).
unless ARGV.count == 2 || ARGV.count == 4
  puts "Usage: #{$0} paired.fastq output_name [pared1_tag paired2_tag]"
  exit
end

p1_path = ARGV[0]
output_base_name = ARGV[1]

# Default name suffixes of the first and second mate.
paired1_tag = '/1'
paired2_tag = '/2'

if ARGV.count == 4
  paired1_tag = ARGV[2]
  paired2_tag = ARGV[3]
end

# The tags are matched literally (quoted) at the end of the sequence name.
PAIRED1_TAG_RE = /#{Regexp.quote(paired1_tag)}$/
PAIRED2_TAG_RE = /#{Regexp.quote(paired2_tag)}$/

# File.exist? replaces File.exists?, which is not available on Ruby 3.2+.
unless File.exist?(p1_path)
  puts "File #{p1_path} doesn't exists"
  exit
end

paired1_out = FastqFile.new(output_base_name+'_paired1.fastq', 'w', :sanger, true)
paired2_out = FastqFile.new(output_base_name+'_paired2.fastq', 'w', :sanger, true)

f_file = FastqFile.new(p1_path, 'r', :sanger, true)

f_file.each do |n, f, q, c|
  # Route every read to the output that matches its name suffix; a read that
  # matches neither tag aborts the whole run.
  if n =~ PAIRED1_TAG_RE
    paired1_out.write_seq(n, f, q, c)
  elsif n =~ PAIRED2_TAG_RE
    paired2_out.write_seq(n, f, q, c)
  else
    STDERR.puts "Aborting due to ERROR in file: #{n} doens't match neither left (#{paired1_tag}) nor right (#{paired2_tag}) tags"
    exit
  end

  # Progress report every 10000 sequences.
  if (f_file.num_seqs % 10000) == 0
    puts "Count: #{f_file.num_seqs}"
  end
end

f_file.close

paired1_out.close
paired2_out.close
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
|
data/bin/split_paired.rb
ADDED
@@ -0,0 +1,70 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
|
4
|
+
require 'scbi_fasta'
|
5
|
+
|
6
|
+
# Splits every read that contains the linker into a forward (F) and a
# reverse (R) record, written to <output_base_name>.fasta and
# <output_base_name>.fasta.qual. Reads without the linker are not written.
if ARGV.count != 3
  puts "Usage: #{$0} fasta qual output_base_name"
  exit
end

fasta_path = ARGV[0]
qual_path = ARGV[1]
name = ARGV[2]

out_fasta = name+'.fasta'
out_qual = name+'.fasta.qual'

puts "Opening #{fasta_path}, #{qual_path}"

fqr = FastaQualFile.new(fasta_path, qual_path, true)

out_f = File.new(out_fasta, 'w+')
out_q = File.new(out_qual, 'w+')

# Number of input sequences visited (with or without linker).
c = 0

linker = 'TCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACG'

fqr.each do |n, f, q|
  l_start = 0
  l_end = f.index(linker)

  if l_end
    r_start = l_end + linker.length
    r_end = f.length

    # Exclusive ranges: with the linker at position 0 the old inclusive form
    # (l_start..l_end-1) became 0..-1 and returned the WHOLE read and quality
    # list as the forward part instead of an empty one.
    forward = f[l_start...l_end]
    reverse = f[r_start..r_end]

    forward_q = q[l_start...l_end]
    reverse_q = q[r_start..r_end]

    # Report reads whose sequence and quality parts differ in length.
    if (forward.length != forward_q.length) || (reverse.length != reverse_q.length)
      puts [forward.length, forward_q.length, reverse.length, reverse_q.length].join(',')
    end

    out_f.puts ">#{n}F template=#{n} dir=F library=unadeellas"
    out_f.puts forward
    out_f.puts ">#{n}R template=#{n} dir=R library=unadeellas"
    out_f.puts reverse

    out_q.puts ">#{n}F template=#{n} dir=F library=unadeellas"
    out_q.puts forward_q.join(' ')
    out_q.puts ">#{n}R template=#{n} dir=R library=unadeellas"
    out_q.puts reverse_q.join(' ')
  end

  c = c + 1
end

puts c

fqr.close

out_f.close
out_q.close
|
70
|
+
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require "seqtrim_action"
|
2
|
+
|
3
|
+
########################################################
# Author: Almudena Bocinos Rioboo
#
# SeqtrimAction subclass for AB adapter regions (going by the class name).
# Sets the @cut flag and decorates characters with magenta + underline.
# Inherit: SeqtrimAction
########################################################

class ActionAbAdapter < SeqtrimAction
  # Forwards both positions to SeqtrimAction and switches @cut on.
  # NOTE(review): @cut is interpreted by SeqtrimAction, which is not visible
  # here -- presumably it marks the region for removal; confirm.
  def initialize(start_pos, end_pos)
    super
    @cut = true
  end

  # Returns +char+ after calling #magenta and #underline on it (presumably
  # terminal colouring helpers added to String elsewhere -- confirm).
  def apply_decoration(char)
    char.magenta.underline
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require "seqtrim_action"
|
2
|
+
|
3
|
+
########################################################
# Author: Almudena Bocinos Rioboo
#
# SeqtrimAction subclass for "far" AB adapter regions (going by the class name).
# Sets the @cut flag and decorates characters with magenta + negative.
# Inherit: SeqtrimAction
########################################################

class ActionAbFarAdapter < SeqtrimAction
  # Forwards both positions to SeqtrimAction and switches @cut on.
  # NOTE(review): @cut is interpreted by SeqtrimAction, which is not visible
  # here -- presumably it marks the region for removal; confirm.
  def initialize(start_pos, end_pos)
    super
    @cut = true
  end

  # Returns +char+ after calling #magenta and #negative on it (presumably
  # terminal colouring helpers added to String elsewhere -- confirm).
  def apply_decoration(char)
    char.magenta.negative
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require "seqtrim_action"
|
2
|
+
|
3
|
+
########################################################
# Author: Almudena Bocinos Rioboo
#
# SeqtrimAction subclass for left AB adapter regions (going by the class name).
# Sets the @cut flag and decorates characters with magenta + negative.
# Inherit: SeqtrimAction
########################################################

class ActionAbLeftAdapter < SeqtrimAction
  # Forwards both positions to SeqtrimAction and switches @cut on.
  # NOTE(review): @cut is interpreted by SeqtrimAction, which is not visible
  # here -- presumably it marks the region for removal; confirm.
  def initialize(start_pos, end_pos)
    super
    @cut = true
  end

  # Returns +char+ after calling #magenta and #negative on it (presumably
  # terminal colouring helpers added to String elsewhere -- confirm).
  def apply_decoration(char)
    char.magenta.negative
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require "seqtrim_action"
|
2
|
+
|
3
|
+
########################################################
# Author: Almudena Bocinos Rioboo
#
# SeqtrimAction subclass for empty inserts (going by the class name).
# Leaves @cut off, sets @informative and applies no decoration.
# Inherit: SeqtrimAction
########################################################

class ActionEmptyInsert < SeqtrimAction
  # Forwards both positions to SeqtrimAction, then clears @cut and sets
  # @informative.
  # NOTE(review): both flags are interpreted by SeqtrimAction, which is not
  # visible here -- confirm their exact meaning there.
  def initialize(start_pos, end_pos)
    super
    @cut = false
    @informative = true
  end

  # Returns +char+ unchanged: this action adds no decoration.
  def apply_decoration(char)
    char
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require "seqtrim_action"
|
2
|
+
|
3
|
+
|
4
|
+
########################################################
# Author: Almudena Bocinos Rioboo
#
# SeqtrimAction subclass for repeated sequences that are ignored (going by the
# class name). Leaves @cut off and applies no decoration.
# Inherit: SeqtrimAction
########################################################

class ActionIgnoreRepeated < SeqtrimAction
  # Forwards both positions to SeqtrimAction and clears @cut.
  # NOTE(review): @cut is interpreted by SeqtrimAction, which is not visible
  # here -- confirm its exact meaning there.
  def initialize(start_pos, end_pos)
    super
    @cut = false
  end

  # Returns +char+ unchanged: this action adds no decoration.
  def apply_decoration(char)
    char
  end
end
|